;;; -*- Mode:Lisp; Package:DYPAR; Base:10; -*-

;;;		  LastEditDate =  13:51:34  We, 05-Sep-89  -- Jesus Boti

;;; ----------------------------------------------------------------
;;;
;;; 		(c) Copyright, 1982, 1984, 1989,
;;; 			by Mark Boggs
;;;                     and Jesus Gonzalez Boticario
;;; 			All rights reserved
;;;
;;; ----------------------------------------------------------------

;;; XLOAD.LSP

;;; HISTORY
; 26-Feb-01 Moises Gil&Nuria Ripoll
;	Aadimos en la funcion "altprop", el tratamiento del nuevo operador
;	"<-" para hacer el preprocesamiento y cambiarlo de lugar en el patron.
;	Aadimos <- a !!operators.
;
; 05-Sep-89  Jesus Gonzalez Boticario para Rank Xerox, Espaa
;       Las reglas de alto nivel pueden ser a su vez de elipsis o de 
;     anafora para asignarlas adecuadamente a la variable *elipsis-rules*
;     o a la variable *anafora-rules* nada mas determinar el nombre de la 
;     regla se llama a la funcion "comprobar-elip-anafora".
;      Una vez terminada la carga de la gramatica de la funcion "loadgrammar"
;     se muestran las reglas de elipsis y de anafora que se han encontrado,
;     mediante la funcion "mostrar-elipsis-anafora".
; 28-Ago-89  Jesus Gonzalez Boticario para Rank Xerox, Espaa
;       Las reglas incluidas en la variable !!recorrules, tienen como 
;       identificativo de tipo de regla el simbolo "+>". Su parte de 
;       accion se reduce a un mensaje que le indica al usuario cosas como:
;       "A que se refiere su pregunta de CUANTOS". Es decir se han podido 
;       asignar algunas variables de la gramatica pero no se ha podido aplicar
;       ninguna regla por estar mal escrita la frase del usuario. 
; 22-Ago-89  Jesus Gonzalez Boticario para Rank Xerox, Espaa
;       Introducir al final de la funcion "loadgrammar" la inicializacion de
;       las variables de elipsis y de anafora (activate-elipsis-anafora).
; 24-Aug-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Added new ruletype: recovery rules.
;	They are the same as top-level rules except they are not kept in the
;		same place.
;	They are globally stored in !!recorules
;	For individual files they are stored in FILE-NAME-recovery
;	Fixed bug in loadgrammar so that multiply defined nts don't
;		show up in the !!nonterms list.  The fact of their
;		existence is stored in !!multiple-nts
;	Added  support
;
; 18-Apr-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Added !!non-pattern-operators variable to altprop, to use
;	in loading of extension operators.
;
; 12-Apr-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Fixed bug in metacomment facility so that values are now reset.
;	Added code to test whether or not a file (in &file) has already
;		been loaded and to store the files in a variable associated
;		with the file name.
;	Added macro symmake for constructing new symbol names.
;	Improved comments for pattern altering functions.
;
; 09-Apr-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Added direct reference to dypar package for the loader.
;	Added &file and &test features to grammar definition.
;	  &test is used to define a list of test input/parser output pairs
;	        for use by the parser-test function.
;	  &file is used to name files to be loaded automatically with the
;		grammar file.
;
; 07-Apr-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Added support for &a operator.
;	Changed dv calls to defvar calls.
;
; 15-Jan-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Changed msg calls to format calls.
;	Fixed a meta-comment bug in top-level rules.
;	Converted from property lists to hashed Lisp structures.
;	Added package support including dependency on struc.lsp.
;
; 14-Jan-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Converted for GCL3.1 usage.
;
; 08-Dec-84  Mark Boggs (wmb) at Carnegie-Mellon University
;	Updated for &sc and &o.
;
; 27-Nov-84  Mark Boggs (wmb) at Carnegie-Mellon University
;	Updated for &bu and &frame.
;
; 01-Sep-84  Scott Safier (ss) at Carnegie-Mellon University
;                         (Safier) at Carnegie Group Inc
;	Converted to common lisp.
;
; 03-May-84  Mark Boggs (wmb) at Carnegie-Mellon University
;	Merged in Nic and Marion's changes (&s and &ui)
;
; 06-Apr-84  Nicolas Easton (mne) DEC/IST at CMU
;	Added &s to taltprop.
;
; 23-Nov-83  Scott Safier (ss) at Carnegie-Mellon University
;	added readtable references.
;
; 20-Oct-83  Stephen Morrisson (sem) at Carnegie-Mellon University
;       Added &morph pattern generation.
;
; 19-Oct-82  Mark Boggs (wmb) at Carnegie-Mellon University
;	Added &m operator pattern generation.
;

(in-package "DYPAR"); :use '("LISP" "USER"))

;;; We want the macros and structure defs to be around when we compile.
(eval-when (compile)
  (if (not (boundp '*macros-loaded*)) (load "macros"))
  (if (not (boundp '*struc-loaded*)) (load "struc"))
  (if (not (boundp '*babel-loaded*)) (load "babel"))
  )


(proclaim '(special
	     $outport$ 
	     !!patvars 
	     !!nonterms
	     !!patrules
	     !!recorules
	     !!pattrans
	     !ptrace
	     !!synlist 
	     !!not-defined
	     !!multiple-nts
             !interactive
	     !!metakeys
	     !!metakeys-wo-colon
	     !!metakey-name
	     !!metakey-i-s
             !!operators
	     !!non-pattern-operators
	     *elipsis-rules*
	     *anafora-rules*))

(defvar !!not-defined nil)
(defvar !!multiple-nts nil)

(defvar !!metakeys
  '(:name :n :internal-strategy :i-s :external-strategy :e-s))

(defvar !!metakeys-wo-colon
  '(name n internal-strategy i-s external-strategy e-s))

(defvar !!metakey-name '(:name :n name n))

(defvar !!metakey-i-s '(:internal-strategy :i-s internal-strategy i-s))

(defvar !!operators '(&m ? * + ! !! &c := &i ^ &u &ui &n &s ~ =
			 &morph &bu &frame &sc &o &a <-))

;;; Initialize the symbols used to form rule names so that we can save
;;; this information when there are no rules of this type.
(initsym 'rul)
(initsym 'recover)
(initsym 'tra)

;;;                      *** MACROS ***

;;; symmake constructs a symbol based on a root, a suffix and an optional
;;; prefix.  It is smart when the root is a file name, stripping off 
;;; directories and extensions.
(defmacro symmake (root suffix &optional (prefix nil))
  `(intern (concatenate
	     'string
	     (if ,prefix ,prefix "")
	     (pathname-name ,root)
	     ,suffix)))

;;; colonp
;;;
;;; colonp checks a string to see if it starts with a colon
(defmacro colonp (atom)
  `(eql #\: (char (symbol-name ,atom) 0)))

;;; metakeyp
;;;
;;; checks if a string is one of the metakeys
(defmacro metakeyp (atom)
  `(or (memb ,atom !!metakeys-wo-colon)
       (memb ,atom !!metakeys)))

;;; readmacros
;;;
;;; The readmacro below reads ?X as (? X). Use \?X to read ? as part 
;;; of an atom pname when this readmacro is turned on.
(defun qm? (stream char)    
  (declare (ignore char))
  (cond ((eql (peek-char nil stream) #\space) '?)
	(t (list '? (read stream)))))

;;; This does the same thing as qm? except for the ~ operator.
(defun tm~ (stream char)
  (declare (ignore char))
  (cond ((eql (peek-char nil stream) #\space) '~)
	(t (list '~ (read stream)))))

;;; Since colon is reserved for the package and keyword systems we need to
;;; pull its teeth before using it.
;(defun ^colon-reader^ (stream char)
;  (declare (ignore char))
;  (if (not (char= (code-char 32) (peek-char nil stream)))
;      (intern (concatenate 'string ":" (princ-to-string (read stream))))
;      '|:|))

;;; If we pull all of its teeth as above we cannot include keywords in the
;;; RHS of top-level rules.  Hence we are reduced to looking for the places
;;; we use colons explicitly.  This function allows symbols to start with
;;; two colons, but interns all that start with one colon (except := and :)
;;; in the keyword package.  A symbol that begins with three colons would
;;; be produced having four.
(defun ^colon-reader^ (stream char)
  (declare (ignore char))
  (case (peek-char nil stream)
    (#\Space '|:|)
    (#\= (read-char stream) :=)
    (#\: (intern (concatenate
		   'string "::"
		   (string-upcase (princ-to-string (read stream))))))
    (t (intern (string-upcase
		    (princ-to-string (read stream))) 'keyword))))


;;; So as to avoid problems with the vertical bar we change it to !.
(defun vert-bar (stream char)
  (declare (ignore stream char))
  '!)

;;;;**** This implementation of lisp does not allow the user to add
;;;;**** characters to the alphabet, nor does it provide a viable
;;;;**** means of reading one character as another.
;;;;**** The only plausible solution is to redefine char-upcase
;;;;**** so as to do our  conversion for us
;(setf (symbol-function 'old-char-upcase) (symbol-function 'char-upcase))

;(setf (symbol-function 'lisp::char-upcase)
;      #'(lambda (char)
;	  (if (char= char #\)
;	      #\
;	    (old-char-upcase char))))

;;;;**** This is an exceptionally ugly solution to the problem.
;;;;**** but it beats the stone wall in front of doing it by splicing.
;;;;**** The commented code below "almost" works.

;;; We want to make sure that the  character is always read in upper case.
;(defun enye (stream char)
;  (declare (ignore char))
;  (unread-char #\ stream)
;  (values))

;;; Set up a grammar loading read table and attach the character macros.
(defvar *dypar-load-table* (copy-readtable))

(set-macro-character #\: #'^colon-reader^ nil *dypar-load-table*)
(set-macro-character #\~ #'tm~ nil *dypar-load-table*)
(set-macro-character #\| #'vert-bar nil *dypar-load-table*)
(set-macro-character #\? #'qm? nil *dypar-load-table*)
;(set-macro-character #\ #'enye nil *dypar-load-table*)
;(set-syntax-from-char #\ #\n *dypar-load-table*)

(defmacro new-recovers (file)
  `(symbol-value (concat (pathname-name ,file) "-RECOVERY")))


;;;                      *** LOADGRAMMAR ***

;;; loadgrammar
;;;
;;; Loads semantic grammar pattern action rules =>, rewrite rules ->,
;;; recovereing rules +>, and global transformation rules ::> and they're 
;;; corresponding meta-comments. Sets the meta-comment info, the rule's name,
;;; it's pattern, rewrite, and/or action. read-meta-comment test makes sure the
;;; meta-comment info is ok and assign-mc does the ptprops for the info.
(defun loadgrammar (filnam &aux (*package* (find-package "DYPAR")))

  ;;si el fichero no tiene la extension adecuada se le aade al nombre.
  (let ((raiz))
    (if (not (search ".GRA" 
		     (setq raiz (string-upcase filnam))))
      (setq filnam (concatenate 'string raiz ".GRA"))))
  
  ;; Initilize this one symbol so that it can be accessed in a push.
  (setf (new-nonterms filnam) nil)
  
  ;; Open the file associating the stream with ichan.
  (let ((ichan (open filnam))
	(old-readtable *readtable*)
	(*readtable* *dypar-load-table*)
	(tmp))
    
    ;; Do some preprocessing to pick up any files that need to be loaded,
    ;; or some associated test examples for this grammar.
    (do ((item (read ichan nil 'eof) (read ichan nil 'eof)))
	((or (atom item) (not (member (car item) '(&file &test))))
	 (setq tmp item))
      (case (car item)
	(&file (mapc (funl (e)
		       (if (not (boundp (symmake e "-LOADED*" "*")))
			   (let ((*readtable* old-readtable))
			     (load e))))
		     (cdr item))
	       (setf (symbol-value
		       (symmake filnam "-FILES"))
		     (cdr item))
	       (terpri))
	(&test (setf (symbol-value
		       (intern (concatenate
				 'string
				 (pathname-name filnam)
				 "-TESTDATA")))
		     (cdr item)))))

    ;; Main do loop for reading the grammar rules.  Since the rules
    ;; are triplets (lhs arrow rhs) we call read three times to retreive
    ;; each rule.
    (do ((lhs tmp (read ichan nil 'eof))
	 (ruletyp (read ichan nil 'eof)
		  (read ichan nil 'eof))
	 (rhs (read ichan nil 'eof)
	      (read ichan nil 'eof))
	 (rule) (trans)
	 (metacomment nil nil) (new-rules) (new-trans) (new-recos))
	
	;; exiting clause
	((or (null lhs)
	     (eql lhs 'eof))
	 (close ichan)	
	 (and !interactive
	      (smformat 'loadgrammar1))
	 
	 ;; Put the new-rules and new-trans onto the file name symbol
	 (setf (new-patrules filnam) new-rules)
	 (setq !!patrules (append new-rules !!patrules))
	 (setq !!pattrans (append new-trans !!pattrans))
	 (setq !!recorules (append new-recos !!recorules))
	 (setf (new-pattrans filnam) new-trans)
	 (setf (new-recovers filnam) new-recos)
	 t)
      
      ;; BODY
      ;; case of a meta-comment
      (cond ((not (memb ruletyp '(|::>| => -> +>)))
	     (setq metacomment (read-meta-comment lhs))
	     (setq lhs ruletyp)
	     (setq ruletyp rhs)
	     (setq rhs (read ichan nil 'eof))))
      
      ;; a msg to show the rules loading
      (ptrace "~S" ruletyp)
      
      ;; do the loading according to rule type
      (case ruletyp
	
	;; Top Level Rule
	(=> (if (not (setq rule (assign-mc-name (car metacomment))))
		(setq rule (newsym 'rul)))
	    (setq rule (comprobar-elip-anafora rule))
	    (put-rule-hash rule
			   (make-tl-rule
			     :name rule
			     :pattern (altprop lhs)
			     :action rhs))
	    (assign-mc-strat rule (cdr metacomment))
	    (setcons rule new-rules))
	
	(+> (if (not (setq rule (assign-mc-name (car metacomment))))
		(setq rule (newsym 'recover)))
	    (put-rule-hash rule
			   (make-tl-rule
			     :name rule
			     :pattern (altprop lhs)
			     :action rhs))
	    (assign-mc-strat rule (cdr metacomment))
	    (setcons rule new-recos))
	
	;; Rewrite Rule
	(-> (cond ((not (litatom lhs))
		   (smformat 'loadgrammar2 lhs))
		  (t (cond
		       ;; previously seen
		       ((memb lhs !!not-defined)
			;(x-ancestory lhs)
			;(setq !!not-defined
			;      (delete lhs !!not-defined)))
		       )
		       ;; previously defined with same definition
		       ((and (get-rule-hash lhs)
			     (equal (altprop rhs)
				    (get-pattern lhs)))
			(setf !!multiple-nts (cons lhs !!multiple-nts)))

		       ;; previously defined different definition
		       ((get-rule-hash lhs)
			(smformat 'loadgrammar3 lhs)
			(smformat 'loadgrammar4 (get-pattern lhs))
			(smformat 'loadgrammar5 (altprop rhs))
			(cond
			  ((y-or-n-p
			     "Should I use the new definition?")
			   (set-pattern lhs (altprop rhs))
			   (setq !!nonterms (nconc1 !!nonterms
						    lhs))
			   
			   ;; Put the new nonterms onto 
			   ;; the file with name symbol
			   (pushnew lhs (new-nonterms filnam))
			   (smformat 'loadgrammar7))
			  (t (smformat 'loadgrammar8))))
		       
		       ;; just a new rewrite rule
		       (t (put-rule-hash lhs
					 (make-nt
					   :name lhs
					   :pattern (altprop rhs)))
			  (setq !!nonterms (nconc1 !!nonterms lhs))
			  (pushnew lhs (new-nonterms filnam)))))))
	
	;; Transformation Rule
	(|::>| (if (not (setq trans
			      (assign-mc-name (car metacomment))))
		   (setq trans (newsym 'tra)))
	       (put-rule-hash trans
			      (make-tl-rule ; the fields are identical
				:name trans
				:pattern (altprop lhs)
				:action rhs))
	       (assign-mc-strat trans (cdr metacomment))
	       (setcons trans new-trans))
	
	;; Bad Rule
	(t (smformat 'loadgrammar9 lhs ruletyp rhs)
	   (close ichan)
	   (smformat 'loadgrammar10)
	   (return nil))))
    (mostrar-elipsis-anafora )))


;;;               *** PATTERN ALTERING FUNCTIONS ***


;;; altprop
;;;
;;; The new altprop function for dypar does everything the old one did in
;;; a cleaner fashion.  This function will discard superflous parentheses.
(defun altprop (pat)
  (cond 

    ;; If we have no pattern we exit.
    ((null pat) nil)

    ;; If the pattern is not a list we tell the user he screwed up.
    ((not (consp pat))
     (smformat 'altprop1 pat))

    ;; If we have one of the repetitive infix operators, convert it to prefix
    ((intersectp '(! !! &c) (cdr pat))
     (mkprefix-op pat))

    ;; The variable assignment operator is also prefix.
    ((eql (cadr pat) ':=)
     (list (cons ':= (cons (car pat) (altprop (cddr pat))))))

; moiss, 22-2-01:
; Para poder asignar cositas a variables de lisp
    ((eql (cadr pat) '<-)
     (list (cons '<- (cons (car pat) (altprop (cddr pat))))))

    ;; We want these operators to appear inside the sub-pattern.
    ((memb (car pat) '(? ~))
     (list (cons (car pat) (altprop (cdr pat)))))

    ;; Special processing for the non-pattern arguments
    ((eql (car pat) '&i)
     (list (append (list (car pat) (cadr pat)) (altprop (cddr pat)))))

    ;; Leave this one alone.
    ((eql (car pat) '&a) (list pat))

    ;; I didn't write this -wmb
    ((eql (car pat) '&morph)
     (list (do ((temp (cons '&morph (xload-morph pat))
		      (append temp (xload-morph pat))))
	       ((or (null (setq pat (cddr pat)))
		    (null (memb (cadr pat) '(:root :suffix :ending))))
		temp))))

    ;; If the car of the pattern is a list we must recurse.
    ((consp (car pat))
     (nconc (altprop (car pat)) (altprop (cdr pat))))

    ;; We check to see if the operator is one that does not use
    ;; pattern arguments.  If so we leave it alone, assuming that
    ;; the arguments are to be passed directly to an extension fn.
    ((member (car pat) !!non-pattern-operators)
     (if (cdr pat)
	 (list pat)
	 pat))

    ;; If the car of the pattern is an operator not treated specially
    ;; above, we need to make sure it receives the proper nesting.
    ((member (car pat) !!operators)
     (list (cons (car pat) (altprop (cdr pat)))))

    ;; Otherwise we have a simple symbol and don't need the extra nesting.
    (t (cons (car pat) (altprop (cdr pat))))))

;;; This takes a pattern list in infix form and returns a pattern list in
;;; prefix form.  Care is taken so that the sub-patterns are processed in
;;; the original order.
(defun mkprefix-op (pattern)
  (do ((pat pattern (cdr pat))
       (op (intersectp '(! !! &c) pattern))
       (newpat) (seq))
      ((null pat)
       ;; In the exit clause we add the value of the final sub-pattern to
       ;; the newly constructed prefix version, and pass the result to 
       ;; the function which converts simple calls to ! or !! into more
       ;; efficient calls to &m
       (cond ((memb op '(! !!))
	      (prestruc2 op (nconc1 newpat (altprop seq))))
	     (t (list (cons '&c (nconc1 newpat (altprop seq)))))))
    (cond
      ;; If we are looking at the operator, we have completed reading
      ;; one of the sub-patterns, and should store the result in the
      ;; new pattern we are constructing.
      ((eql (car pat) op)
       (setq newpat (nconc1 newpat (altprop seq)))
       (setq seq nil))
      ;; Otherwise we just add the next element in the input pattern
      ;; to the sub-pattern we are currently constructing.
      (t (setq seq (nconc1 seq (car pat)))))))

(defmacro taltprop (l)
  `(altprop ,l))

; macro action fix
(defun fix-imbedded-pat (pat)
  (let ((imbed-pat (caddar pat)))
    (if (and (not (atom imbed-pat))
	     (not (memb (car imbed-pat) !!operators)))
	(list (cons (caar pat) 
		    (rplacd (cdar pat) imbed-pat)))
	pat)))
				  
;;; prestruc
;;;
;;; changes a "(term ! term)" pattern to it's &m form "(&m term term)"	
(defun prestruc (l)
  (cond ((and (eql (car l) '!)
	      (uci-every (function litonly) (cdr l)))
	 (list (cons '&m (mapcar (function car) (cdr l)))))
	(t (list l))))

(defun prestruc2 (op pat)
  (cond ((uci-every (function litonly) pat)
	 (list (cons '&m (mapcar (function car) pat))))
	(t (list (cons op pat)))))

;;; xload-morph
;;;
;;; changes a ":ending" in a &morph  pattern to ":suffix" and calls
;;; altprop on the pattern following this attr of &morph
(defun xload-morph (l)
  (list 
    (if (eql ':ending (cadr l))
	':suffix
	(cadr l))
    (altprop (if (symbolp (caddr l))
		 (list (caddr l))
		 (caddr l)))))


;;;                 *** META COMMENT FUNCTIONS ***



;;; read-meta-comment
;;;
;;; usr-list has form: 
;;;  (:name val :internal-strategy val val... :external-strategy val val...)
;;; or any subset thereof. Parsing rules are as follows:
;;; 1. accept partials
;;; 2. if missing val disregard pair
;;; 3. if wrong keyword or no keyword but a val, abort load
;;; Returns a list of the three key-values pairs that where read.
(defun read-meta-comment (usr-list)
  (do ((mc-vals)
       (mclist (cddr usr-list) (cddr mclist))
       (keyword (car usr-list) (car mclist))
       (val (cadr usr-list) (cadr mclist)))
      ((null keyword)
       (reverse mc-vals))
    (cond 
      ((metakeyp keyword)
       (cond 
	 ((not (or (metakeyp val)
		   (null val)))
	  
	  ;; case everything is fine
	  ;; if val atom then pickup rest of vals and reset the mclist
	  (if (atom val)
	      (do ((mcs (cdr mclist) (cdr mcs))
		   (ele (car mclist) (car mcs))
		   (addit-vals (list val)))
		  ((or (null ele)
		       (metakeyp ele))
		   (setq mclist (memb ele mclist))
		   (setq val (reverse addit-vals)))
		(setcons ele addit-vals)))
	  (setcons (cons keyword val) mc-vals))
	 
	 ;; case metakey with no val
	 (t (smformat 'read-meta-comment1 keyword)
	    (smformat 'read-meta-comment2)
	    (setcons val mclist))))
      (t (cond 
	   ((not (metakeyp val))
	    (if (colonp keyword)
		
		;; case wrong keyword type with val
		(pause
		  "read-meta-comment"
		  "The following keyword specified the wrong type: "
		  keyword)
		
		;; case wrong keyword with val
		(pause "read-meta-comment"
		       "The following is not a keyword "
		       keyword)))
	   
	   ;; case of no keyword before legal val
	   (t (pause "read-meta-comment"
		     "You need a keyword before the value " 
		     keyword)))))))

;;; assign-mc-name
;;;
;;; This checks to see if there is a name in the metacomment. If it 
;;; exsists then it is returned to be set as the rule, otherwise nil is 
;;; returned so that a gynsym rule can be made. 
;;; metacomment is usually a list of the three key-value pairs. Here just the 
;;; name key-value pair is passed.
(defun assign-mc-name (mc-pair)
  (if (memb (car mc-pair) !!metakey-name)
      (cadr mc-pair)))

;;; assign-mc-strat
;;;
;;; puts the meta-comment strategies into the rule structure.
;;; metacomment is a list of only the two strategies here.
(defun assign-mc-strat (rule metacomment)
  (dolist (mc-pair metacomment rule)
    (if (memb (car mc-pair) !!metakey-i-s)
	(set-in-strat rule (cdr mc-pair))
	(set-ex-strat rule (cdr mc-pair)))))

(defvar *xload-loaded* t)