;;; -*- Mode:Lisp; Package:DYPAR; Base:10; -*- ;;; LastEditDate = 15:49:28 Tue, 11-Apr-89 -- Mark Boggs ;;; XMATCH6.L ;;; ---------------------------------------------------------------- ;;; ;;; (c) Copyright, 1982, 1983 ;;; by Jaime Carbonell & Mark Boggs ;;; All rights reserved ;;; ;;; ---------------------------------------------------------------- ;;; Needs Auxiliary files general, struc and macros for low-level fnctn dfns. ;;; HISTORY ; 27-Feb-01 Moises Gil&Nuria Ripoll ; Modificamos la funcion "smatch1" para buscar sinonimos de las palabras. ; ; 27-Feb-01 Moises Gil&Nuria Ripoll ; Añadimos en la funcion "smatch1" el tratamiento del nuevo operador "<-" ; ; 22-Apr-89 Mark Boggs (boggs) at Rank Xerox, Spain ; Added !!default-assignments variable to keep track of variables ; that are assigned default values. ; ; 11-Apr-89 Mark Boggs (boggs) at Rank Xerox, Spain ; Fixed bug in := introduced in last change. ; ; 07-Apr-89 Mark Boggs (boggs) at Rank Xerox, Spain ; Added &a operator for extra variable assignments ; Changed := to search for &i results as opposed to looking in the ; car of the bindings list. ; Changed xref check to always expand non-terminals with empty ; first/last lists. ; Removed patch for last (mlast -> last). ; ; 27-Jan-89 Mark Boggs (boggs) at Rank Xerox, Spain ; Converted from msg to format. ; Converted from property list patterns to structure based patterns. ; ; 14-Jan-89 Mark Boggs (boggs) at Rank Xerox, Spain ; Converted to GCL3.1 ; memq -> memb ; last -> mlast ; putprop -> ptprop ; #+ usage eliminated and all Franz dependent calls removed ; removed calls to boundp and is-boundp as they reflected the use of ; special variable which was eliminated years ago. The ; value was replaced by nil as the only place these calls ; occured was at the top level of the pattern matcher, and as ; such there should not have been an initial value anyway. ; ; 19-Nov-85 Mark Boggs (wmb) at Carnegie-Mellon University ; Added a primitive tracing capability, which displays rewrite ; expansion. It is controlled by the value of the !show-expanded ; variable. ; ; 08-Dec-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Added the &o operator for deterministic disjunction. It returns ; either a match which succeeds on the subpattern of the state of ; the match before the subpattern was tried. This is as opposed to ; both values which the non-determnistic version (?) returns. ; Also added Jaime's &sc (formerly &s) which performs disjunction ; on semantic categories. Brought over the code for caching ; partial matches and checking pattern constants, but haven't ; installed them yet. ; ; 27-Nov-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Merged Stephen's changes with the Common Lisp version. Fixed ; !!xref-props global to point to the properties that are actually ; used by the matcher. Stephen also wrote a function called ; xmatch2 which is now here. ; ; 17-Sept-84 Stephen E. Morrisson (sem) at Carnegie-Mellon University ; 1/ Added &bu, which is a upto the last occurance of a pattern, as ; opposed to &u which is upto first occurance of a pattern. For frames. ; 2/ Added &frame operator. ; ; 24-Aug-84 Scott A. Safier (ss) at Carnegie-Mellon University ; added changes for conversion to common lisp. SELECTQ became CASE ; ; 03-May-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Added in the new operators (and history notes) defined by Nic ; and Marion. Patched same to agree with the !non-term change. ; ; 03-May-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Changed !non-term from a special variable to an unspecial one. ; This was possible since the change made in the parameters which ; are passed to extension functions. The new name for the ; parameter is non-term. The other file affected by the change ; was extend.l ; ; 20-Apr-84 Marion Kee (kee) at Carnegie-Mellon University ; Added &ui operator which consumes input up to and including its ; embedded pattern. ; ; 06-Apr-84 Nicolas Easton (mne) DEC/IST at Carnegie-Mellon University ; Added the &s operator to scan remainder of current input for ; given pattern. ; ; 02-Mar-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Removed the $d operator from the pattern matcher and defined it ; as an extension function. Removed some unnecessary variables ; and changed the names of some others to be more mnemonic. ; ; 29-Feb-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Changed the behaviour of the function calling mechanism. ; Replaced calls to apply with calls to funcall. Also modified + ; to behave in the same way as * ; ; 18-Jan-84 Mark Boggs (wmb) at Carnegie-Mellon University ; Changed the behaviour of * to handle nested optional subpatterns ; correctly according to the following algorithm: ; ; input consumed? | pattern remaining? | result of match ; yes yes continue matching ; yes no return result ; no yes continue matching ; no no fail ; ; 01-Dec-83 Mark Boggs (mark) at University of Stuttgart ; Rewrote the &c operator to work faster by keeping track of optional ; match paths and not following them. New functions progress, and ; smatch-pr. Aided and abetted by Jaime Carbonell. ; ; 16-Nov-83 Stephen Morrisson (sem) at Carnegie-Mellon University ; Added $d operator to xmatch. ; ; 01-Nov-83 Stephen Morrisson (sem) at Carnegie-Mellon University ; Changed so that &morph and &i do not defeat the check for ; left recursion. ; ; 28-sept-83 Stephen Morrisson (sem) at Carnegie-Mellon University ; Added &morph operator. ; ; 20-Jul-83 Jaime Carbonell (jgc) at Carnegie-Mellon University ; Added full recursive capability to the grammar interpreter. ; Detects left-recursion and any other situations that would recurr ; forever by keeping a non-terminal expansion stack ; as the fourth argument to smatch and smatch1: NON-TERM is an ; assoc list of ( . input-pos) pairs, with the last ; occurrence of in the calling sequence occuring first. ; If no input was consumed since the last expansion of the non-terminal ; in consideration, the match fails. Note that this change can slow ; the whole system (generality vs speed), but until tested the ; assumption is that the slowdown is minimal bookeeping for shallow ; grammars. ; ; 19-Oct-82 Mark Boggs (wmb) at Carnegie-Mellon University ; Added the &m operator for quick disjunction. ; ; 10-oct-82 Stephen Morrisson (sem) at Carnegie-Mellon University ; changed smatch to include &c and &u patterns. ; ; 25-Jul-82 Mark Boggs (wmb) at Carnegie-Mellon University ; Added code for spelling correction ; ; 18-Apr-82 Mark Boggs (wmb) at Carnegie-Mellon University ; Changed smatch1 to take advantage of the information generated ; by the cross-referencing function. rewrites are only matched if ; the car of the input is in their :first property list. This ; seems to make things really fly!!!! ; ; 14-Mar-82 Mark Boggs (wmb) at Carnegie-Mellon University ; Added the *var* option for variable assignment. This will when ; found in a pattern force the generation of a unique variable ; name, so that a pattern like "(* (:= *var* ) %comma)" will ; not have the side effect of blasting the earlier value of the ; variable. Improved documentation, and cursed myself for not starting ; a history sooner during the development of DYPAR. ; ; 1-Aug-82 Mark Boggs (wmb) at Carnegie-Mellon University ; Initial translation to FRANZLisp complete Sat Aug 1 00:20:51 ; ; 23-Jul-81 Jaime Carbonell (jgc) at Carnegie-Mellon University ; Created. Supermatcher (XMATCH INPUT PATTERN) Initial version ; in UCI LISP. ; ;;; *******************D E S C R I P T I O N***************************** ;;; usage: (xmatch INPUT PATTERN) ;;; INPUT is a flat list of atoms. ;;; PATTERN is defined as follows: ;;; PATTERN -> LIST pattern elements. Each pattern element is: ;;; 1) A terminal grammar node (any LISP atom). The special atom $ ;;; matches any atom in the input. ;;; 2) A non-terminal grammar node (An atom with a :REWRITE property. ;;; The value of this property is in turn a PATTERN -- not a ;;; pattern element. ;;; 3) A functional atom. (An atom with a function name as the ;;; value of its :FUNCTION property -- which is then executed, ;;; and may be user defined. ;;; 4) A LIST whose CAR = ?. Its CDR = PATTERN whose presence is ;;; optional. ;;; 5) A LIST whose CAR = !. Its CDR = LIST of PATTERNS. ! -> disjunciton ;;; 6) A LIST whose CAR = := for var assignament. Its CADR = variable ;;; name and its CDDR = PATTERN. ;;; 7) A LIST whose CAR = * and whose CDR = PATTERN. This is the KLEENE ;;; STAR; the PATTERN may appear any number of times in the input. ;;; 8) A LIST whose car = +. Same as *, but PATTERN must match at ;;; least once (KLEENE PLUS). ;;; 9) A LIST whose CAR = ^, whose CADR is a number, and whose CDDR ;;; is a PATTERN. This is exponentiation. the PATTERN must match ;;; exactly the specified number of times. ;;; 10) A LIST whose CAR is &m, and whose CDR is a flat LIST of atoms. ;;; This is a faster way of doing a disjunction where all the elements ;;; of the disjunction are atoms without :function or :rewrite properties. ;;; 11) A LIST whose CAR is &u, and whose CDR is a PATTERN. This is used to ;;; match upto but not including the PATTERN which is the CDR. ;;; 11.1) A LIST whose CAR is &bu, and whose CDR is a PATTERN. This is used ;;; to match upto but not including the last occurance of the PATTERN ;;; which is the CDR of the LIST. ;;; 12) A LIST whose CAR is &c and whose CDR is a LIST of PATTERNS. If ;;; the length of the CDR of the LIST of PATTERNS is 0 then is returns ;;; variable binding that were used in that call. If the length is 1 ;;; then it tries to match that PATTERN (i.e. with a call using that one ;;; PATTERN). If the length is more than one then it attempts to match ;;; the each PATTERN (in the CDR of the LIST) and if it succeeds then ;;; it attempts to match the remaining PATTERNS. This can be very slow. ;;; 13) SPECIAL SYMBOLS: ;;; a) When the symbol *var* occurs as the CADR of a variable ;;; assignment pattern this is taken to mean that a new ;;; variable name must be generated. ;;; b) When the CADDR of a variable assigment PATTERN is a LIST ;;; the CAR of which is the symbol &i the value of the variable ;;; is the value of the element following the &i providing ;;; the element following that element is successfully matched ;;; as a PATTERN. ;;; 14) A LIST whose CAR is &morph and whose CDR is a LIST which alternates ;;; between a member of the set {:prefix, :suffix, :ending, :root} and ;;; a PATTERN. The PATTERNS only make sense if they expand to a LIST ;;; of single words (i.e. it will only work on a PATTERN of length one). ;;; 15) A LIST whose CAR is &s, and whose CDR is a PATTERN. This is used to ;;; scan the current input segment and the remaining input for the ;;; PATTERN; matching ceases if the PATTERN does not exist in the input ;;; from the point of the &s operator to the end of the input. ;;; 16) A LIST whose CAR is &sc and whose CDR is a LIST of semantic classes. ;;; This consumes one word of input if that word is a member of any of ;;; classes listed in the cdr of the PATTERN. ;;; 17) A LIST whose CAR is &o and whose CDR is a PATTERN. This returns ;;; either the current match (if the PATTERN fails) or the value of the ;;; successful match of PATTERN. This is deterministic optionality. ;;; Of course, PATTERNS can be embedded to arbitrary depth in each ;;; other. *, +, and ^ are a bit slow. Anything in the pattern of a ;;; * MUST match some input (else patterns like (* ( A)) would cause ;;; an infinite loop. ;;; ---------------------------------------------------------------- (in-package "DYPAR"); :use '("LISP" "USER" "GCLISP")) ;;; The Macro package must be loaded at compile time. (eval-when (compile) (if (not (boundp '*macros-loaded*)) (load "macros")) (if (not (boundp '*struc-loaded*)) (load "struc")) (if (not (boundp '*babel-loaded*)) (load "babel"))) (proclaim '(optimize (speed 3))) ;;; Also we must declare the global variables to make compiled code more ;;; efficient. The purpose of the globals used or referenced in this file ;;; is as follows: ;;; !input The users input to the parser. ;;; !len The length of that input. ;;; !!punct List of punctuation used by the "$p" function. ;;; !!best-match Used to store the longest current match(es). ;;; !!current-pattern Name of the current invoking pattern passed to smatch. (proclaim '(special !input !len !!punct !!best-match !!xref-props !!last-rewrite !!last-tried !!current-pattern !!cache !!cacheflg !show-expanded !!default-assignments)) ;;; Global Variable initializations. Many of the global variables used in the ;;; parsing system are initialized in the file var.lsp, which also contains ;;; pointers to the files/functions which make use of those variables. (defvar !!best-match '(((0)))) (defvar !!current-pattern nil) (defvar !!cacheflg t) (defvar !!cache nil) (defvar !!last-rewrite nil) (defvar !!last-tried nil) (defvar !!xref-props '(:first :last :fwild :lwild)) (defvar !show-expanded nil) (defvar !!default-assignments nil) (add-message 'smatch1 :english "~&Attempt to expand an undefined nonterminal: ~S~&" :spanish "~&Intento de expandir un no-terminal no definido: ~S~&" ) ;;; SMATCH1 - the heart of the pattern matcher. This is admittedly an ;;; extremely long function. It is basically just a big case statement, where ;;; the code to be executed for each case is sometimes non-trivial. ;;; Variable correspondences. ;;; cb -> a context and bindings list. ;;; pat -> the current pattern. ;;; internal -> a flag for use in the looping constructs. ;;; optflg -> another flag for use in the looping constructs. ;;; non-term -> A stack of the non-terminals currently being expanded. ;;; recurse-depth -> counter for depth of recursion. ;;; inp -> the current input fragment. ;;; !input -> the complete user input string. ;;; n -> the current input pointer (also the car of cb) ;;; old-bindings -> a list of the variable bindings that have occured so far. ;;; (also the cdr of cb) ;;; tmp -> a temporary variable for storing rewrite expansions, function ;;; names, and results of recursive calls to the matcher, whichever is ;;; appropriate. ;;; var -> name of the variable to be assigned a value by := ;;; !len -> the length of the entire input string. ;;; !show-expanded -> var which controls whether or not to enable primitive ;;; rule tracing. (defun smatch1 (cb pat internal optflg non-term recurse-depth) (declare (ftype (function (list t symbol symbol list integer) list) smatch) (list cb non-term) (symbol internal optflg) (integer recurse-depth) (t pat) (optimize (speed 3))) (dlet ((inp (Cnth !input (1+ (car cb)))) ((n . old-bindings) cb) (tmp) (var) (tail)) ;; Our main cond clause has deals with four different cases: ;; 2 failure conditions, and whether the pattern is an atom or list. (cond ;; If we are pointing to a piece of the input which doesn't exist ;; we should quit. ((> n !len) nil) ;; Likewise, when we are pointing at the last element in the ;; input and must consume input, but can't we should quit. ((and (eql n !len) internal) nil) ;; Having finished worrying about falling off the end of the ;; input we now look at the interesting cases. First if the ;; pattern is not a list: ((atom pat) (cond ;; If the input is equal to the pattern, or the pattern is ;; a wildcard, the match succeeds. So increment the input ;; pointer and return. ((or (equal pat (car inp)) (eql pat '$)) (list (cons (1+ n) old-bindings))) ;; Nuria Ripoll. 20F. ;; Si la palabra es un sinonimo del patron, exito. ((member (car inp) (sinonimos2 pat)) (list (cons (1+ n) old-bindings))) ;; If pattern is $r update the input pointer to point at ;; the last element of the input. If no input is left ;; leave the pointer where it is. ((eql pat '$r) (list (cons (+ n (cond (inp (length inp)) (t 0))) old-bindings))) ;;; Expanding rewrites and calling extension functions. ;;If the current pattern has a pattern field remember it. ((get-rule-hash pat) (setq tmp (get-pattern pat)) (cond ;; If there is no pattern associated with the non-terminal ;; we are trying to expand we have an undefined non-terminal. ((null tmp) (smformat 'smatch1 pat) nil) ;; If no input was consumed since the last expansion of ;; this non-terminal (this is checked by looking at the ;; non-terminals expanded stack), we halt the recursive ;; pattern invocation. ((eql n (cdr (assoc pat non-term))) nil) ;; Exploiting the cross-referencer, we check whether ;; the next word in the input can possibly match the ;; unexpanded non-terminal, then whether the possible ;; last words in the non-terminal are found anywhere ;; in the input. ((and ;; First word checking (or (get-fwild pat) (null (get-first pat)) (memb (car inp) (get-first pat)) (and (numberp (car inp)) (memb '$n (get-first pat)))) ;; Last word checking (or (get-lwild pat) (null (get-last pat)) (intersectp inp (get-last pat)) (and (memb '$n (get-last pat)) (uci-some (function numberp) inp)))) (setq !!last-rewrite pat) ; for spelling correction. ;; Primitive tracing facility. If !show-expanded is ;; non-nil then the name of the rewrite rule being ;; expanded is printed to the screen. (and !show-expanded (print-dots recurse-depth) (format t "~S~&" pat)) ;; since the non-terminal is capable of succeeding ;; we call the pattern matcher with the expansion, ;; and update the non-terminals expanded stack. (smatch (list cb) tmp internal optflg (cons (cons pat n) non-term) (1+ recurse-depth))))) ;; If the pattern has a function associated with it, we ;; apply that function to the input, and update the input ;; pointer if the function call returns a non-nil value. ;; This is for niladic extension operators. ((setq tmp (get pat ':function)) (and (funcall tmp inp) (list (cons (1+ n) old-bindings)))))) ;; There is an implicit nil as the final test to this cond ;; which the part of the code used to match against atoms. ;;; Sub-Patterns that are lists. The operators ? ! !! &m ;; If we've gotten this far, our pattern is a list. We will ;; continue processing based on the car of that list (which ;; should be a DYPAR operator). (t (case (car pat) ;; An optional element causes the match to branch, trying ;; to continue the match with both the pattern explicitly ;; in the input, and skipping over the pattern. If the ;; looping flags are both on, it only looks for the ;; explicit match. A null match is continued by returning ;; the same context and bindings list we started with. (? (nconc (cond ((and internal optflg) nil) (t (list cb))) (smatch (list cb) (cdr pat) internal optflg non-term (1+ recurse-depth)))) ;; Deterministic optionality. This will only return one ;; value for the match. If the subpattern is matched, ;; that is the value of the match. Otherwise, whatever ;; we had before is the value (paying attention to the ;; flags for avoiding endliess looping of course). (&o (or (smatch (list cb) (cdr pat) internal optflg non-term (1+ recurse-depth)) (cond ((and internal optflg) nil) (t (list cb))))) ;; The standard disjunction operator recursivly maps the ;; matcher across the list of sub-patterns returning a ;; list of the results with unsuccessful matches ;; filtered out. (! (mapcan (funl (e) (smatch (list cb) e internal optflg non-term (1+ recurse-depth))) (cdr pat))) ;; &m takes a flat list of atoms as its pattern. If the ;; first word in the current input fragment is a member of ;; the pattern, we update the input pointer and return. ;; Note: this operator is included by the grammar loader ;; in cases where it is appropriate. ;; Nuria Ripoll. 20F. ;; CODIGO ORIGINAL. ; (&m (and (memb (car inp) (cdr pat)) ; (list (cons (1+ n) old-bindings)))) ;; Si la palabra es un sinonimo del patron, exito. (&m (cond ((memb (car inp) (cdr pat)) (list (cons (1+ n) old-bindings))) (t (do ((l (cdr pat) (rest l))) ; condicion de parada ((or (null l) (memb (car inp) (sinonimos2 (first l)))) (if (null l) nil (list (cons (1+ n) old-bindings)))))))) ;; This is for deterministic disjunction. It will ;; stop after the first successful sub-pattern match. (!! (do ((l (cdr pat) (cdr l)) (ans)) ((null l) nil) (cond ((setq ans (smatch (list cb) (car l) internal optflg non-term (1+ recurse-depth))) (return ans))))) ; moisés, 22-02-01: (<- (setf (symbol-value (first (rest pat))) (first (last pat))) (list (cons n old-bindings)) ) ; el puntero se queda donde estaba ;;; Variable Assignment ;; the variable assignment pattern must first determine ;; whether or not to generate a new variable name or not. ;; This occurs when the cadr of the pattern is `*var*'. (:= (cond ((eql '*var* (cadr pat)) (setq var (newsym 'var)) (setf (get var :newsym) t)) (t (setq var (cadr pat)))) ;; now we see if the pattern argument is successfully ;; matched. (and (setq tmp (smatch (list cb) (cddr pat) internal optflg non-term (1+ recurse-depth))) ;; tmp may be a list of possible matches, so we ;; must update each of those active matches, by ;; updating the input pointer as well as the ;; binding list. (mapcar (funl (m) (cons (car m) (or ;; the cadr of m should be ;; the returning bindings list ;; If it is an atom then we are ;; returning from an &i and ;; should process the match ;; value differently. We update ;; the input pointer in the ;; normal fashion, but the ;; bindings list gets the next ;; element in m as the value to ;; associate with the var. ;;; *** The comment is out of date, and the new code might introduce strange ;;; behaviour. (if (setq tail (member t (cdr m))) (cons (cons var (cadr tail)) (append (prelist (cdr m) (- (length (cdr m)) (length tail))) (cddr tail)))) ;(format t "Value of binding: ~S~&" m) ; (and (atom (cadr m)) ; (cadr m) ; (cons (cons var ; (caddr m)) ; (cdddr m))) ;; the typical case puts var ;; into the bindings list with ;; the input segment that was ;; was matched as its cdr. (cons (cons var (prelist inp (diff (car m) n))) (cdr m))))) tmp))) ;;; Advanced Operators &u &ui &s &c ~ &n ;; A do loop without a body. If either of the exit ;; conditions are satisfied (i.e off the end of the input ;; or a successful match of the argument pattern) then ;; we check to see which condition fired. If we were off ;; the end of the input we should return nil. If the ;; pattern was matched, we should update the input pointer ;; to the place we were when the successful match started. (&u (do ((cnt n (1+ cnt))) ((or (> cnt !len) (smatch (list (cons cnt old-bindings)) (cdr pat) internal optflg non-term (1+ recurse-depth))) (cond ((> cnt !len) nil) (t (list (cons cnt old-bindings))))))) ;; &ui consumes input up to and including the pattern ;; Similar to &u above except that if the pattern matches ;; the returned value of cb is kept as the value of the ;; variable temp and returned by the cond when we exit (&ui (do ((cnt n (1+ cnt)) (temp)) ((or (> cnt !len) (setq temp (smatch (list (cons cnt old-bindings)) (cdr pat) internal optflg non-term (1+ recurse-depth)))) (cond ((> cnt !len) nil) (t temp))))) ;; Another do loop without a body. The current input ;; segment and the remaining input is scanned for an ;; occurance of the argument pattern; if the argument ;; pattern is found, parsing of the current and remaining ;; input continues; if the argument pattern is not found, ;; parsing of the input ceases. The argument pattern ;; may include any of the other DYPAR-I operators; ;; unnecessary processing will occur if the argument ;; pattern includes another &s operator. Added 6 Apr '84 ;; by Nicolas Easton DEC/IST (&s (do ((cnt n (1+ cnt))) ((or (> cnt !len) (smatch (list (cons cnt old-bindings)) (cdr pat) internal optflg non-term (1+ recurse-depth))) (cond ((> cnt !len) nil) (t (list cb)))))) ;; New optimized version of &c ;; added 12/1/83 -wmb (Stuttgart West Germany) ;; We decide what to do based on the number of pattern ;; elements passed as arguments to &c. (&c (case (length (cdr pat)) ;; if the pattern is empty return the current ;; context and bindings list. (0 cb) ;; if there is only one subpattern, &c ;; is treated as a no-op and the pattern matcher ;; gets called without anything special happening. (1 (smatch (list cb) (cadr pat) internal optflg non-term (1+ recurse-depth))) ;; when there is more than one subpattern argument ;; the results of successive calls to a modified ;; matcher are stored for reference. The modified ;; pattern matcher returns the value of the match ;; as well a copy of the patterns which succeeded. (t (let ((pm (mapcan (funl (p) (smatch-pr (list cb) p internal optflg non-term (1+ recurse-depth))) (cdr pat))) (win)) (cond ;; Two conditions are important at this ;; time if neither is satisfied, the &c ;; match fails. The first condition is ;; a check to see if the match made ;; progress in any of its branches. ((setq win (progress pm cb)) ;; when that happens we call the pattern ;; matcher again, with the same pattern, ;; except that the input consuming ;; element is removed. This is a little ;; hairy because of the possibility of ;; more than one &c subpattern ;; succeeding. (mapcan (funl (pt) (smatch (cadr pt) (list (remove (car pt) pat)) internal optflg non-term (1+ recurse-depth))) win)) ;; the other case is when some of the ;; matches succeeded but no input is ;; consumed. If a match succeeds, but ;; consumes no input, it was optional. ;; if the number of optional elements ;; which succeeded is the same as the ;; number of remaining active pattern ;; elements the match succeeds and ;; returns the current value of the cb ;; list. ((eql (length pm) (length (cdr pat))) (list cb))))))) ;; ~ will succeed only when its pattern argument is not ;; matched. It is only useful for not matching 1 word at ;; a time. When the following pattern is not matched the ;; input pointer is moved forward one word. Beware of the ;; construction (~ $) which will not match anything. (~ (and (null (smatch (list cb) (cdr pat) internal optflg non-term (1+ recurse-depth))) (list (cons (1+ n) old-bindings)))) ;; This is the same as ~ except no input is consumed. ;; That is, if the current pattern is not matched the ;; current input pointer is returned. (&n (and (null (smatch (list cb) (cdr pat) internal optflg non-term (1+ recurse-depth))) (list cb))) ;;; Iterative Operators * + ^ ;; * or kleene-star will correctly recognize optional ;; constituents of its subpatterns. `*' will fail for ;; an optional subpattern only when the top-level * has ;; consumed no input and there is no remaining pattern to ;; match against. 1-18-84 -wmb. The variable fringe ;; stores the current value of the match, and the variable ;; ans is a list of all the valid matches recognized ;; during course of the match. Fringe starts out as ;; the current state (a null match) and each time through ;; the loop tries to match the pattern argument again. (* (do ((fringe (list cb) (smatch fringe (cdr pat) (eql (caar fringe) (caar (last ans))) (null (cddr pat)) non-term (1+ recurse-depth))) ;; The variable ans is initialized to the current ;; context and bindings list, unless we are in ;; an optional pattern and are recursively calling ;; the star operator, in which case the initial ;; value is nil. (ans (and (null (and internal optflg)) (list cb)))) ;; The first time that the * subpattern fails to ;; match the value of fringe will be nil, and the ;; * will return ans as its value. ((null fringe) ans) ;; Each time through the loop we add fringe to ans. (setq ans (nconc ans fringe)))) ;; same as * except pattern must match at least once. The ;; only difference in the code is ans automatically gets ;; initialized to nil, and fringe starts out as the value ;; of a call to the matcher with the + subpattern as its ;; argument. (+ (do ((fringe (smatch (list cb) (cdr pat) 0 optflg non-term (1+ recurse-depth)) (smatch fringe (cdr pat) (eql (caar fringe) (caar (last ans))) (null (cddr pat)) non-term (1+ recurse-depth))) (ans)) ((null fringe) ans) (setq ans (nconc ans fringe)))) ;; ^ expects its subpattern to be of the form ;; "n . pattern". ctr is an index variable that ;; gets increments by one every time through the loop. ;; This is another do loop without a body. (^ (do ((ctr 0 (1+ ctr)) ;; ans starts out as the current context and ;; bindings list, and uses itself as an argument ;; to the matcher for each successive call to ;; the matcher. It remembers the results of the ;; last call. (ans (list cb) (smatch ans (cddr pat) internal optflg non-term (1+ recurse-depth)))) ;; We have two exit conditions. ((cond ;; If the counter is greater than n the ;; match is successful, so return ans ((geq ctr (cadr pat)) (return ans)) ;; If ans is empty the match failed, so ;; return nil. ((null ans) (return nil)))))) ;; For variable reference we do an assoc of the ;; current bindings list for an occurance of the ;; the variable name which is the pattern argument to = ;; remembering the result as tmp. (= (and (setq tmp (assoc (cadr pat) old-bindings)) ;; If the variable was set we call the pattern ;; matcher with the cdr of the assoc list pair as ;; its pattern. The cdr of tmp will be a list ;; of the value of the variable and thus a valid ;; pattern. (smatch (list cb) (cdr tmp) internal optflg non-term (1+ recurse-depth)))) ;; Disjunctive semantic class (&s CLASS1 CLASS2 ...) ;; from scanning matcher (scan.l) isap is defined in ;; this file. (&sc (and (isap* (car inp) (cdr pat)) (list (cons (1+ n) old-bindings)))) ;; This provides the capibility to make multiple vars ;; with the same value. If the variable argument has ;; a value that value is assigned to the new variable. ;; If the old variable has no value (doesn't exist) ;; the new variable receives an optional default value. ;; Likewise, if the old-variable argument is nil the ;; default value is assigned. (&a (cond ((setq tmp (or (cdr (assoc (caddr pat) old-bindings)) (if (consp (car (cdddr pat))) (if (eql (caar (cdddr pat)) '%f) (list (apply (cadar (cdddr pat)) nil)) (cdddr pat)) (progn (setcons (cadr pat) !!default-assignments) (cdddr pat))))) (list (cons n (cons (cons (cadr pat) tmp) old-bindings)))))) ;; I didn't write this -wmb (&morph (xmorph pat !len !input cb n inp)); 28-sept-83 sem (&bu (do ((cnt !len (1- cnt))) ((or (< cnt n) (smatch (list (cons cnt old-bindings)) (cdr pat) internal optflg non-term (1+ recurse-depth))) (cond ((< cnt n) nil) (t (list (cons cnt old-bindings))))))) (&frame (xframe (cadr pat) !len !input cb n inp)); 14-Sept-84 sem ;; The last clause in the selectq is what to do if ;; everything else failed. (t (progn ;; Again we have multiple possibilities. (cond ;; If the pattern element in the operator position ;; has a function property, remember the function. ((setq tmp (get (car pat) ':function)) ;; Temporarily store the result of applying the ;; function to the input (let ((a (funcall tmp inp pat cb internal optflg non-term (1+ recurse-depth)))) ;; If the function call returns a non-nil ;; value, update each of the currently ;; active matches to include the value ;; returned from the function call with the ;; bindings that were in effect when the ;; function was called. The assumption ;; here is that a function should return ;; a set of context and bindings lists ;; just like smatch does. Since the ;; is not passed the bindings list, it ;; must be merged with the current bindings ;; before it reflects the true state of the ;; parse. (and a (mapcar (funl (e) (append e old-bindings)) a)))) ;; Lastly, if we got this far there is something ;; wrong. (t (format t "Error in Pattern Spec: ~S~&" pat))) ))))))) ;;; SMATCH drives smatch1 across the pattern, retaining the current position ;;; in the input and any variable bindings picked up along the way. (defun smatch (cblst pattern internal optflg non-term recurse-depth) (declare (ftype (function (list t symbol symbol list integer) list) smatch1) (list cblst non-term) (symbol internal optflg) (integer recurse-depth) (t pattern) (optimize (speed 3))) ;; The outer do loop is for iteratively moving down the pattern trying ;; the inner do loop on each pattern element. When the pattern is empty ;; we return the modified context and bindings list minus any redundancy (do ((pat pattern (cdr pat))) ((null pat) (condense cblst)) ;; Our body is another do loop which modifies the contents of cblst ;; to contain only the active matches. This loop returns the new ;; cblst. (setq cblst (do ((cb cblst (cdr cb)) (ans)) ((null cb) ans) (setq ans (nconc (smatch1 (car cb) (car pat) internal optflg non-term recurse-depth) ans)) ;; This next bit setqs some global values used by the ;; spelling corrector. (and (atom (car pat)) (get-rule-hash (car pat)) (setq !!last-tried (car pat))) ; (best-match (car ans)) )))) ;;; Driver for &c pattern matching returns the match result plus the pattern ;;; that caused it. The only difference between this and smatch is in the ;;; exit clause. (defun smatch-pr (cblst pattern internal optflg non-term recurse-depth) (do ((pat pattern (cdr pat))) ;; Instead of just returning the cblst like smatch, we make a list ;; of the pattern and the cblst and return that. The double nesting ;; is because this is picked up by a mapcan and we want the list of ;; pattern and cblst to maintain its integrity. ((null pat) (and cblst (list2 pattern (condense cblst)))) (setq cblst (do ((cb cblst (cdr cb)) (ans)) ((null cb) ans) (setq ans (nconc (smatch1 (car cb) (car pat) internal optflg non-term recurse-depth) ans)) (and (atom (car pat)) (get-rule-hash (car pat)) (setq !!last-tried (car pat))) ; (best-match (car ans)) )))) ;;; Function to determine whether there has been any change in the state of ;;; the counter inside of &c. (defun progress (pat-cblst cb) (do ((pcb pat-cblst (cdr pcb)) (ans)) ((null pcb) ans) (cond ((mapcan (funl (e) (and (> (car e) (car cb)) (list e))) (cadar pcb)) (setq ans (cons (car pcb) ans)))))) ;;; XMATCH calls smatch and eliminates redundancy from the result. ;;; This is the top level pattern matching function used by DYPAR. (defun xmatch (!input pattern) (setq !len (length !input)) (subset (funl (e) (eql !len (car e))) (smatch '((0)) pattern nil nil nil 0))) ;(cdr (is-boundp 'non-term)) outdated call using a special var that no ; longer exists. Next to last arg to smatch. ;;; BEST-MATCH this is used to find the "best" match from the current ;;; group of possible matches. The variable !!best-match is used to ;;; store the currently longest parse. !!best-match must be reset by ;;; the calling function or this information is of little or no use. (defun best-match (cb) (let ((new (list cb !!last-rewrite !!current-pattern)) (old (car !!best-match))) (cond ((null cb) (cond ((and (eql (cadr old) !!last-rewrite) (eql (caddr old) !!current-pattern)) (setq old (append (prelist old 3) (list (condense (cons !!last-tried (cadddr old)))))) (setq !!best-match (cons old (cdr !!best-match)))))) ((eql (cadr cb) t) nil) ((> (car cb) (caar old)) (setq !!best-match (list new))) ((eql (car cb) (caar old)) (cond ((equal cb (car old)) (setq old (append (prelist old 3) (list (condense (cons !!last-tried (cadddr old)))))) (setq !!best-match (cons old (cdr !!best-match)))) ((and (cdar old) (cdr cb)) (setq !!best-match (cons new !!best-match))) (t (setq !!best-match (cons new (cdr !!best-match))))))))) ;;; NEWVARS Returns NEWSYMed variables from output alist, where alist is an ;;; assoc list of vars and their bindings. This function is used to place ;;; the names of the newly generated variables into the global variable ;;; !newvars (which happens in newxpar.l). (defun newvars (alist) (mapcan (funl (p) (and (consp p) (get (car p) ':newsym) (list (car p)))) alist)) ;;; Don't look at me, I didn't write it. -wmb (defun xmorph (pat !len !input cb n inp) (let ((root (cadr (member ':root (cdr pat)))) (suffix (cadr (member ':suffix (cdr pat)))) (old-len !len) (old-input !input) (morph-set (r-morph (car inp)))) ;; r-morph will return a list of sets of ;; morphed word info with the same number of ;; endings striped off. Morph word info is a list who's car is the root ;; and cdr is the endings striped off. (if (or root suffix) (do ((ans (xmorph1 (car morph-set) root suffix) (xmorph1 (car remaining-morph) root suffix)) (remaining-morph (cdr morph-set) (cdr remaining-morph))) ((or ans (null remaining-morph)) (setq !len old-len !input old-input) (mapcar (funl (temp) (append (cons (1+ n) (cdr temp)) (cdr cb))) ans)))))) (defun xmorph1 (morph-set root suffix) (if (null root) (mapcan (funl (word-set) (xmatch (cdr word-set) suffix)) morph-set) (mapcan (funl (word-set) (let ((temp (mapcan (funl (t1) (if (equal (car t1) 1) (list t1))) (xmatch (list (car word-set)) root)))) (cond ((null suffix) temp) ((null temp) nil) (t (mapcan (funl (mat) (xmorph2 mat suffix word-set)) temp))))) morph-set))) (defun xmorph2 (root-match suffix word-set) (let ((temp (xmatch (cdr word-set) suffix))) (mapcar (funl (t1) (append t1 (cdr root-match))) temp))) ;;; Xmatch2 is for recursive calls to xmatch (defun xmatch2 (!input pattern start) (let ((!len (length !input))) (subset (funl (e) (eql !len (car e))) (smatch (list (list start)) pattern nil nil nil 0)))) ; (cdr (boundp 'non-term)) carry over from !nonterm not needed. ; Define caching and cache searching functions. 2-level bucketing ; technique used now. SQRT(N) < search or store time < N. ; Hashing techniques should be used if fast hash functions are ; provided in LISP (e.g., in COMMON). Selecting good hash keys ; and a reasonable size for the hash array need thinking. (defun cache-clear (cache) (cond (cache (set cache nil)) (!!cache (setq !!cache nil)))) ; Assumes cache is a 2-level assoc list indexed first by ; start postion and second by the non-terminal being matched. ; Assumes that (cdr nil) = nil. (defun cache-search (start nt cache) (assoc nt (cdr (assoc start (or cache !!cache))))) ;(defun cache-store (start nt cblist cache) ; (cond ; ((and !!cacheflg ; (> (length (get-nt-parents nt)) 1)) ; (prog (sbucket nbucket cacheval) ; (or cache (setq cache '!!cache)) ; (cond ((setq cacheval (eval cache)) ; (cond ((setq sbucket (assoc start cacheval)) ; (cond ((setq nbucket (assoc nt (cdr sbucket))) ; (rplacd nbucket cblist)) ; (t (nconc sbucket ; (list (cons nt cblist)))))) ; (t (nconc cacheval ; (list (cons start ; (list (cons nt ; cblist)))))))) ; (t (set cache (list (cons start ; (list (cons nt cblist))))))) ; (return cblist))) ; (t cblist))) ; Match non-terminals using firsts xref, percolation ; xref, caching (if !!cacheflg is set), and full CFL ; recursion (ntstack stops non-productive recursion) ; CLAUSE FOR DEALING WITH NONTERMINALS FROM SCAN.L ; ((get-pattern elt) ; (cond ((and (setq op (assoc elt ntstack)) ; (eq ctr (cdr op))) ; (return nil)) ; ((and !!cacheflg ; (setq op (cache-search ctr elt !!cache))) ; (return (cdr op))) ; ((and (or (eq '$ (get-first elt)) ; (memb (car input) (get-first elt))) ; (kmatch input elt)) ; (return (mapcan ; (funl (p) ; (fmatch1 (cdr pattern) ; p ntstack)) ; (cache-store ctr elt ; (fmatch1 (get-pattern elt) ; (cons ctr bindings) ; (cons (cons elt ctr) ; ntstack)) ; '!!cache)))) ; (t (return nil)))) ;;; returns the first tail of cat-list whose car is in the upwards-branching ;;; isa tree starting from token, if any. Assumes a list of superordinates ;;; under the isa: property in each token. (defun isap* (token supercs) (cond ((null token) nil) ((null supercs) nil) (t (isap*1 token supercs)))) (defun isap*1 (tk supercs) (or (memb tk supercs) (uci-some (funl (e) (isap*1 e supercs)) (get tk ':isa)))) ;; kmatch matches pattern iff constants associated with the non-terminal ;; appear in the input in the order given. XREF DOES NOT GENERATE :CONSTANTS (defun kmatch (input nt) (prog (constants) (setq constants (get nt ':constants)) lp (cond ((null constants) (return t)) ((null (setq input (memb (car constants) input))) (return nil)) (t (setq input (cdr input)) (setq constants (cdr constants)) (go lp))))) (defun print-dots (number) (dotimes (n number t) (princ #\.)))