;;; -*- Mode:Lisp; Package:DYPAR; Base:10; -*-

;;; ----------------------------------------------------------------
;;; 
;;; 		(c) Copyright, 1989,
;;; 			by Mark Boggs
;;; 			All rights reserved
;;; 
;;; ----------------------------------------------------------------

;;; DYREAD.LSP

;;; HISTORY
; 05-Aug-89  Mark Boggs (boggs) at Rank Xerox, Spain
;	Created. Replaces old Dypar reader.
;

;;; DOCUMENTATION
;;; The behaviour of the dypar reader is controlled by the inclusion of
;;; characters in various lists.
;;; characters in the list !!special-chars are expanded into symbols.
;;; characters in the list !!ignore-chars are skipped.
;;; The variables !!thousands-separator and !!real-number-separator
;;;	control how numbers are read.
;;; characters in the list !!accented-chars have their accents stripped.
;;; ¤ is converted to ¥ in all cases.
;;; All other characters appear unchanged.
;;; The reader works by processing the user's input character by character,
;;; making modifications as detailed above, and then passing the result to
;;; the lisp reader for tokenization.

(in-package "DYPAR"); :use '("LISP" "USER"))

(proclaim '(special *dypar-readtable*
		    !!input-buffer
		    !!raw-input
		    !!special-chars
		    !!ignore-chars
		    !!accented-chars
		    !!accented-assoc
		    !!char-bindings
		    !!real-number-separator
		    !!thousands-separator
		    !!in-number))

(defvar *dypar-readtable* (copy-readtable *readtable*))

;;; !!input-buffer holds the modified user string until it is converted into
;;; lisp tokens (atoms). It is initialized to contain null characters.
(defvar !!input-buffer
  (make-array 256 :element-type 'character
	      :fill-pointer 0 :initial-element #\ ))

;;; !!raw-input stores exactly what the user typed at the keyboard
(defvar !!raw-input nil)

(defun cnvt-read-print ()
  !!raw-input)

;;; These two variables control the conversion of accented characters
;;; into their unaccented counterparts.
(defvar !!accented-chars '(#\… #\  #\Š #\‚ #\ #\ #\¡ #\• #\¢ #\— #\£ #\))
(defvar !!accented-assoc '((#\… #\a) (#\  #\a) (#\Š #\e) (#\‚ #\e) (#\ #\e)
				     (#\ #\i) (#\¡ #\i) (#\• #\o) (#\¢ #\o)
				     (#\— #\u) (#\£ #\u) (#\ #\u)))
;;; These are accented characters not included in the above.
;;; ƒ „ Ž ‡ € ˆ ‰ Œ ‹ “ ” ™ – š ˜

;;; !!special-chars is a list of all the characters that dypar will
;;; convert from characters into symbols at read time.
(defvar !!special-chars
    (mapcar #'code-char '(33 34 36 37 38 39 40 41 42 43 44 45 46 47 58 59 60
			    61 62 63 64 91 92 93 94 95 96 123 124 125 126
			    168 173 250)))
;;; The special characters in the first 128 ascii characters are included
;;; The following characters from the keyboard are not included.
;;; ¦ § ª

;;; !!ignore-chars is a list of characters to be ignored by the reader.
(defvar !!ignore-chars nil)

(defvar !!real-number-separator #\.)
(defvar !!thousands-separator #\,)

(defvar !!in-number nil)

;;; !!char-bindings is an assoc list which contains the characters and their
;;; symbolic expansions.
(defvar !!char-bindings
  (acons #\space '%space
	 (acons #\# '%hash
		(acons #\. '%period nil))))

;;; The following functions control the way the reader treats numbers.
(defun spanish-numbers ()
  (setq !!real-number-separator #\,)
  (setq !!thousands-separator #\.))

(defun english-numbers ()
  (setq !!real-number-separator #\.)
  (setq !!thousands-separator #\,))

;;; read-char-as-symbol
;;; defines the character CH to be read as the symbol SYMBOL by the reader
(defmacro read-char-as-symbol (ch symbol)
  `(progn
     (setq !!char-bindings (acons (code-char ,ch) ',symbol !!char-bindings))))
;     (set-macro-character ,(if (integerp ch)
;			       (code-char ch)
;			     ch)
;			  #'(lambda (x y)
;			      (declare (ignore x y))
;			      ',symbol) nil *dypar-readtable*)))

;;; ----------------------------------------------------------------
;;; initialize *dypar-readtable*

(read-char-as-symbol 33 %emark)
(read-char-as-symbol 34 %dquote)
(read-char-as-symbol 36 %dollar)
(read-char-as-symbol 37 %percent)
(read-char-as-symbol 38 %amper)
(read-char-as-symbol 39 %apost)
(read-char-as-symbol 40 %lparen)
(read-char-as-symbol 41 %rparen)
(read-char-as-symbol 42 %star)
(read-char-as-symbol 43 %plus)
(read-char-as-symbol 44 %comma)
(read-char-as-symbol 45 %dash)
(read-char-as-symbol 46 %period)
(read-char-as-symbol 47 %slash)
(read-char-as-symbol 58 %colon)
(read-char-as-symbol 59 %semicolon)
(read-char-as-symbol 60 %labrack)
(read-char-as-symbol 61 %equal)
(read-char-as-symbol 62 %rabrack)
(read-char-as-symbol 63 %qmark)
(read-char-as-symbol 64 %atsign)
(read-char-as-symbol 91 %lsbrack)
(read-char-as-symbol 92 %bslash)
(read-char-as-symbol 93 %rsbrack)
(read-char-as-symbol 94 %upcaret)
(read-char-as-symbol 95 %underbar)
(read-char-as-symbol 96 %bquote)
(read-char-as-symbol 123 %lcbrack)
(read-char-as-symbol 124 %vbar)
(read-char-as-symbol 125 %rcbrack)
(read-char-as-symbol 126 %tilde)
(read-char-as-symbol 168 %interogacion)
(read-char-as-symbol 173 %admiracion)
(read-char-as-symbol 250 %dot)

;(set-dispatch-macro-character
;  #\# (code-char 29)
;  #'(lambda (x y z) (declare (ignore x y z))
;	    '%period) *dypar-readtable*)

;(set-dispatch-macro-character
;  #\# #\#
;  #'(lambda (x y z) (declare (ignore x y z))
;	    '%hash) *dypar-readtable*)

;(set-syntax-from-char #\. #\. *dypar-readtable*)

;;; string-fill is used to add a symbol to the !!input-buffer string.
;;; This is more efficient than using concat and copying the array.
(defun string-fill (new old)
  (dotimes (x (length new))
    (vector-push (char new x) old)))

;;; check-last-char returns the last character added to !!input-buffer
(defun check-last-char ()
  (char !!input-buffer (max 0 (1- (fill-pointer !!input-buffer)))))

;;; check-next-char-raw returns the character following the indice
;;; argument from the !!raw-input string.
(defun check-next-char-raw (n)
  (if (< n (1- (length !!raw-input)))
      (char !!raw-input (1+ n))
      #\space))

(defvar !!terminating-punc '(#\! #\. #\, #\?))

;;; all-digits-till-space does some look ahead on the raw input, to
;;;   make sure that the characters following the space upto the next
;;;   space, end-of-line, or sentence terminator are all legal 
;;;   constituents of a number.
;;; !!thousands-separator can occur more than once. (not complete,
;;;   as X,XXX,XX is legal)
;;; !!real-number-separator can occur only once.
;;; Does not take into account ratios x/y. They must be processed by the
;;;  grammar -- x %slash y
(defun all-digits-till-space (ch real-flg thou-flg)
  (do ((n (1+ ch) (1+ n))
       (pos 1 (1+ pos)))
      ((or (> n (1- (length !!raw-input)))
	   (char= (char !!raw-input n) #\space)
	   (and (member (char !!raw-input n)
			!!terminating-punc :test #'char=)
		(if (< (1+ n) (1- (length !!raw-input)))
		    (char= (char !!raw-input n) #\space)
		    t)))
       t)
    (if (not (digit-char-p (char !!raw-input n)))
	(if (and thou-flg 
		 (= 0 (mod (- pos ch) 3))
		 (member (char !!raw-input n) '(#\, #\.) :test #'char=))
	    nil
	    (progn (setq !!in-number nil)
		   (return nil))))))

(defun readline (&optional (*readtable* *dypar-readtable*))
  ;; first we make sure the fill-pointer is reset.
  (setf (fill-pointer !!input-buffer) 0)
  ;; raw-input gets the user's string after trailing garbage is removed.
  (setq !!raw-input (string-right-trim '(#\space #\return #\newline)
				       (read-line)))
  ;; We loop across the raw input making necessary conversions and storing
  ;; the result into the input buffer.
  (do ((ch 0 (1+ ch))
       (char))
      ((>= ch (length !!raw-input))

       ;; Once the raw input has been processed into !!input-buffer
       ;; we read it using the lisp reader and the dypar readtable.
       ;; This results in having a list of atoms which serve as the
       ;; argument to the parser.
       (with-input-from-string (s !!input-buffer)
	 (do ((word (read s nil nil) (read s nil nil))
	      (ans))
	     ((null word) (reverse ans))
	   (setq ans (cons word ans)))))

    ;; char is the current character from the raw input string.
    (setq char (char !!raw-input ch))

;    (format t "~&~S ~S" (fill-pointer !!input-buffer) (check-last-char))

    ;; We process this character according to the following conditions.
    (cond


      ;; If our character is an alphabetic use it as is.
      ((alpha-char-p char)
       (vector-push char !!input-buffer))

      ;; Digits need a little more processing, so as to insure the proper
      ;; result from combinations of digits and other characters.
      ;; The character used to distinguish thousands is skipped.
      ;; The character used for real numbers is retained as #\.
      ((digit-char-p char)

       ;; If the previous character was a space we are possibly processing
       ;; a number.  Likewise, if we start an input with a digit.
       (if (or (char= (check-last-char) #\space) (= ch 0))
	   (setq !!in-number t))

       ;; We always push the digit onto the input buffer
       (vector-push char !!input-buffer))

      ((char= char !!real-number-separator)
       (cond ((and (or (char= (check-last-char) #\space) (= ch 0))
		   (all-digits-till-space ch t nil))
	      (vector-push #\0 !!input-buffer)
	      (vector-push #\. !!input-buffer))
	     ((and !!in-number (all-digits-till-space ch t nil))
	      (vector-push #\. !!input-buffer))
	     ((member char !!ignore-chars :test #'char=))
	     ((member char !!special-chars :test #'char=)
	      (vector-push #\space !!input-buffer)
	      (string-fill (string (cdr (assoc char !!char-bindings)))
			   !!input-buffer)
	      (or (char= (check-next-char-raw ch) #\space)
		  (vector-push #\space !!input-buffer)))
	     (t (vector-push char !!input-buffer))))

      ((char= char !!thousands-separator)
       (cond ((and !!in-number (all-digits-till-space ch nil t)))
	     ((member char !!ignore-chars :test #'char=))
	     ((member char !!special-chars :test #'char=)
	      (vector-push #\space !!input-buffer)
	      (string-fill (string (cdr (assoc char !!char-bindings)))
			   !!input-buffer)
	      (or (char= (check-next-char-raw ch) #\space)
		  (vector-push #\space !!input-buffer)))
	     (t (vector-push char !!input-buffer))))

      ;; We convert accented characters to unaccented characters.
      ((member char !!accented-chars :test #'char=)
       (vector-push (cadr (assoc char !!accented-assoc)) !!input-buffer))

      ;; We need to do case conversion on the ¤ (ascii value 164 => 165)
      ((member char '(#\¤ #\¥) :test #'char=)
       (vector-push #\¥ !!input-buffer))

      ;; Process the space character.
      ((char= char #\space)
       (setq !!in-number nil)
       (or (char= (check-last-char) #\space)
	   (vector-push #\space !!input-buffer)))

      ;; We can also skip any characters listed in !!ignore-chars
      ((member char !!ignore-chars :test #'char=))

      ;; Process any characters from the special character list.
      ((member char !!special-chars)
       (or (char= (check-last-char) #\space)
	   (vector-push #\space !!input-buffer))
       (string-fill (string (cdr (assoc char !!char-bindings)))
		    !!input-buffer)
       (or (char= (check-next-char-raw ch) #\space)
	   (vector-push #\space !!input-buffer)))

      ;; We have encountered a character which receives no special processing
      (t (vector-push char !!input-buffer)))))