
;;;************************************************************************
;;;                                                                       *
;;;    Copyrighted Cornell University 2003                                *
;;;                                                                       *
;;;                                                                       *
;;;                Formal Digital Library System                          *
;;;                ------------------------------                         *
;;;                                                                       *
;;;   Developed by the FDL group, Department of Computer Science,         *
;;;   Cornell University, Ithaca NY.  See the release notes for a list    *
;;;   of the members of the group.                                        *
;;;                                                                       *
;;;   Permission is granted to use and modify FDL provided this notice    *
;;;   is retained in derived works.                                       *
;;;                                                                       *
;;;                                                                       *
;;;************************************************************************

#+cltl2(in-system-package)
#-cltl2(in-package *system-package-name*)

;**************************************************************************
;*                                                                        *
;*      Projet     Formel                       LCF    Project            *
;*                                                                        *
;**************************************************************************
;*                                                                        *
;*            Inria                         University of Cambridge       *
;*      Domaine de Voluceau                   Computer Laboratory         *
;*      78150  Rocquencourt                    Cambridge CB2 3QG          *
;*            France                                England               *
;*                                                                        *
;**************************************************************************

; F-parser.lisp   Original code: parser (lisp 1.6) part of Edinburgh LCF
;                 by M. Gordon, R. Milner and C. Wadsworth   (1978)
;                 Transported by G. Huet in Maclisp on Multics, Fall 1981
;
; V1.4 :idents may not start with ', but may include _.
;       tokens may include %
; V2.2 :breakout instead of err in function fail
; V3.1 : |...| notation for literal atoms
;
; to do:
;    replace parser completely
;    speed it up


(eval-when (compile)
  (proclaim '(inline
	      getascii asciip
	      charp upperp lowerp digitp digit8p letterp alphanump upperordigitp
	      spacep nump check
	      )))


(setq %skiplimit 30)                ; number of tokens to print when skipping


; Object language tokens  ;-- removed for PRL
;-- (setq endcnrtok '|"|)
;-- (setq anticnr-tok '|^|)
;-- (setq condl-tok '|=>|)
;-- (setq else-tok '\|)  ; |
;-- (setq lambda-tok '|\\|)    ; \\
;-- (setq eq-tok '|==|)
;-- (setq ineq-tok '|<<|)
;-- (setq neg-tok '|~|)
;-- (setq conj-tok '|/\\|)
;-- (setq disj-tok '|\\/|)
;-- (setq imp-tok '|==>|)
;-- (setq iff-tok '|<=>|)
;-- (setq forall-tok '|!|)
;-- (setq exists-tok '|?|)
;-- (setq arrow-tok '|->|)
;-- (setq sum-tok '|+|)
;-- (setq prod-tok '|#|)
;-- (setq nulltyptok '|.|)


;-- (setq spec-toks
;--     '(|\\|  \|  |:|  |(|  |)| |^|  |=>|  |,|  |.| 
;--       |==|  |<<|  |~|  |/\\|  |\\/|  |==>|  |<=>|  |?|  |!|  |"| ))


; Meta language symbols
(setq tml-sym '|;;|)
(setq tokqt-sym '|`|)
(setq escape-sym '|\\|)
(setq exfix-sym '|$|)
(setq neg-sym '|not|)   ;lc
(setq arrow-sym '|->|)
(setq prod-sym '|#|)
(setq sum-sym '|+|)
(setq list-sym '|list|)  ;lc
(setq null-sym '|.|)
;-- (setq cnr-sym '|"|)   Removed for PRL.
;-- (setq endcnr-sym '|"|)

(setq start-string-sym '|"|) 
(setq end-string-sym '|"|)

(setq mul-sym '|*|)
(setq div-sym '|/| #|||#)
(setq plus-sym '|+|)
(setq mns-sym '|-|)
(setq conc-sym '|@|)
(setq eq-sym '|=|)
(setq lt-sym '|<|)
(setq gt-sym '|>|)
(setq conj-sym '|&|)
(setq disj-sym '|or|)  ;lc
(setq condl-sym '|=>|)
(setq lam-sym '|\\|)
(setq asgn-sym '|:=|)
(setq else-sym '\| #|||#)   ; This crap to keep zmacs happy.
(setq trap-then-sym '|?|)
(setq trapif-then-sym '|??|)
(setq trapbind-then-sym '|?\\|)
(setq trap-loop-sym '|!|)
(setq trapif-loop-sym '|!!|)
(setq trapbind-loop-sym '|!\\|)

(setq trap-syms
    (list trap-then-sym trap-loop-sym trapif-then-sym trapif-loop-sym
          trapbind-then-sym trapbind-loop-sym))


(setq spec-syms
     (nconc (list div-sym else-sym escape-sym
                  trapbind-then-sym trapbind-loop-sym)
;                         /       |        \\        ?\\     !\\    |
        '(|:| |(|  |)|  |#|  ->  |,|  |.|  |[|  |]|  |;|  |;;|  :=
          |'|   %  $  |`|  |``|  _  *  +  -  @  =  <  >  &  =>
          ?  ?? !  !! )))  ;-- PRL: removed ", added '.


(setq rsvdwds '(|let| |letref| |letrec| |and| |with| |in|
                |deftype| |lettype| |abstype| |absrectype|
                |where| |whereref| |whererec|
                |wheretype| |whereabstype| |whereabsrectype|
                |begin| |end| |do| |it| |or|
                |not| |true| |false|
                |if| |then| |loop| |else|))
;--
;--  fail and failwith have been removed from the above list since
;--  we re-work them to give back-trace information.
;--


(setq declnconstrs '(mk-let mk-letrec mk-letref mk-deftype
                     mk-defrectype mk-abstype mk-absrectype))



(setq tokbearer '|<TOKEN>|)
(setq toklbearer '|<TOKEN-LIST>|)
(setq string-bearer '|<STRING>|)  ;-- for prl.
(setq pp-sym '" ... ")



;;; get next char
;;; nextch but skip comments, comments not nested. comment char can not be escaped???
(defun gnc (&optional active-escape-p charonlyp)
  (let ((ch (nextch active-escape-p charonlyp)))
    ;;not possible to skip blanks because of vartypes
    (cond
      ((eql ch cmntchr)
       (do () ((eql (nextch nil charonlyp) cmntchr)))
       (gnc active-escape-p charonlyp))				;skip comments;
      (t ch))))


;--  initialize lexical analyzer. Complicated by the fact that an eof
;-- durring initialization, this is not an error, but a normal end of
;-- ml.  We skip characters until we have something that is not
;-- part of a comment or a space.  If an eof occurs, then we throw
;-- a new token that indicates normal termination.




(setq token nil)  (setq ptoken nil)
(setq tokchs nil) (setq ptokchs nil)
(setq toktyp nil) (setq ptoktyp nil)
;(setq char ml-space)

; Assumes a is a non-numeric atom, returns  number which is ascii-code of the
; first char of a's print-name

(defun getascii (a)
  (let ((s (string a)))
    (declare (string s))
    (when (= 1 (length s))
	  (let ((ch (char s 0)))
	    (when (standard-char-p ch)
		  (character-to-code ch))))))
	
(defun asciip (n1 a n2)
  (when (or (stringp a) (symbolp a))
    (let ((code (getascii a)))
      (when code
	(< (1- n1) code (1+ n2))))))


;; should return similar result as charp.
(defun char-code-p (code)
  (< 32 code 127))

; Unix -- changed character codes to decimal
(defun charp (a)   (asciip  33. a 125.))    ; octal  41 - 175
(defun upperp (a)  (asciip 65. a 90.))      ; octal 101 - 132
(defun lowerp (a)  (asciip 97. a 122.))     ; octal 141 - 172
(defun digitp (a)  (asciip  48. a  57.))    ; octal  60 -  71
(defun digit8p (a) (asciip  48. a  55.))    ; octal  60 -  67

;; Do we want to allow non-ascii chars, ie unicode?
;; No, alphanump is used to distinquish identifiers. No need to generalize
;; alphabet for identifiers.
(defun letterp (a) (or (upperp a) (lowerp a)))
(defun alphanump (a) (or (letterp a) (digitp a) (eql a '|'|)(eql a '|_|)))
(defun upperordigitp (a) (or (upperp a) (digitp a)))

(defconstant *ml-spacep*  (list ml-space ml-cr ml-nl ml-tab))
(defun spacep (a) (member a *ml-spacep*))

;  Part 2: Predicates on tokens

(defun idenp (tok)
  (let ((letters (explode tok)))
    (and letters
	 (letterp (car letters))
	 (forall #'alphanump (cdr letters)))))

(defun nump (tok)
 (can (function intof) (list tok)))


; set up lexical analysis of multi-character special symbols
; ideally should be divided ML from OL
(setf (get '|=| 'double) '(|>| |=|))
(setf (get '|-| 'double) '(|>|))
(setf (get '|<| 'double) '(|<| |=|))
(setf (get '|:| 'double) '(|=|))
(setf (get '|`| 'double) '(|`|))
(setf (get '|?| 'double) '(|?| |\\|))
(setf (get '|;| 'double) '(|;|))
(setf (get '|!| 'double) '(|!| |\\|))
(setf (get '|/| 'double) '(|\\|))
(setf (get '|\\| 'double) '(|/|))
(setf (get '|==| 'double) '(|>|))
(setf (get '|<=| 'double) '(|>|))


(defun check (tok rslt msg)
  (cond ((eql tok token)
	 (gnt)
	 rslt)
	(t (fail msg)))
  )  ;check

(defun fail (msg)
  (llprinc msg) (llterpri)
  (llprins "skipping:")
  (llprinc ptoken)
  (llprins 'space)
  (llprinc token)
  (llprins 'space)
  (do ((i %skiplimit (1- i)))
      ((eql token tml-sym) (ifn (> i 0) (llprins ". . .")))
    (gnt)
    (if (> i 0) (progn (llprinc token) (llprins 'space))))
  ;;--   (initlean)   leave this to PRL
  (breakout parse nil)
  )					;fail


(setq arg1 nil)

(defun parse-properties-of-token (id)
  (or (cond
	((eql id eq-sym) *equal-parse-properties*)
	((eql id period) *period-parse-properties*)
	((eql id scolon) *semicolon-parse-properties*)
	(t nil))
      (let ((d (find-mldef-pp id)))
	(when d
	  (parse-properties-of-mldef d)))))

(defun get-langlp (token)
  (unless (eql langlp 'mllp)
    (break "get-langlp")
    (syserror '(get-langlp not mllp))
    )
  
  (let ((props (parse-properties-of-token token)))
    (when props
      (mllp-of-parse-properties props))))

(defun get-lang1 (token)
  (unless (eql lang1 'ml1)
    (break "get-lang1")
    (syserror '(get-lang1 not ml1))
    )
  
  (let ((props (parse-properties-of-token token)))
    (when props
      (ml1-of-parse-properties props))))

(defun get-lang2 (token)
  (unless (eql lang2 'ml2)
    (break "get-lang2")
    (syserror '(get-lang2 not ml2))
    )
  
  (let ((props (parse-properties-of-token token)))
    (when props
      (ml2-of-parse-properties props))))


;;(progn (setf -token token) (break "get-lang1") nil)

; main parse routine
; parses text until reaching level cpl
; saves its result in the *special arg1
(defun parse-pop (cpl)
  (prog  (x)
     (incf parsedepth)
     (gnt)
     (setq arg1
	   (cond ((not (or (numberp ptoken)
			   (null (setq x (get-lang1 ptoken)))))
		  (eval x))
		 (t (eval atom-rtn))))
     l (setq x (unless (numberp token)
		 (get-langlp token)))
     (cond ((and (null x) (not (< cpl juxtlevel)))
	    (decf parsedepth) (return arg1))
	   ((null x) (setq arg1 (eval juxt-rtn)) (go l))
	   ((not (< cpl x))
	    (decf parsedepth) (return arg1))
	   (t nil))
     (cond
       ((member (car arg1) declnconstrs)
	(fail '|non top level decln must have IN clause|)))
     (setq x (get-lang2 token))

     (when (null x)
       ;;(break "in the wrong")
       (fail (concat token '| in the wrong place|)))
     (gnt)
     (setq arg1 (eval x))
     (go l))
  )					;parse-pop



; get next token
(defun gnt ()
       (setq cflag (spacep char))                     ;for vartypes (berk)
       (setq ptoken token)
       (setq ptokchs tokchs)
       (setq ptoktyp toktyp)
       (setq pchar char)
       (do ()
	   ((not (spacep char)))
	 (setq pchar (setq char (gnc)))) ;remove spacing
       (cond
	 ((letterp char) (setq tokchs (list char)) ;ident
	  (setq toktyp 1)
	  (ident))
	 ((digitp char) (setq tokchs (list char)) ;number (ML only)
	  (setq toktyp 1)
	  ;;(if (eql lang1 'ml1) (numb) (ident)) ; it's always lang1 ???
	  (numb)
	  )
	 ((eql char tokqt-sym)
	  (setq tokchs nil)		;token(list?)
	  (setq toktyp 1)
	  (if (and (eql (setq char (nextch t)) tokqt-sym) (not (ml-escape-p)))
	      (tcnl)			;token list
	      (tcn)))			;single token
	 ((eql char start-string-sym)
	  (setq toktyp 1)
	  (set-string))
	 ((let ((bearer (abstract-parse-func char)))
	    (when bearer
	      (setq toktyp 1)
	      (setf token bearer)
	      (setq char (gnc)) 	; ok for now, but some parse-funcs
	      t)))
	 (t (setq toktyp 2)
	    (setq char (gnc))
	    (setq token pchar)
	    (if (and (eql token scolon) (eql char ml-lf)) ;on multics: lines end
                (setq char (gnc)))	; with ml-lf ;was (prog2 (gnc)(gnc))
	    (do () 
		((not (member char (get token 'double))))
	      (setq token (concat token char))
	      (setq char (gnc)))))
       token    
)  ;gnt

; scan a number and return its numeric value
(defun numb ()
  (do ()
      ((not (digitp (setq char (gnc)))))
    (push char tokchs))

  (setq token (readlist (reverse tokchs)))
  )  ;numb

; scan an identifier as a symbol (used also for numbers in OL)
(defun ident ()
  (do ()
      ((not (alphanump (setq char (gnc)))))
      (push char tokchs))
  (setq token (implode-toks (reverse tokchs)))
  )  ;ident

(defun tcn ()
  (prog nil
	l
	(cond
	  ;; to find `v' variables.
	 ((let ((bearer (abstract-parse-hook char tokchs)))
	    (when bearer
		  (setf token bearer)
		  (setf char (gnc nil nil))
		  (return token))))
	 ((and (eql char tokqt-sym) (not (ml-escape-p)))
	  (setq char (gnc nil t))
	  (setq token tokbearer)
	  (setf
	   (get tokbearer 'tokval)
	   (append (get tokbearer 'tokval)
		   (list (implode-toks (reverse tokchs)))))
	  (return token))
	 (t (setq tokchs (cons char tokchs))))
        (setq char (nextch t t))  ;NEW nextch was gnc
        (go l))
  )  ;tcn

(defun tcnl ()
  (prog (tokl)
        (setq tokl nil)
   l1   (setq char (nextch t t))  ;NEW nextch was gnc
         ;; maybe should be gnc since can't comment start of tok list
         ;; comment chars get folded into tok. maybe feature and bug
         ;; is that comments work at later points of list?
   l2   (cond
         ((and (eql char tokqt-sym) (not (ml-escape-p)))
          (cond
           ((and (eql (setq char (nextch t t)) tokqt-sym) (not (ml-escape-p)))   ;NEW nextch was gnc
            (when tokchs
	      (setq tokl (cons (implode-toks (nreverse tokchs)) tokl)))
            (setq token toklbearer)
            (setf
             (get toklbearer 'toklval)
             (append (get toklbearer 'toklval)
                     (list (nreverse tokl))))
            (setq char (gnc nil t))
            (return token))
           (t (setq tokchs (cons tokqt-sym tokchs)) (go l2))))
         ((spacep char)
          (do () ((not (spacep (setq char (gnc t t))))))   ;remove spaces
          (if tokchs (setq tokl (cons (implode-toks (nreverse tokchs)) tokl)))
          (setq tokchs nil)
          (go l2))
         (t (setq tokchs (cons char tokchs)) (go l1))))
  )  ;tcnl


(defun set-string ()
  (let ((acc nil))

    (do ((ch (nextch t t) (nextch t t)))
	((and (eql end-string-sym ch) (not (ml-escape-p))) 

	 (setq char (gnc))
	 (setf token string-bearer)
	 (setf (get string-bearer 'string-val)
	       (append (get string-bearer 'string-val)
		       (list (implode-toks-to-string (nreverse acc))))))

      (push ch acc))))


(defun vartype-rtn ()
  (prog (n)
        (cond (cflag (return mul-sym)))
        (setq n 1)
   loop (cond ((or (numberp token) (eql toktyp 1) (eql token mul-sym)))
              (t (return (implode-toks (charseq mul-sym n)))))
        (gnt)
        (cond
         ((and (eql ptoken mul-sym) (not cflag))
          (setq n (1+ n))
          (go loop)))
        (return (implode-toks (append (charseq mul-sym n) (explode ptoken)))))
  )  ;vartype-rtn


(defun last-val-reset ()
  (set-mldef-value *last-val-mldef* nil)
  (set-mldef-type *last-val-mldef* nullty))

(defvar *ml-fail*)

(defun premature-input-failure ()
  (llprinc '|Error:  Premature end of input.|)
  (llterpri)
  (llprinc (format-string "Scan address: ~a" (ml-line-count)))
  (setf *ml-fail*
	(basic-message (list 'error 'ml 'parse) (nreverse output-list)))
  (setq %it nil)
  (last-val-reset)
  (breakout tml-error-tag nil))

(defun initlean ()
  (setq token nil)
  (setq tokchs nil)
  (setq toktyp nil)
  (setf (get tokbearer 'tokval) nil)
  (setf (get toklbearer 'toklval) nil)
  (setf (get string-bearer 'string-val) nil)
  (initlean-abstract-type-bearers)

  (when (tag eoftag			;-- eof will be indicated with value t.
	     (setq char (gnc))		;-- get a character.  Skip comments.
	     (do () 
		 ((not (spacep char)))
	       (setq char (gnc))	;-- skip spaces.
	       ))			
    (breakout tmltag t)) 		;-- eof occured.  Stop.
  
  ;;-- No problems were encountered. If eof occurs,
  ;;-- this is not evaluated.
    
  ;; (gnt) moved here from parseml0 so ";;" can be ignored.
  ;; but now have to fake premature end of input if there is no token.
  ;; Hint: If input is ";;" then gnt returns tml-sym.

  (let ((ok nil))
    (tag eoftag
	 (progn
	   (gnt)
	   (setf ok t)))
    (when (not ok)
      (premature-input-failure)))
      
  (if (eql tml-sym token)
      (initlean)
      nil))				

