;;; synfl.el --- a syntactic font locking utility

;; Copyright 2004 Stephen J. Turnbull

;; Author: Stephen J. Turnbull <stephen@xemacs.org>
;; Created: 2002 June 10

;;; Commentary

;;; Code

(defvar synfl-debug t
  "Enables additional type-checking and debugging output.

May be nil (no debugging), t (all debugging), or an integral debugging level.")

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; synfl parser interpreter
;;
(defconst synfl-action-names '(shift reduce goto)
  "Names of parser operations.")

(defconst synfl-parse-error-names '(parse-error)
  "Names of syntax errors detected by the generic parser.

Error actions defined by the grammar are declared in the grammar's
'synfl-syntax-error-names property.")
  
;; #### the synfl-declare-*s functions should be defined in terms of functions
;; working on single instances with a "lazy" option to cons up lists of names
;; that have not already been declared, cf synfl-maybe-declare-syntax-error
(defun synfl-declare-grammar (name)
  "Declare and initialize SYMBOL as a synfl grammar."
  (unless (symbolp name)
    (error 'wrong-type-argument "grammar name must be a symbol" name)))

(defun synfl-check-grammar (object)
  "Return OBJECT if it is a synfl grammar, otherwise signal an error."
  (unless (symbolp object)
    (error 'wrong-type-argument "grammar name must be a symbol" object))
  (unless (get object 'synfl-terminal-list)
    (error 'wrong-type-argument "missing terminal list" object))
  ;; #### should check nonterminal and production lists
  ;; need to design a lazy definition mechanism
  )

(define-error 'synfl-redeclare
  "name redeclared"
  'invalid-change)
(define-error 'synfl-redeclare-terminal
  "terminal redeclared"
  'synfl-redeclare)
(define-error 'synfl-redeclare-nonterminal
  "nonterminal redeclared"
  'synfl-redeclare)

;; #### strategy for lazy declarations: declare all lhs of productions as
;; nonterminals, all elements of rhs as terminals.  Use condition-case to catch
;; redeclarations.  Redeclaration of terminal to nonterminal is executed by
;; removing name from terminals, other redeclarations are ignored.

(defun synfl-declare-terminal (grammar terminal)
  "In GRAMMAR, declare TERMINAL as the name of a terminal element.

Note that a token is not a symbol; it is a list whose car is a token name.

The names :eof and :dot are special.  They should not be given as
TERMINAL.  :eof is an internally-generated sentinel meaning end-of-file.
:dot is an internally-generated sentinel used in parser construction." 
  (when (not (symbolp terminal))
    (error 'wrong-type-argument "token name must be symbol" terminal))
  (when (memq terminal '(:eof :dot))
    (error 'synfl-redeclare-terminal "reserved token name" terminal))
  (let ((terminals (get grammar 'synfl-terminal-list))
	(nonterminals (get grammar 'synfl-nonterminal-list)))
    (when (memq terminal terminals)
      (error 'synfl-redeclare-terminal
	     "already declared as terminal" terminal))
    (when (memq terminal nonterminals)
      (error 'synfl-redeclare-nonterminal
	     "already declared as nonterminal" terminal))
    (put grammar 'synfl-terminal-list (cons terminal terminals)))
  ;; #### is this ever useful?
  (put terminal 'synfl-symbol-type 'terminal))

(defun synfl-declare-terminals (grammar &rest terminals)
  "Declare GRAMMAR's token names for TERMINALS (a list of symbols).

Note that a token is not a symbol; it is a list whose car a token name.

The names :eof and :dot are special.  They should not be included in
TERMINALS.  :eof is an internally-generated sentinel meaning end-of-file.
:dot is an internally-generated sentinel used in parser construction."
  (put grammar 'synfl-terminal-list '(:eof))
  (mapc (lambda (x)
	  (synfl-declare-terminal grammar x))
	terminals))

(defun synfl-declare-nonterminal (grammar nonterminal)
  "In GRAMMAR, declare NONTERMINAL as the name of a nonterminal element.

Note that a token is not a symbol; it is a list whose car a token name.

The names :eof and :dot are special.  They should not be given as
NONTERMINAL.  :eof is an internally-generated sentinel meaning end-of-file.
:dot is an internally-generated sentinel used in parser construction."
  (when (not (symbolp nonterminal))
    (error 'wrong-type-argument "token name must be symbol" nonterminal))
  (when (memq nonterminal '(:eof :dot))
    (error 'wrong-type-argument "reserved token name" nonterminal))
  (let ((terminals (get grammar 'synfl-terminal-list))
	(nonterminals (get grammar 'synfl-nonterminal-list)))
    (when (memq nonterminal terminals)
      (error 'synfl-redeclare-terminal
	     "already declared as terminal" nonterminal))
    (when (memq nonterminal nonterminals)
      (error 'synfl-redeclare-nonterminal
	     "already declared as nonterminal" nonterminal))
    (put grammar 'synfl-nonterminal-list (cons nonterminal nonterminals)))
  ;; #### is this ever useful?
  (put nonterminal 'synfl-symbol-type 'nonterminal))

(defun synfl-declare-nonterminals (grammar &rest nonterminals)
  "Declare GRAMMAR's NONTERMINALS (a list of symbols).

Note that a token is not a symbol; it is a list whose car a token name.

The names :eof and :dot are special.  They should not be included in
NONTERMINALS.  :eof is an internally-generated sentinel meaning end-of-file.
:dot is an internally-generated sentinel used in parser construction."
  (put grammar 'synfl-nonterminal-list nil)
  (mapc (lambda (x)
	  (synfl-declare-nonterminal grammar x))
	nonterminals))

(defun synfl-declare-production (grammar production)
  "Declare GRAMMAR to have PRODUCTION as a production.

PRODUCTION is a list whose car is a nonterminal \(the lhs of the production),
whose cadr is a semantic function, and whose cddr is a list of grammar
symbols \(ie, members of the union of terminals and nonterminals).

The semantic function should take three arguments, the production itself, a
semantic state object, and a memo.  The form of a semantic state object is
determined by the semantics attached to the grammar, except that nil is the
initial state.  The memo contains the part of the stack popped off in the
reduction, including states and \"covered\" tokens.  A token is a list whose
first element is a grammar terminal, and whose remaining elements \(if any)
are specific to the grammar and its semantics.

Note that in typical Emacs applications, input will be a buffer and the
semantic functions will have side effects on it \(eg, creating extents)."
  (or (memq (car production) (get grammar 'synfl-nonterminal-list))
      (error 'wrong-type-argument
	     "lhs of synfl production must be nonterminal" (car production)))
  (or (functionp (cadr production))
      (error 'wrong-type-argument
	     "second of synfl production must be function" (cadr production)))
  (mapc (lambda (y)
	  (or (memq y (get grammar 'synfl-terminal-list))
	      (memq y (get grammar 'synfl-nonterminal-list))
	      (error 'wrong-type-argument
		     "element of synfl production must be token name"
		     y)))
	(cddr production))
  (put grammar 'synfl-production-list (nconc (get grammar
						  'synfl-production-list)
					     (list production))))

(defun synfl-declare-productions (grammar &rest productions)
  "Declare GRAMMAR's list of PRODUCTIONS \(list of lists of symbols).

Each element of PRODUCTIONS is a list whose car is a nonterminal \(the lhs of
the production), whose cadr is a semantic function, and whose cddr is a list
of grammar symbols \(ie, members of the union of terminals and nonterminals).

The semantic function should take three arguments, the production itself, a
semantic state object, and a memo.  The form of a semantic state object is
determined by the semantics attached to the grammar, except that nil is the
initial state.  The memo contains the part of the stack popped off in the
reduction, including states and \"covered\" tokens.  A token is a list whose
first element is a grammar terminal, and whose remaining elements \(if any)
are specific to the grammar and its semantics.

Note that in typical Emacs applications, input will be a buffer and the
semantic functions will have side effects on it \(eg, creating extents)."
  (mapc (lambda (x)
	  (synfl-declare-production grammar x))
	productions))

;; #### make all the memq's into errors and catch them when lazy
(defun synfl-maybe-declare-syntax-error (grammar name)
  "Declare GRAMMAR as detecting a syntax error named by NAME.

Ignored if already declared as a syntax error, or a member of
`synfl-action-names' or `synfl-parse-error-names'."

  ;; #### all these should be `check-type's
  (cond ((not (symbolp grammar))
	 (error 'wrong-type-argument "grammar must be a symbol" grammar))
	((not (symbolp name))
	 (error 'wrong-type-argument "action name must be a symbol" name)))
  (let ((declared-errors (get grammar 'synfl-syntax-error-names)))
    (or (memq name synfl-action-names)
	(memq name declared-errors)
	(memq name synfl-parse-error-names)
	(when synfl-debug
	  (warn (format "adding new syntax error: %s" name))
	  nil)
	(put grammar 'synfl-syntax-error-names (cons name declared-errors)))))

(defun synfl-declare-start-symbol (grammar symbol)
  "Declare GRAMMAR's start symbol to be SYMBOL."
  (cond ((not (symbolp grammar))
	 (error 'wrong-type-argument "grammar must be a symbol" grammar))
	((not (symbolp symbol))
	 (error 'wrong-type-argument "start symbol must be a symbol" symbol)))
  (put grammar 'synfl-start-symbol symbol))

(defun synfl-put-parser-entry (grammar symbol state action)
  "In GRAMMAR's parse table set the action for SYMBOL and STATE to ACTION."

  (cond ((not (symbolp grammar)) (error 'wrong-type-argument
					"grammar must be a symbol" grammar))
	((not (symbolp symbol)) (error 'wrong-type-argument
				       "token name must be a symbol" symbol))
	;; can't do upper range check here
	((or (not (integerp state)) (< state 0))
	 (error 'wrong-type-argument "state must be a nonnegative integer"
		state)))
  (synfl-check-action action grammar)

  ;; #### Should keep track of maximum state for efficient reallocation
  ;; #### Should make column reallocation function and use it to "square" table.
  ;; #### Note: these changes may be irrelevant if we use table compression.

  (let ((table (get grammar 'synfl-parser-table))
	;; currently unused
	;(table-size (get grammar 'synfl-parser-table-size))
	(nsymbols (+ (length (get grammar 'synfl-terminal-list))
		     (length (get grammar 'synfl-nonterminal-list)))))
    ;; Lazy initialization
    (cond ((hash-table-p table))
	  (table (error 'invalid-state "expected a hash table" table))
	  (t (setq table (make-hash-table :size nsymbols))
	     (put grammar 'synfl-parser-table table)))
    (let ((col (gethash symbol table))
	  (default-error '(parse-error . "Unhandled syntax error")))
      (insert (format "%s %s %s\n" col state action))
      (setq col
	    (cond ((and (vectorp col) (< state (length col))) col)
		  ;; This may be very slow for large tables
		  ;; due to all the memory allocation
		  ((vectorp col)
		   (let ((newcol (make-vector (1+ state) default-error))
			 (i (length col)))
		     (while (> i 0)
		       (setq i (1- i))
		       (aset newcol i (aref col i)))
		     newcol))
		  (col (error 'invalid-state "expected a vector" col))
		  (t (let ((newcol (make-vector (1+ state) default-error)))
		       newcol))))
      (insert (format "%s %s %s\n" col state action))
      (aset col state action)
      (puthash symbol col table))))

(defun synfl-lr-parse (grammar input)
  "Produce a parse according to GRAMMAR for INPUT.

GRAMMAR is a synfl grammer object, a symbol with the following properties:

synfl-production-list   A list of productions \(see `synfl-declare-productions')
synfl-start-symbol      The start symbol \(a nonterminal symbol) #### unneeded?
synfl-parser-table      The parser table \(see `synfl-put-parser-entry')
synfl-get-input         A function taking two arguments, INPUT and the input
                        pointer.  The input pointer is an object determining
                        where the next input is taken from.  t means start at
                        the beginning, nil means EOF has been reached.  Other
                        semantics of the input pointer are implementation-
                        dependent.  Returns a cons of an input token \(a list
                        whose car is a terminal symbol) and an input pointer.

INPUT is the input stream.  Form is determined by the semantics of GRAMMAR's
synfl-get-input property and of the semantic functions in its productions."

  (let ((ip t)
	(stack '(0))
	(state nil))
    (catch 'done
      (flet ((action (state token)
	       (check-type state integer)
	       (synfl-check-token token grammar)
	       (let ((col (gethash (car token)
				   (get grammar 'synfl-parser-table))))
		 (if (and col (< state (length col)))
		     (aref col state)
		   (throw 'done (list 'parse-error "Unhandled syntax error"
				      col state token)))))
	     (goto (state nonterminal)
	       (check-type state integer)
	       (unless (memq nonterminal (get grammar 'synfl-nonterminal-list))
		 (error "in goto, not a nonterminal" nonterminal "state" state))
	       (let* ((col (gethash nonterminal
				    (get grammar 'synfl-parser-table)))
		      (val (if (and col (< state (length col)))
			       (cadr (aref col state))
			     'not-an-integer)))
		 ;(insert (format "val: %s; state: %s; column: %s\n"
		 ;		 val state col))
		 (if (integerp val)
		     val
		   (throw 'done '(goto-error . "Unexpected goto error"))))))
	(while t
	  (insert (format "stack: %s; ip: %s\n" stack ip))
	  (let* ((top (car stack))
		 (pair (funcall (get grammar 'synfl-get-input) input ip))
		 (token (if (cdr pair) (car pair) (list :eof)))
		 (action (action top token)))
	    (insert (format "stack: %s; ip: %s; token: %s; action: %s\n"
			    stack ip token action))
	    (cond ((eq (car action) 'shift)
		   (push token stack)
		   (push (cadr action) stack)
		   (setq ip (cdr pair)))
		  ((eq (car action) 'reduce)
		   (let* ((memo '())
			  (production (nth (1- (cadr action))
					   (get grammar
						'synfl-production-list)))
			  (range (copy-sequence (cddr production))))
		     ;(insert (format "%s %s\n" (cadr action) production))
		     ;(insert (format "stack: %s\n" stack))
		     (while range
		       (push (pop stack) memo)
		       (push (pop stack) memo)
		       (setq range (cdr range)))
		     ;(insert (format "stack: %s; memo: %s\n" stack memo))
		     (let ((top (car stack))
			   (nonterminal (car production)))
		       (push (list nonterminal) stack)
		       ;(insert (format "top: %s; nonterminal: %s\n"
		       ;                top nonterminal))
		       (push (goto top nonterminal) stack))
		     (insert (format "stack: %s\n" stack))
		     (funcall (cadr production) production state memo)))
		  ((eq (car action) 'accept)
		   (throw 'done (cons 'win state)))
		  ((memq (car action) synfl-parse-error-names)
		   (throw 'done action))
		  ((memq (car action) (get grammar 'synfl-syntax-errors))
		   (throw 'done action))
		  (t (throw 'done (cons 'lose "Unexpected error"))))))))))

;;
;; debugging information
;;
(define-error 'synfl-token-not
  "must be a list starting with a synfl token name"
  'wrong-type-argument)
(define-error 'synfl-token-malformed
  "must be a list"
  'synfl-token-not)
(define-error 'synfl-token-terminal-not
  "must be a list starting with a synfl terminal token name"
  'synfl-token-not)
(define-error 'synfl-token-nonterminal-not
  "must be a list starting with a synfl nonterminal token name"
  'synfl-token-not)
(define-error 'synfl-action-not
  "must be a list starting with a parser action name"
  'wrong-type-argument)

;; #### maybe these synfl-check-* functions should be replaced by check-type?
(defun synfl-check-token (object grammar &optional type &rest frobs)
  "Return OBJECT if it is a token of GRAMMAR, else signal an error.

OBJECT may be any object, GRAMMAR is a symbol naming a synfl grammar.
A token is a list whose first element is a terminal or nonterminal of GRAMMAR.
Optional argument TYPE may be 'terminal or 'nonterminal, in which case the
token is restricted to be of that type.  FROBS is a list of error frobs."

  (let ((names (append (unless (eq type 'nonterminal)
			 (get grammar 'synfl-terminal-list))
		       (unless (eq type 'terminal)
			 (get grammar 'synfl-nonterminal-list))))
	(err (cond ((eq type 'terminal) 'synfl-token-terminal-not)
		   ((eq type 'nonterminal) 'synfl-token-nonterminal-not)
		   (t 'synfl-token-not))))
    (unless (listp object)
      (error 'synfl-token-malformed frobs))
    (unless (memq (car object) names)
      (error err frobs)))
  ;; object is valid
  object)

(defun synfl-check-action (object grammar)
  "Return OBJECT if it is a synfl parser action or a syntax error of GRAMMAR."
  (cond ((or (not (listp object))
	     (not (or (memq (car object) synfl-action-names)
		      (memq (car object)
			    (get grammar 'synfl-syntax-error-names))
		      (memq (car object) synfl-parse-error-names))))
	 (error 'synfl-action-not object))
	((memq (car object) '(goto shift reduce)) ; explicit, don't know that
						  ; future actions will have
						  ; one integer argument
	 (unless (integerp (cadr object))
	   (error 'synfl-action-not object
		  "goto, shift, and reduce take one integer argument"
		  (cadr object))))
	;; #### can we say anything about syntax errors?
	)
  object)

(defun synfl-prettyprint-parser-table (grammar)
  (maphash
   (lambda (key value)
     (let ((c (mapvector
	       (lambda (cons)
		 (cond ((eq (car cons) 'shift) (format "s%-2d" (cadr cons)))
		       ((eq (car cons) 'goto) (format "g%-2d" (cadr cons)))
		       ((eq (car cons) 'reduce) (format "r%-2d" (cadr cons)))
		       (t "e  ")))
	       value)))
       (insert (format "%-10s %s\n" key c))))
   (get grammar 'synfl-parser-table)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; synfl parser generation
;;

;; ASU Algorithm 4.8 "Constructing an SLR parsing table."
;;
;; Input: an augmented grammar G'.
;;
;; Output: the SLR parsing table functions action and goto for G'.
;;
;; Method:
;;
;; 1. Construct C = {I0, ..., In}, the collection of sets of LR(0)
;; items for G'.
;;
;; 2. State i is constructed from Ii.  The parsing actions for state i
;; are determined as follows:
;;
;;   a) If [A -> \alpha \cdot a \beta] is in Ii and goto(Ii,a) = Ij,
;;   then set action[i,a] to shift j.  a must be a terminal.
;;
;;   b) If [A -> \alpha \cdot] is in Ii, then set action[i,a] to
;;   "reduce A -> \alpha" for all a in FOLLOW(A); A may not be S'.
;;
;;   c) If [S' -> S] is in Ii, then set action[i,$] to "accept."
;;
;; If any conflicting actions are generated by the above rules, the
;; grammar is not SLR(1).  The algorithm fails to produce a parser.
;;
;; 3. The goto rules for state i are constructed for all nonterminals
;; A using the rule: If goto(Ii,A) = Ij, then goto[i,A] = j.
;;
;; 4. All entries not defined by rules 2 and 3 are made "error."
;;
;; 5. The initial state of the parser is the one constructed from the
;; set of items containing [S' -> \cdot S].

;; ASU Figure 4.34 "The Sets-of-Items Construction"
;;
;; To construct C, the canonical collection of sets of LR(0) items:
;;
;; procedure items (G');
;; begin
;;     C := {closure({[S' -> \cdot S]})};
;;     repeat
;;         for each set of items I in C and each grammar symbol X such
;;             that goto(I,X) is not empty and not in C do
;;                 add goto(I,X) to C
;;     until no more sets of items can be added to C
;; end

;; ASU "The Goto Operation"
;;
;; goto(I,X) is the closure of the set of items [A -> \alpha X \cdot \beta]
;; such that [A -> \alpha \cdot X \beta] is in I.

;; ASU Figure 4.33 "The Closure Operation"
;;
;; function closure (I)
;; begin
;;     J := I
;;     repeat
;;         for each item A -> \alpha \cdot B \beta in J and each
;;             production B -> \gamma of G such that B -> \cdot \gamma
;;             is not in J do
;;                 add B -> \cdot \gamma to J
;;     until no more items can be added to J;
;;     return J
;; end

;; ASU "FIRST and FOLLOW"
;;
;; FIRST (X) for any grammar symbol X
;;
;; 1. If X is terminal, FIRST(X) is {X}.
;;
;; 2. If X -> \epsilon is a production, then add \epsilon to FIRST(X).
;;
;; 3. If X -> Y1 ... Yk is a production, then place a in FIRST(X) if
;; for some i, a is in FIRST(Yi), and \epsilon is in all of FIRST(Yj)
;; for j < i, ie, Y1 ... Yk =*> \epsilon.
;;
;; For any string X1 ... Xn, FIRST(X1...Xn) is computed by starting
;; with all non-\epsilon symbols of X1.  Also add the non-\epsilon
;; symbols of Xi if for all j < i, \epsilon is in Xj.  Finally, add
;; \epsilon to FIRST(X1...Xn) if for all i FIRST(Xi) contains \epsilon.
;;
;; To compute FOLLOW (A) for all nonterminals A, apply the following
;; rules until nothing can be added to any FOLLOW set.
;;
;; 1. Place $ in FOLLOW(S).
;;
;; 2. If there is a production A -> \alpha B \beta, then everything in
;; FIRST(\beta) except for \epsilon is placed in FOLLOW(B).
;;
;; 3. If there is a production A -> \alpha B, or a production
;; A -> \alpha B \beta where FIRST(\beta) contains \epsilon, then
;; everything in FOLLOW(A) is in FOLLOW(B).

;; #### change all this to use a hash
(defun synfl-first (grammar list-or-symbol)
  "Return FIRST(LIST-OR-SYMBOL) according to GRAMMAR."
  (let* ((first-alist (get grammar 'synfl-first-alist))
	 (x (cond ((symbolp list-or-symbol) (list list-or-symbol))
		  ((listp list-or-symbol) list-or-symbol)
		  (t (error 'invalid-argument
			    "not a list or symbol"
			    list-or-symbol))))
	 (rest (cdr x))
	 (elt (car x))
	 (first (cdr (assq elt first-alist)))
	 (has-epsilon (memq :epsilon first))
	 candidates candidate)
    (while (and has-epsilon rest)
      (setq elt (car rest)
	    rest (cdr rest)
	    candidates (cdr (assq elt first-alist))
	    has-epsilon (memq :epsilon candidates))
      (while candidates
	(setq candidate (car candidates))
	(setq candidates (cdr candidates))
	(unless (or (memq candidate first) (eq candidate :epsilon))
	  (setq first (cons candidate first)))))
    (when has-epsilon (setq first (cons :epsilon first)))
    first))

(defun synfl-initialize-first-alist (grammar)
  "Initialize the alist mapping X -> FIRST(X) for GRAMMAR."
  (synfl-initialize-first-alist-terminals grammar)
  (synfl-initialize-first-alist-nil-productions grammar)
  (synfl-initialize-first-alist-iterate grammar))

(defun synfl-initialize-first-alist-terminals (grammar)
  "Internal routine for `synfl-initialize-first-alist'.  Unsafe used elsewhere.
Clears the alist and initializes FIRST(X) for terminals."
  (put grammar 'synfl-first-alist
       (let ((first-alist nil))
	 (mapc (lambda (x)
		 (setq first-alist (cons (list x x) first-alist)))
	       (get grammar 'synfl-terminal-list))
	 first-alist)))

(defun synfl-initialize-first-alist-epsilon-productions (grammar)
  "Internal routine for `synfl-initialize-first-alist'.  Unsafe used elsewhere.
Adds :epsilon to FIRST(X) for nonterminals with :epsilon productions.
If there is no :epsilon production, initialize FIRST(X) to ()."
  (let ((first-alist (get grammar 'synfl-first-alist))
	(productions (get grammar 'synfl-production-list))
	(nonterminals (get grammar 'synfl-nonterminal-list)))
    (mapc (lambda (x)
	    (while productions
	      (let ((p (car productions)))
		(if (and (eq x (car p))
			 (eq :epsilon (third p)))
		    (progn
		      (setq first-alist (cons `(,x (:epsilon)) first-alist))
		      (setq productions nil))
		  (setq productions (cdr productions)))))
	    (unless (assq x first-alist)
	      (setq first-alist (cons (list x) first-alist))))
	  nonterminals)
    (put grammar 'synfl-first-alist first-alist)))

;; #### combine this with the previous function?
(defun synfl-initialize-first-alist-iterate (grammar)
  "Internal routine for `synfl-initialize-first-alist'.  Unsafe used elsewhere.
Iterates over productions."
  (let ((first-alist (get grammar 'synfl-first-alist))
	(continue t))
    (while continue
      (setq continue nil)
      (let ((productions (get grammar 'synfl-production-list)))
	(while productions
	  (let* ((p (car productions))
		 (x (car p))
		 (rhs (cddr p))
		 (first (cdr (assq x first-alist))))
	    (message "production: %s" p)
	    (while rhs
	      (let* ((y (cdr (assq (car rhs) first-alist)))
		     (has-epsilon (memq :epsilon y))
		     (candidates (delq :epsilon y))
		     (candidate (car candidates)))
		(if (> (length candidates) 5) (error "too big"))
		(message "rhs: %s" rhs)
		(while candidates
		  (message "candidates: %s" candidates)
		  (unless (memq candidate first)
		    (setq first (cons candidate first))
		    (setq continue t))
		  (setq candidates (cdr candidates))
		  (setq candidate (car candidates)))
		(when (and has-epsilon
			   (not (cdr rhs))
			   (not (memq :epsilon first)))
		  (setq first (cons :epsilon first))
		  (setq continue t))
		(message "first: %s" first)
		(setq rhs (if has-epsilon (cdr rhs) nil))))
	    (setq first-alist (cons (cons x first)
				    (delete (assq x first-alist) first-alist))))
	  (setq productions (cdr productions)))))
    (put grammar 'synfl-first-alist first-alist)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; testing
;;
(defun synfl-test-semantics (production state memo)
  "Trace semantics for a synfl parser.

PRODUCTION is formatted and output to a buffer, STATE and MEMO are ignored."
  (let ((lhs (car production))
	(rhs (cddr production)))
    (message "Reduce by %s -> %s" lhs rhs)
    ;(set-buffer (get-buffer-create " *synfl test buffer*"))
    (insert (format "%s ->" lhs))
    (while rhs
      (insert (format " %s" (car rhs)))
      (setq rhs (cdr rhs)))
    (insert "\n")))

(defun synfl-test-get-input (input ip)
  (when (eq ip t) (setq ip input))
  (when ip (cons (list (car ip)) (cdr ip))))

;;
;; an example: Aho, Sethi, Ullman _Compilers_, Example 4.33
;;
(defun synfl-asu4-33 ()
  (interactive)

  ;; set up get-input, element lists, and productions
  (synfl-declare-grammar 'asu4-33)
  (put 'asu4-33 'synfl-get-input 'synfl-test-get-input)
  (synfl-declare-terminals 'asu4-33 'add 'mul 'lpar 'rpar 'id)
  (synfl-declare-nonterminals 'asu4-33 'expr 'term 'fact)
  (synfl-declare-start-symbol 'asu4-33 'expr)
  (synfl-declare-productions 'asu4-33
			     '(expr synfl-test-semantics expr add term)
			     '(expr synfl-test-semantics term)
			     '(term synfl-test-semantics term mul fact)
			     '(term synfl-test-semantics fact)
			     '(fact synfl-test-semantics lpar expr rpar)
			     '(fact synfl-test-semantics id))

  ;; load parse table by hand
  (synfl-put-parser-entry 'asu4-33 'id 0 '(shift 5))
  (synfl-put-parser-entry 'asu4-33 'id 4 '(shift 5))
  (synfl-put-parser-entry 'asu4-33 'id 6 '(shift 5))
  (synfl-put-parser-entry 'asu4-33 'id 7 '(shift 5))
  (synfl-put-parser-entry 'asu4-33 'add 1 '(shift 6))
  (synfl-put-parser-entry 'asu4-33 'add 2 '(reduce 2))
  (synfl-put-parser-entry 'asu4-33 'add 3 '(reduce 4))
  (synfl-put-parser-entry 'asu4-33 'add 5 '(reduce 6))
  (synfl-put-parser-entry 'asu4-33 'add 8 '(shift 6))
  (synfl-put-parser-entry 'asu4-33 'add 9 '(reduce 1))
  (synfl-put-parser-entry 'asu4-33 'add 10 '(reduce 3))
  (synfl-put-parser-entry 'asu4-33 'add 11 '(reduce 5))
  (synfl-put-parser-entry 'asu4-33 'mul 2 '(shift 7))
  (synfl-put-parser-entry 'asu4-33 'mul 3 '(reduce 4))
  (synfl-put-parser-entry 'asu4-33 'mul 5 '(reduce 6))
  (synfl-put-parser-entry 'asu4-33 'mul 9 '(shift 7))
  (synfl-put-parser-entry 'asu4-33 'mul 10 '(reduce 3))
  (synfl-put-parser-entry 'asu4-33 'mul 11 '(reduce 5))
  (synfl-put-parser-entry 'asu4-33 'lpar 0 '(shift 4))
  (synfl-put-parser-entry 'asu4-33 'lpar 4 '(shift 4))
  (synfl-put-parser-entry 'asu4-33 'lpar 6 '(shift 4))
  (synfl-put-parser-entry 'asu4-33 'lpar 7 '(shift 4))
  (synfl-put-parser-entry 'asu4-33 'rpar 2 '(reduce 2))
  (synfl-put-parser-entry 'asu4-33 'rpar 3 '(reduce 4))
  (synfl-put-parser-entry 'asu4-33 'rpar 5 '(reduce 6))
  (synfl-put-parser-entry 'asu4-33 'rpar 8 '(shift 11))
  (synfl-put-parser-entry 'asu4-33 'rpar 9 '(reduce 1))
  (synfl-put-parser-entry 'asu4-33 'rpar 10 '(reduce 3))
  (synfl-put-parser-entry 'asu4-33 'rpar 11 '(reduce 5))
  (synfl-put-parser-entry 'asu4-33 :eof 1 '(accept))
  (synfl-put-parser-entry 'asu4-33 :eof 2 '(reduce 2))
  (synfl-put-parser-entry 'asu4-33 :eof 3 '(reduce 4))
  (synfl-put-parser-entry 'asu4-33 :eof 5 '(reduce 6))
  (synfl-put-parser-entry 'asu4-33 :eof 9 '(reduce 1))
  (synfl-put-parser-entry 'asu4-33 :eof 10 '(reduce 3))
  (synfl-put-parser-entry 'asu4-33 :eof 11 '(reduce 5))
  (synfl-put-parser-entry 'asu4-33 'expr 0 '(goto 1))
  (synfl-put-parser-entry 'asu4-33 'expr 4 '(goto 8))
  (synfl-put-parser-entry 'asu4-33 'term 0 '(goto 2))
  (synfl-put-parser-entry 'asu4-33 'term 4 '(goto 2))
  (synfl-put-parser-entry 'asu4-33 'term 6 '(goto 9))
  (synfl-put-parser-entry 'asu4-33 'fact 0 '(goto 3))
  (synfl-put-parser-entry 'asu4-33 'fact 4 '(goto 3))
  (synfl-put-parser-entry 'asu4-33 'fact 6 '(goto 3))
  (synfl-put-parser-entry 'asu4-33 'fact 7 '(goto 10))

  ;; OK, let's do it!
  (synfl-lr-parse 'asu4-33 '(id mul id add id)))

;;; End of synfl.el
