/*
   File: parser.c
   Parses an eag from a file and builds the hyper and meta tree
*/

/* global includes */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>

/* libeag includes */
#include <export.h>
#include <error.h>
#include <memalloc.h>
#include <textstorage.h>

/* local includes */
#include <sizes.h>
#include <tree.h>
#include <gentree.h>
#include <typecheck.h>
#include <lex.h>
#include <parser.h>
#include <main.h>

/* Number of errors counted so far by parser_error () and
   check_for_single_affixes () during the current parse. */
private int parse_errors;
/* Left hand side of the first syntax rule that was parsed;
   string_nil as long as no syntax rule has been seen. */
private char *first_nonterminal;

/* Resets the parser's bookkeeping before a new input is parsed. */
private void init_parser ()
	{ first_nonterminal = string_nil;
	  parse_errors = 0;
	}

/*
   Reports a parse error for source position (lin, col).
   The printf-style format and its arguments are expanded into a
   local buffer, the parse error counter is incremented and the
   expanded text is handed to error ().

   vsnprintf is used rather than vsprintf: messages may embed text
   taken from the input (e.g. a nonterminal name), and an overlong
   message must be truncated instead of overrunning the buffer.
*/
private void parser_error (int lin, int col, char *format, ...)
	{ char buf[MAXSTRLEN];
	  va_list arg_ptr;
	  va_start (arg_ptr, format);
	  vsnprintf (buf, sizeof (buf), format, arg_ptr);
	  va_end (arg_ptr);

	  parse_errors++;
	  error ("parse error at line %d, col %d: %s", lin, col, buf);
	}

/*
   Error recovery: skips input tokens until the current token is an
   IDENTIFIER, the end of file symbol, or one of the tokens in toks.
   toks must be terminated by UNDEFINED.

   The end of file check guarantees termination: without it the loop
   would keep calling insymbol () forever when none of the recovery
   tokens occurs in the remainder of the input (assuming the lexer
   keeps delivering EOFSYMBOL at end of input -- TODO confirm).
*/
private void skiptokens (symbol *toks)
	{ int i;
	  while (1)
	     { if (thistoken == IDENTIFIER) return;
	       if (thistoken == EOFSYMBOL) return;
	       for (i=0; toks[i] != UNDEFINED; i++)
		  if (thistoken == toks[i]) return;
	       insymbol ();
	     };
	}

/*
   Tests whether the current token equals sy.
   On a match the lexer is advanced with insymbol () and 1 is
   returned; otherwise nothing is consumed and 0 is returned.
*/
private int is_symbol (int sy)
	{ if (thistoken == sy)
	     { insymbol ();
	       return (1);
	     };
	  return (0);
	}

/*
   Requires the current token to be sy and consumes it.
   If it is not, an error "<kind> expected" is reported at the
   current position and tokens are skipped using recovery set s.
*/
private void should_be_symbol (int sy, char *kind, symbol *s)
	{ if (!is_symbol (sy))
	     { parser_error (line, column, "%s expected", kind);
	       skiptokens (s);
	     };
	}
 
/*
   Optionally recognizes a '*' or '+' and records it in *kind by
   or-ing in star or plus.
   Nothing is consumed when nexttoken is an IDENTIFIER, NUMBER,
   STRING, SET or UP.
   NOTE(review): nexttoken presumably is the lookahead token behind
   thistoken, so that a '*' or '+' followed by a term is left alone
   as an expression operator -- confirm against the lexer.
*/
private void may_be_plus_or_star (int *kind)
	{ int ahead = nexttoken;

	  if ((ahead == IDENTIFIER) || (ahead == NUMBER) ||
	      (ahead == STRING) || (ahead == SET) || (ahead == UP))
	     return;
	  if (is_symbol (STAR)) *kind |= star;
	  else if (is_symbol (PLUS)) *kind |= plus;
	}

/* Optionally recognizes a '!' and records it in *kind as strict. */
private void may_be_strict (int *kind)
	{ if (!is_symbol (EXCLAMATIONMARK)) return;
	  *kind |= strict;
	}

/*
   Recognizes a SET token, optionally followed by '*' or '+' which
   in turn may be followed by '!'.
   On success a new set is stored in *s and 1 is returned; if the
   current token is not a SET, 0 is returned and *s is untouched.
*/
private int is_set (set *s)
	{ /* sval is read before the token is consumed (presumably
	     insymbol () replaces it -- TODO confirm) */
	  char *text = sval;
	  int kind = 0;

	  if (!is_symbol (SET)) return (0);
	  may_be_plus_or_star (&kind);
	  if (kind & (star | plus)) may_be_strict (&kind);
	  *s = new_set (kind, text);
	  return (1);
	}

/* Recovery set used after a missing set, identifier or nonterminal */
private symbol identrecset[] =
	{ COMMA, COLON, SEMICOLON, POINT, PLUS,
	  STAR, FLOW, RIGHTPARENTHESIS, UNDEFINED
	};

/*
   Recognizes a set that may be preceded by an UP token; in that
   case the flag non is or-ed into the kind of the set.
   Once UP has been consumed 1 is always returned: if no set follows,
   an error is reported, tokens are skipped and *s becomes set_nil.
   Returns 0 only if neither UP nor a set is present.
*/
private int is_proper_set (set *s)
	{ if (!is_symbol (UP)) return (is_set (s));
	  if (is_set (s))
	     { (*s) -> kind |= non;
	       return (1);
	     };
	  parser_error (line, column, "set expected");
	  skiptokens (identrecset);
	  *s = set_nil;
	  return (1);
	}

/*
   Recognizes an IDENTIFIER token.
   On success the value sval had before the token was consumed is
   stored in *val and 1 is returned; otherwise 0 is returned.
*/
private int is_identifier (char **val)
	{ char *name = sval;
	  if (is_symbol (IDENTIFIER))
	     { *val = name;
	       return (1);
	     };
	  return (0);
	}

/* Name substituted for identifiers and nonterminals that are missing */
#define ERROR_ID "<ERROR>"

/*
   Requires an IDENTIFIER token and delivers its text in *val.
   If it is absent, an error is reported, *val is set to the name
   registered for ERROR_ID and tokens are skipped using identrecset.
   *val is therefore always assigned.
*/
private void should_be_identifier (char **val)
	{ char *name = sval;
	  if (!is_symbol (IDENTIFIER))
	     { parser_error (line, column, "identifier expected");
	       *val = addto_names (ERROR_ID);
	       skiptokens (identrecset);
	       return;
	     };
	  *val = name;
	}

/*
   Recognizes a NUMBER token.
   On success the value nval had before the token was consumed is
   stored in *val and 1 is returned; otherwise 0 is returned.
*/
private int is_number (int *val)
	{ int value = nval;
	  if (!is_symbol (NUMBER)) return (0);
	  *val = value;
	  return (1);
	}

/*
   Recognizes a STRING token.
   On success the value sval had before the token was consumed is
   stored in *val and 1 is returned; otherwise 0 is returned.
*/
private int is_string (char **val)
	{ char *text = sval;
	  if (!is_symbol (STRING)) return (0);
	  *val = text;
	  return (1);
	}

/*
   Recognizes a term: an identifier (affix nonterminal), a string
   (affix terminal), a number or a (possibly negated) set.
   On success the affix built for it is stored in *a and 1 is
   returned; otherwise 0 is returned.
   The locals deliberately do not reuse the names sval and nval,
   which are also used for the lexer's token values in this file.
*/
private int is_term (affix *a)
	{ char *text;
	  int number;
	  set chars;

	  if (is_identifier (&text))
	     { affix_variable var = new_affix_variable (text);
	       *a = new_affix_nonterminal (var);
	       return (1);
	     };
	  if (is_string (&text))
	     { *a = new_affix_terminal (text);
	       return (1);
	     };
	  if (is_number (&number))
	     { *a = new_affix_number (number);
	       return (1);
	     };
	  if (is_proper_set (&chars))
	     { *a = new_affix_set (chars);
	       return (1);
	     };
	  return (0);
	}

/* Recovery set used after a missing term */
private symbol termrecset[] =
	{ SEMICOLON, COLON, POINT, RIGHTPARENTHESIS,
	  FLOW, STAR, PLUS, UNDEFINED };

/*
   Requires a term. If none is present, an error is reported,
   tokens are skipped using termrecset and *a becomes affix_nil.
*/
private void should_be_term (affix *a)
	{ if (!is_term (a))
	     { parser_error (line, column, "term expected");
	       skiptokens (termrecset);
	       *a = affix_nil;
	     };
	}

/*
   Parses the remainder of a concatenation whose first term a and
   first '+' have already been consumed: one term is required,
   followed by any number of "+ term" pairs.
   The resulting concatenation expression is stored in *e.
*/
private void should_be_concatenation (expr *e, affix a)
	{ affix_list terms = new_affix_list ();
	  affix operand;

	  app_affix_list (terms, a);
	  do
	     { should_be_term (&operand);
	       app_affix_list (terms, operand);
	     }
	  while (is_symbol (PLUS));
	  *e = new_expr_concat (terms);
	}

/*
   Parses the remainder of a composition whose first term a and
   first '*' have already been consumed: one term is required,
   followed by any number of "* term" pairs.
   The resulting composite expression is stored in *e.
*/
private void should_be_composite (expr *e, affix a)
	{ affix_list terms = new_affix_list ();
	  affix operand;

	  app_affix_list (terms, a);
	  do
	     { should_be_term (&operand);
	       app_affix_list (terms, operand);
	     }
	  while (is_symbol (STAR));
	  *e = new_expr_compos (terms);
	}

/*
   Recognizes an expression: a single term, a composition
   (terms separated by '*') or a concatenation (terms separated
   by '+'). Returns 0 without touching *e if no term is present,
   otherwise stores the expression in *e and returns 1.
*/
private int is_expression (expr *e)
	{ affix first;

	  if (!is_term (&first)) return (0);
	  if (is_symbol (STAR))
	     { should_be_composite (e, first);
	       return (1);
	     };
	  if (is_symbol (PLUS))
	     { should_be_concatenation (e, first);
	       return (1);
	     };
	  *e = new_expr_single (first);
	  return (1);
	}

/* Recovery set used after a missing expression */
private symbol exprrecset[] =
	{ COMMA, SEMICOLON, COLON, POINT, FLOW,
	  RIGHTPARENTHESIS, UNDEFINED };

/*
   Requires an expression. If none is present, an error is
   reported, tokens are skipped using exprrecset and *e becomes
   expr_nil.
*/
private void should_be_expression (expr *e)
	{ if (!is_expression (e))
	     { parser_error (line, column, "expression expected");
	       skiptokens (exprrecset);
	       *e = expr_nil;
	     };
	}

/*
   Recognizes one affix position of a display:
      FLOW expression	-> kind inherited
      expression FLOW	-> kind derived
      expression	-> kind noflow
   On success the position is stored in *p and 1 is returned;
   0 is returned if the input starts with neither FLOW nor an
   expression.
*/
private int is_position (pos *p)
	{ expr e;
	  int kind;

	  if (is_symbol (FLOW))
	     { should_be_expression (&e);
	       *p = new_pos (inherited, e);
	       return (1);
	     };
	  if (!is_expression (&e)) return (0);
	  kind = (is_symbol (FLOW))?derived:noflow;
	  *p = new_pos (kind, e);
	  return (1);
	}

/*
   Recovery set used after a missing position.
   The list must end in UNDEFINED: skiptokens () scans a recovery
   set until it meets that sentinel, so without it the scan would
   run past the end of the array.
*/
private symbol posrecset[] = 
	{ COMMA, SEMICOLON, COLON, POINT, RIGHTPARENTHESIS, UNDEFINED };

/*
   Requires a position. If none is present, an error is reported,
   tokens are skipped using posrecset and *p becomes pos_nil.
*/
private void should_be_position (pos *p)
	{ if (is_position (p)) return;
	  parser_error (line, column, "position expected");
	  skiptokens (posrecset);
	  *p = pos_nil;
	}

/*
   Recovery set used after a missing ')' or a missing member.
   The list must end in UNDEFINED: skiptokens () scans a recovery
   set until it meets that sentinel, so without it the scan would
   run past the end of the array.
*/
private symbol displayrecset[] =
	{ COMMA, SEMICOLON, COLON, POINT, UNDEFINED };

/*
   Parses the remainder of a display after its '(' has been
   consumed: one or more positions separated by commas, followed
   by ')'. Every position (pos_nil for one that failed to parse)
   is appended to pl.
*/
private void rest_display (pos_list pl)
	{ pos newp;
	  do
	     { should_be_position (&newp);
	       app_pos_list (pl, newp);
	     }
	  while (is_symbol (COMMA));
	  should_be_symbol (RIGHTPARENTHESIS, ")", displayrecset);
	}

/*
   Recognizes a parenthesized display. On '(' a fresh position list
   is stored in *pl, filled by rest_display () and 1 is returned;
   otherwise 0 is returned and *pl is left untouched.
*/
private int is_display (pos_list *pl)
	{ if (is_symbol (LEFTPARENTHESIS))
	     { *pl = new_pos_list ();
	       rest_display (*pl);
	       return (1);
	     };
	  return (0);
	}

/*
   Parses an optional display; when none is present *pl is set to
   a new, empty position list.
*/
private void may_be_display (pos_list *pl)
	{ if (!is_display (pl)) *pl = new_pos_list ();
	}

/*
   Recognizes a nonterminal: a sequence of identifiers and
   parenthesized displays in any order. The identifiers are glued
   together to form the nonterminal name, the positions of all
   displays are collected in one position list.

   Returns 0 (consuming nothing) if the current token is neither an
   IDENTIFIER nor '('. Otherwise returns 1 with *nont set to the
   registered name and *dpy to the collected positions. If no
   identifier was present at all, or if the glued name does not fit
   in the MAXSTRLEN byte buffer, an error is reported at the start
   of the nonterminal and *nont is set to the name for ERROR_ID.

   The name is built with explicit length checks; an unchecked
   strcat could overrun the buffer on a long sequence of name parts.
*/
private int is_a_nonterminal_with_display (char **nont, pos_list *dpy)
	{ char buf[MAXSTRLEN];
	  size_t used = 0;	/* length of the name collected so far */
	  int too_long = 0;	/* set once a part did not fit in buf */
	  int lin, col;

	  if ((thistoken != IDENTIFIER) && (thistoken != LEFTPARENTHESIS))
	     return (0);
	  buf[0] = '\0';
	  lin = line;
	  col = column;
	  *dpy = new_pos_list ();
	  while (1)
	     { char *id;
	       if (is_identifier (&id))
		  { size_t len = strlen (id);
		    /* room is needed for the part and the final '\0' */
		    if (!too_long && (len < sizeof (buf) - used))
		       { memcpy (buf + used, id, len + 1);
			 used += len;
		       }
		    else too_long = 1;
		  }
	       else if (is_symbol (LEFTPARENTHESIS)) rest_display (*dpy);
	       else break;
	     };
	  if (too_long)
	     { parser_error (lin, col, "nonterminal name too long");
	       *nont = addto_names (ERROR_ID);
	     }
	  else if (used == 0)
	     { parser_error (lin, col, "missing nonterminal");
	       *nont = addto_names (ERROR_ID);
	     }
	  else *nont = addto_names (buf);
	  return (1);
	}

/*
   Requires a nonterminal with optional displays. If none is
   present, an error is reported, tokens are skipped using
   identrecset, *nont is set to the name for ERROR_ID and *dpy to
   a new, empty position list.
*/
private void should_be_a_nonterminal_with_display (char **nont, pos_list *dpy)
	{ if (!is_a_nonterminal_with_display (nont, dpy))
	     { parser_error (line, column, "nonterminal expected");
	       skiptokens (identrecset);
	       *nont = addto_names (ERROR_ID);
	       *dpy = new_pos_list ();
	     };
	}

/*
   Recognizes a call of a nonterminal (with optional displays) and
   wraps it in a member. Returns 1 with the member stored in *m,
   or 0 if no nonterminal is present.
*/
private int is_call (member *m)
	{ char *name;
	  pos_list display;

	  if (is_a_nonterminal_with_display (&name, &display))
	     { call c = new_call (name, display);
	       *m = new_member_call (c);
	       return (1);
	     };
	  return (0);
	}

/*
   Recognizes a string as a terminal member. Returns 1 with the
   member stored in *m, or 0 if the current token is no string.
*/
private int is_terminal (member *m)
	{ char *text;

	  if (is_string (&text))
	     { *m = new_member_terminal (text);
	       return (1);
	     };
	  return (0);
	}

/*
   Recognizes a semiterminal: a (possibly negated) set followed by
   an optional display. Returns 1 with the member stored in *m,
   or 0 if no set is present.
*/
private int is_semiterminal (member *m)
	{ set chars;
	  pos_list display;

	  if (is_proper_set (&chars))
	     { semiterminal semi;
	       may_be_display (&display);
	       semi = new_semiterminal (chars, display);
	       *m = new_member_semiterminal (semi);
	       return (1);
	     };
	  return (0);
	}

/*
   Recognizes a CUT token as a cut member. Returns 1 with the
   member stored in *m, or 0 if the current token is no CUT.
*/
private int is_cut (member *m)
	{ if (is_symbol (CUT))
	     { *m = new_member_cut ();
	       return (1);
	     };
	  return (0);
	}

/*
   Recognizes a member of an alternative. The possibilities are
   tried in this order: call, terminal, semiterminal, cut; the
   first one that succeeds stores its member in *m.
   Returns 1 on success, 0 if none of them applies.
*/
private int is_member (member *m)
	{ return (is_call (m) || is_terminal (m) ||
		  is_semiterminal (m) || is_cut (m));
	}

/*
   Requires a member. If none is present, an error is reported,
   tokens are skipped using displayrecset and *m becomes member_nil.
*/
private void should_be_member (member *m)
	{ if (!is_member (m))
	     { parser_error (line, column, "member expected");
	       skiptokens (displayrecset);
	       *m = member_nil;
	     };
	}

/*
   Builds the member list of an alternative whose first member m
   has already been parsed: a new list is stored in *ml, m is
   appended to it and then every member following a comma.
*/
private void rest_members (member m, member_list *ml)
	{ member next;

	  *ml = new_member_list ();
	  app_member_list (*ml, m);
	  for (;;)
	     { if (!is_symbol (COMMA)) break;
	       should_be_member (&next);
	       app_member_list (*ml, next);
	     };
	}

/*
   Parses a possibly empty alternative and delivers its members
   in *ml.
   An empty list is only allocated when no member is present:
   rest_members () stores a list of its own in *ml, so allocating
   one beforehand would merely leave an unused list behind.
*/
private void may_be_alternative (member_list *ml)
	{ member m;

	  if (is_member (&m)) rest_members (m, ml);
	  else *ml = new_member_list ();
	}

/*
   Registers the call nont with display dpy as the start rule of
   the grammar. If a start rule is already present, an error is
   reported at (lin, col) and the existing one is kept.
*/
private void create_start_rule (char *nont, pos_list dpy, int lin, int col)
	{ if (start_rule == call_nil) start_rule = new_call (nont, dpy);
	  else parser_error (lin, col, "multiple start rule");
	}

/*
   Parses an optional expression; *e becomes expr_nil when none
   is present.
*/
private void may_be_expression (expr *e)
	{ if (!is_expression (e))
	     *e = expr_nil;		/* empty alternative */
	}

/*
   Creates the hyper rule for nont with alternatives alts and
   enters it. If enter_hyper_rule () refuses it, a "multiply
   defined" error is reported at (lin, col).
*/
private void create_hyper_rule (char *nont, alt_list alts, int lin, int col)
	{ hyper_rule rule = new_hyper_rule (nont, alts);
	  if (enter_hyper_rule (rule)) return;
	  parser_error (lin, col, "multiply defined hyper group %s", nont);
	}

/* rest_meta_rule will be called after recognizing :: */

/* Empty recovery set: skiptokens () then stops at its built-in
   stop tokens only */
private symbol emptyrecset[] = { UNDEFINED };

/*
   Reports an error at (lin, col) if the position list dpy, parsed
   along with the left hand side of a meta rule, is not empty.
*/
private void check_no_display (pos_list dpy, int lin, int col)
	{ if (dpy -> nrofps <= 0) return;
	  parser_error (lin, col, "meta rule has a display");
	}

/*
   Parses the remainder of a meta rule for nont: one or more
   (possibly empty) expressions separated by ';' and closed by '.'.
   dpy holds the positions parsed along with the left hand side;
   it must be empty. The resulting meta rule is entered; if
   enter_meta_rule () refuses it, a "multiply defined" error is
   reported at (lin, col).
*/
private void rest_meta_rule (char *nont, pos_list dpy, int lin, int col)
	{ meta_alt_list malts = new_meta_alt_list ();
	  meta_rule rule;
	  expr e;

	  check_no_display (dpy, lin, col);
	  do
	     { may_be_expression (&e);
	       app_meta_alt_list (malts, new_meta_alt (e));
	     }
	  while (is_symbol (SEMICOLON));
	  should_be_symbol (POINT, ".", emptyrecset);
	  rule = new_meta_rule (nont, malts);
	  if (enter_meta_rule (rule)) return;
	  parser_error (lin, col, "multiply defined meta nonterminal %s",
			nont);
	}

/*
   Parses the alternative following a ';' in the rule for nont and
   appends it to alts. Three forms are distinguished:
   - nonterminal ':' members
	the left hand side is repeated; an error is reported if it
	differs from nont, and the display parsed with it is used;
   - nonterminal [',' members]
	the nonterminal is the first member of the alternative;
   - anything else
	a possibly empty alternative not starting with a call.
   In the last two forms the display is a copy (rdup_pos_list) of
   the display of the last alternative currently in alts.
   NOTE(review): nont and the parsed name are compared as pointers,
   presumably relying on addto_names () to deliver one unique
   pointer per name -- confirm.
*/
private void expect_new_alternative (char *nont, alt_list alts)
	{ int start_line = line;
	  int start_col = column;
	  char *lhs;
	  pos_list display;
	  member_list members;

	  if (!is_a_nonterminal_with_display (&lhs, &display))
	     may_be_alternative (&members);
	  else if (is_symbol (COLON))
	     { if (lhs != nont)
		  parser_error (start_line, start_col,
				"lhs mismatch between alternatives");
	       may_be_alternative (&members);
	       app_alt_list (alts, new_alt (display, members));
	       return;
	     }
	  else
	     { call c = new_call (lhs, display);
	       member first = new_member_call (c);
	       rest_members (first, &members);
	     };
	  display = rdup_pos_list (alts -> as [alts -> nrofas - 1] -> display);
	  app_alt_list (alts, new_alt (display, members));
	}

/* Recovery set used after a missing ':' in a syntax rule */
private symbol hyperrecset[] = { SEMICOLON, POINT, UNDEFINED };
/* Recovery set used after a missing ',' or ':' in a predefined rule */
private symbol rulerecset[] = { POINT, UNDEFINED };
/* Declared ahead: rest_syntax_rule and rest_syntax_or_start_rule
   call each other */
private void rest_syntax_or_start_rule (char *nont, pos_list dpy, int pred,
					int lin, int col);
/*
   Parses the remainder of the syntax rule for nont, whose first
   alternative first_alt has already been parsed, and enters the
   resulting hyper rule. (lin, col) is the position of the rule,
   used in error messages. nont is remembered in first_nonterminal
   if it is the first syntax rule encountered.

   After every alternative either ';' introduces a further
   alternative, or '.' is required. Since a nonterminal directly
   after the '.' may continue the same rule or start something new,
   this routine also parses that nonterminal and dispatches:
   - followed by '::'		-> a meta rule starts
   - followed by '.', !pred	-> it is the start rule
   - a name other than nont	-> a new syntax rule starts
   - otherwise			-> a further alternative for nont
   In the first three cases the hyper rule for nont is entered
   before continuing with the new rule.
   NOTE(review): nont and nont2 are compared as pointers, presumably
   relying on addto_names () to deliver one unique pointer per
   name -- confirm.
*/
private void rest_syntax_rule (char *nont, int pred, alt first_alt,
			       int lin, int col)
	{ alt_list alts = new_alt_list ();
	  app_alt_list (alts, first_alt);
	  if (first_nonterminal == string_nil) first_nonterminal = nont;
	  while (1)
	     { if (is_symbol (SEMICOLON)) expect_new_alternative (nont, alts);
	       else
		  { char *nont2;
		    pos_list dpy;
		    member_list ml;
		    int lin2, col2;
		    should_be_symbol (POINT, ".", emptyrecset);
		    /* nothing that can start a rule follows:
		       the rule for nont is complete */
		    if ((thistoken != IDENTIFIER) &&
			(thistoken != LEFTPARENTHESIS))
		       { create_hyper_rule (nont, alts, lin, col);
			 return;
		       };
		    /* position of the next left hand side */
		    lin2 = line;
		    col2 = column;
		    should_be_a_nonterminal_with_display (&nont2, &dpy);
		    if (is_symbol (DOUBLECOLON))
		       { create_hyper_rule (nont, alts, lin, col);
			 rest_meta_rule (nont2, dpy, lin2, col2);
			 return;
		       };
		    if (!pred && is_symbol (POINT))
		       { create_hyper_rule (nont, alts, lin, col);
			 create_start_rule (nont2, dpy, lin2, col2);
			 return;
		       };
		    if (nont != nont2)	/* new syntax rule */
		       { create_hyper_rule (nont, alts, lin, col);
			 rest_syntax_or_start_rule
					(nont2, dpy, pred, lin2, col2);
			 return;
		       };
		    /* same left hand side again: one more alternative */
		    should_be_symbol (COLON, ":", hyperrecset);
		    may_be_alternative (&ml);
		    app_alt_list (alts, new_alt (dpy, ml));
		  };
	     };
	};

/*
   Continues after the left hand side nont with display dpy has
   been parsed at (lin, col). When not parsing predefines (!pred)
   a directly following '.' makes it the start rule. Otherwise ':'
   is required, the first alternative is parsed with a copy of dpy
   as its display and rest_syntax_rule () handles the remainder.
*/
private void rest_syntax_or_start_rule (char *nont, pos_list dpy, int pred,
					int lin, int col)
	{ member_list members;
	  alt first;

	  if (!pred && is_symbol (POINT))
	     { create_start_rule (nont, dpy, lin, col);
	       return;
	     };
	  should_be_symbol (COLON, ":", hyperrecset);
	  may_be_alternative (&members);
	  first = new_alt (rdup_pos_list (dpy), members);
	  rest_syntax_rule (nont, pred, first, lin, col);
	}

/*
   Entry of a keyword table for specifications of predefined rules:
   the keyword as written in the input and the value it stands for.
   Every table ends with an entry whose string is string_nil.
*/
typedef struct spec_rec 
	{ char *string;
	  int kind;
	} spec_rec;

/* Type keywords of a predefined meta rule (first specification) */
private spec_rec meta_types [] =
	{{ "string", string_type },
	 { "tuple",  tuple_type },
	 { "int", integer_type },
	 { "any", any_type },
	 { string_nil, 0 }};

/* Kind keywords of a predefined meta rule (second specification) */
private spec_rec meta_kinds [] =
	{{ "single", single_meta_value },
	 { "finite", multiple_meta_value },
	 { "recognizer", recognizer_meta_value },
	 { string_nil, 0}};

/* Emptiness keywords of a predefined meta rule (third specification) */
private spec_rec meta_emptys [] =
	{{ "mayproduceempty", h_mayproduceempty},
	 { "neverproducesempty", h_neverproducesempty},
	 { string_nil, 0}};

/* Kind keywords of a predefined hyper rule */
private spec_rec hyper_kinds [] =
	{{ "predicate", h_predicate },
	 { "semipredicate", h_semipredicate },
	 { string_nil, 0}};

/*
   Requires an identifier that occurs as keyword in table (which
   ends with a string_nil entry) and returns the value stored with
   it. If the identifier is missing or is not in the table, an error
   mentioning the predefined rule nont is reported at the position
   of the specification and 0 is returned.
*/
private int should_be_specification (char *nont, spec_rec *table)
	{ int lin = line;
	  int col = column;
	  char *word;
	  int ix;

	  should_be_identifier (&word);
	  for (ix = 0; table[ix].string != string_nil; ix++)
	     if (strcmp (table[ix].string, word) == 0)
		return (table[ix].kind);
	  parser_error (lin, col, "illegal specification of predefined %s",
			nont);
	  return (0);
	}

/*
   Parses a predefined rule; called after its introducing '$' has
   been consumed. Two forms exist:
      name :: type , kind , emptiness .		(meta rule)
      name [display] : kind .			(hyper rule)
   The keywords are looked up in meta_types, meta_kinds,
   meta_emptys and hyper_kinds. The resulting rule is entered; if
   that is refused, a "multiply predefined" error is reported at
   the position of the name.
*/
private void external_rule ()
	{ int lin = line;
	  int col = column;
	  char *nont;
	  int kind;

	  should_be_identifier (&nont);
	  if (!is_symbol (DOUBLECOLON))
	     { /* predefined hyper rule: one alternative that carries
		  the display and has no members */
	       pos_list display;
	       alt only_alt;
	       alt_list alts;
	       hyper_rule hrule;

	       may_be_display (&display);
	       only_alt = new_alt (display, member_list_nil);
	       alts = new_alt_list ();
	       app_alt_list (alts, only_alt);

	       should_be_symbol (COLON, ":", rulerecset);
	       kind = should_be_specification (nont, hyper_kinds);
	       should_be_symbol (POINT, ".", emptyrecset);
	       hrule = new_external_hyper_rule (nont, alts, kind);
	       if (!enter_hyper_rule (hrule))
		  parser_error (lin, col, "multiply predefined hyper group %s",
				nont);
	     }
	  else
	     { /* predefined meta rule */
	       int type, empty;
	       meta_rule mrule;

	       type = should_be_specification (nont, meta_types);
	       should_be_symbol (COMMA, ",", rulerecset);
	       kind = should_be_specification (nont, meta_kinds);
	       should_be_symbol (COMMA, ",", rulerecset);
	       empty = should_be_specification (nont, meta_emptys);
	       should_be_symbol (POINT, ".", emptyrecset);
	       mrule = new_external_meta_rule (nont, type, kind, empty);
	       if (!enter_meta_rule (mrule))
		  parser_error (lin, col, "multiply predefined meta rule %s",
				nont);
	     };
	}

/*
   Recognizes one rule. When parsing predefines (pred) a '$'
   introduces a predefined rule. Otherwise a nonterminal with
   optional displays is required, followed by '::' for a meta rule
   or by the remainder of a syntax or start rule.
   Returns 1 if a rule was parsed, 0 if the input does not start
   with a rule.
*/
private int is_rule (int pred)
	{ int lin = line;
	  int col = column;
	  char *nont;
	  pos_list dpy;

	  if (pred && is_symbol (DOLLAR))
	     { external_rule ();
	       return (1);
	     };
	  if (!is_a_nonterminal_with_display (&nont, &dpy)) return (0);
	  if (!is_symbol (DOUBLECOLON))
	     rest_syntax_or_start_rule (nont, dpy, pred, lin, col);
	  else rest_meta_rule (nont, dpy, lin, col);
	  return (1);
	}

/* Parses rules until is_rule () no longer recognizes one. */
private void maybe_rules (int pred)
	{ int more;
	  do more = is_rule (pred);
	  while (more);
	}

/* Reports an error if the current token is not the end of file. */
private void should_be_eof ()
	{ if (thistoken != EOFSYMBOL)
	     parser_error (line, column, "end of file expected");
	}

/*
   Creates a start rule calling first_nonterminal, for grammars
   that do not specify one. The display of the call mirrors the
   display of the first alternative of that rule: for position ix
   a fresh affix variable named startrule_<ix> is made, with the
   same flow kind as the original position. If the alternative has
   no display (pos_list_nil), the call gets none either.

   The variable name is formatted with snprintf into a buffer that
   holds "startrule_" plus any int value; the former 20 byte buffer
   was one byte short for a ten digit index.
*/
private void create_start_rule_for_first_nonterminal ()
	{ hyper_rule def;
	  pos_list pl;
	  pos_list dpy;
	  def = lookup_hyper_rule (first_nonterminal);	/* it must be there */
	  pl = def -> alts -> as[0] -> display;
	  if (pl == pos_list_nil) dpy = pos_list_nil;
	  else
	     { int nrofps = pl -> nrofps;
	       int ix;
	       dpy = new_pos_list ();
	       for (ix = 0; ix < nrofps; ix++)
		  { expr e;
		    affix a;
		    char *name;
		    affix_variable var;
		    char buf[32];

		    snprintf (buf, sizeof (buf), "startrule_%d", ix);
		    name = addto_names (buf);
		    var = new_affix_variable (name);
		    a = new_affix_nonterminal (var);
		    e = new_expr_single (a);
		    app_pos_list (dpy, new_pos (pl -> ps[ix] -> kind, e));
		  };
	     };
	  start_rule = new_call (first_nonterminal, dpy);
	}

private void create_start_alt ()
	{ member mem = new_member_call (start_rule);
	  member_list mems = new_member_list ();
	  app_member_list (mems, mem);
	  start_alt = new_alt (new_pos_list (), mems);
	};

/*
   Checks that every position of the start rule display dpy holds
   a single affix; each offending position (counted from 1) is
   reported and counted as a parse error. A display equal to
   pos_list_nil is accepted as is.

   This check runs before the parse errors are summarized, so dpy
   may still hold the results of error recovery: a position that
   failed to parse is stored as pos_nil and a failed expression as
   expr_nil. Those have been reported already and are skipped here
   instead of being dereferenced.
*/
private void check_for_single_affixes (pos_list dpy)
	{ int i;
	  if (dpy == pos_list_nil) return;
	  for (i = 0; i < dpy -> nrofps; i++)
	     { pos p = dpy -> ps[i];
	       if ((p == pos_nil) || (p -> ex == expr_nil)) continue;
	       if (p -> ex -> tag != tag_single)
		  { error ("position %d of start rule is not a single affix",
			   i+1);
		    parse_errors++;
		  };
	     };
	}

/*
   Parses an eag from fd; pred tells whether the predefines
   (nonzero) or the grammar proper (zero) are being parsed.
   fd is closed once the input has been parsed. If parse or
   lexical errors were counted, panic () is called with a summary.
   For a grammar proper the start rule display is checked, a start
   rule is derived from the first syntax rule when none was given,
   and start_alt is built.
   NOTE(review): the code after the panic () calls assumes that
   panic () does not return -- confirm. The summary only mentions
   the number of parse errors, also when only lex_errors is
   nonzero.
*/
public void parse_eag (FILE *fd, int pred)
	{ char *plural;
	  char *what;

	  init_parser ();
	  init_lex (fd);

	  if (!pred) warning ("parsing...");
	  maybe_rules (pred);
	  should_be_eof ();
	  fclose (fd);
	  if (!pred && (start_rule != call_nil))
	     check_for_single_affixes (start_rule -> display);
	  if ((parse_errors > 0) || (lex_errors > 0))
	     { plural = (parse_errors == 1)?"":"s";
	       what = (pred)?"predefines":"grammar";
	       panic ("%d parse error%s found while parsing %s",
		      parse_errors, plural, what);
	     };
	  if (pred) return;
	  if (start_rule == call_nil)
	     { if (first_nonterminal == (char *)0)
		  panic ("no start rule could be found");
	       create_start_rule_for_first_nonterminal ();
	     };
	  create_start_alt ();
	}
