/*
   File: lex.c
   Does lexical analysis of EAGs
*/

/* global includes */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>

/* libeag includes */
#include <export.h>
#include <error.h>
#include <textstorage.h>

/* local includes */
#include <sizes.h>
#include <lex.h>

/* public variables */
/* Number of lexical errors reported so far; reset by init_lex (). */
public int lex_errors;
/* Start position of the current token (copied from nextline/nextcolumn
   each time insymbol () advances). */
public int line, column;
/* Current token and the one-token lookahead that insymbol () scans. */
public symbol thistoken, nexttoken;
/* Text attribute of the current token (identifier, string or set). */
public char *sval;
/* Numeric attribute of the current token (numbers). */
public int nval;

/* private variables */
/* Start position and attributes of the lookahead token `nexttoken'. */
private int nextline, nextcolumn;
private char *nextsval;
private int nextnval;

/* Input stream, as handed to init_lex (). */
private FILE *in;
/* Current character and one character of lookahead, advanced by nextch (). */
private int thischar, nextchar;
/* Line/column counters maintained by nextch () while consuming input. */
private int lexline, lexcolumn;

/*
   Report a lexical error at the given source position.
   The message is built printf-style from `format' and the variable
   arguments, the error counter is bumped and the text is handed to
   error ().  Formatting is bounded by the buffer size, so an over-long
   message is truncated instead of overrunning the stack buffer.
*/
private void lexical_error (int lin, int col, char *format, ...)
	{ char buf[MAXSTRLEN];
	  va_list arg_ptr;
	  va_start (arg_ptr, format);
	  /* vsnprintf always terminates the result and never writes
	     more than sizeof (buf) bytes */
	  vsnprintf (buf, sizeof (buf), format, arg_ptr);
	  va_end (arg_ptr);

	  lex_errors++;
	  error ("lexical error at line %d, col %d: %s", lin, col, buf);
	};

/*
   Report a lexical warning at the given source position.
   The message is built printf-style from `format' and the variable
   arguments and handed to warning (); the error counter is untouched.
   Formatting is bounded by the buffer size, so an over-long message is
   truncated instead of overrunning the stack buffer.
*/
private void lexical_warning (int lin, int col, char *format, ...)
	{ char buf[MAXSTRLEN];
	  va_list arg_ptr;
	  va_start (arg_ptr, format);
	  /* vsnprintf always terminates the result and never writes
	     more than sizeof (buf) bytes */
	  vsnprintf (buf, sizeof (buf), format, arg_ptr);
	  va_end (arg_ptr);
	  warning ("lexical warning at line %d, col %d: %s", lin, col, buf);
	};

/*
   Advance the input by one character.
   The position counters are updated according to the character being
   left behind, the lookahead becomes the current character and, unless
   end of file was already seen, a fresh lookahead is read.
*/
private void nextch ()
{
	if (thischar != '\n')
		lexcolumn++;
	else {
		/* leaving a newline: start counting on the next line */
		lexline++;
		lexcolumn = 1;
	}

	thischar = nextchar;

	/* once EOF is in the lookahead, never read from the stream again */
	if (nextchar == EOF)
		return;
	nextchar = fgetc (in);
}

/*
   If the current character is an ASCII letter, store it in *value,
   consume it and return 1.  Otherwise return 0 and leave both the
   input and *value untouched.
*/
private int is_letter (char *value)
{
	int lower = ('a' <= thischar) && (thischar <= 'z');
	int upper = ('A' <= thischar) && (thischar <= 'Z');

	if (!lower && !upper)
		return (0);

	*value = thischar;
	nextch ();
	return (1);
}

/*
   If the current character is an ASCII letter or decimal digit, store
   it in *value, consume it and return 1.  Otherwise return 0 and leave
   both the input and *value untouched.
*/
private int is_letgits (char *value)
{
	/* letters are handled (and consumed) by is_letter */
	if (is_letter (value))
		return (1);

	if ((thischar < '0') || ('9' < thischar))
		return (0);

	*value = thischar;
	nextch ();
	return (1);
}

/*
   Skip layout: blanks, tabs, newlines and comments.
   A comment starts at '#' and runs up to (not including) the next
   newline; that newline is then skipped as ordinary layout.
   A comment on the last line of a file need not be terminated by a
   newline: hitting end of file also ends the comment, otherwise the
   loop would spin forever because thischar stays EOF.
*/
private void may_skip_layout()
	{ while ((thischar == ' ') || (thischar == '\t') ||
		 (thischar == '\n') || (thischar == '#'))
	     if (thischar == '#')
	        do { nextch(); }
		while ((thischar != '\n') && (thischar != EOF));
	     else nextch ();
	};

private char string_store[MAXSTRLEN];
private void scanidentifier (char firstchar)
	{ char *ptr = string_store + 1;
	  string_store[0] = firstchar;
	  while (1)
	     if (is_letgits (ptr)) ptr++;
	     else if ((thischar == ' ') || (thischar == '\t')) nextch();
	     else break;
	  *ptr = '\0';
	  nexttoken = IDENTIFIER;
	  nextsval = addto_names (string_store);
	};

/*
   Try to recognize a special character inside a string or set that is
   delimited by `c'.  Two forms are accepted:
     - the delimiter written twice (old syntax, gives a warning),
       which denotes the delimiter itself;
     - a backslash escape: \n, \t, \\, \", \{, \} or backslash followed
       by the delimiter.
   On success the denoted character is stored in *ptr, the input is
   consumed and 1 is returned.  If the input does not start a special
   character, 0 is returned and nothing is consumed or stored.
   An unknown escape is reported as a lexical error; the character
   after the backslash is then stored literally so that *ptr is always
   defined when 1 is returned (the caller advances its write pointer).
*/
private int is_a_special (char c, char *ptr)
	{ int lin = lexline;
	  int col = lexcolumn;
	  if ((thischar == c) && (nextchar == c))
	     { *ptr = c;
	       nextch ();
	       nextch ();
	       lexical_warning (lin, col, "old fashioned syntax");
	       return (1);
	     };
	  if (thischar != '\\') return (0);
	  nextch ();
	  switch (thischar)
	     { case 'n': *ptr = '\n'; break;
	       case 't': *ptr = '\t'; break;
	       case '\\': *ptr = '\\'; break;
	       case '"': *ptr = '"'; break;
	       case '{': *ptr = '{'; break;
	       case '}': *ptr = '}'; break;
	       default:
		  if (thischar == c)
		     { *ptr = c;
		     }
		  else
		     { lexical_error (lin, col, "Illegal special chars \\%c",
				      thischar);
		       /* never leave a stale byte in the caller's buffer */
		       *ptr = (char) thischar;
		     };
	     };
	  nextch ();
	  return (1);
	};

private void scan_string_or_set (char c)
	{ char *ptr = string_store;
	  while (1)
	     { if ((thischar == '\n') || (thischar == EOF))
		  { lexical_error (nextline, nextcolumn,
				   "string or set contains newline or eof");
		    nextsval = addto_names ("<ERROR>");
		    return;
		  }
	       else if (is_a_special (c, ptr)) ptr++;
	       else if (thischar == c) break;
	       else
		  { *ptr = thischar;
		    ptr++;
		    nextch ();
		  };
	     };
	  nextch ();		/* c */
	  *ptr = '\0';
	  nextsval = addto_names (string_store);
	};

/*
   Scan a string; the current character is its opening double quote.
   The lookahead token becomes STRING.
*/
private void scanstring ()
{
	nextch ();			/* opening quote */
	scan_string_or_set ('"');	/* body and closing quote */
	nexttoken = STRING;
}

/*
   Scan a set; the current character is its opening brace.
   The lookahead token becomes SET.
*/
private void scanset ()
{
	nextch ();			/* opening brace */
	scan_string_or_set ('}');	/* body and closing brace */
	nexttoken = SET;
}

/*
   If the current character is a decimal digit, store its numeric
   value (0..9) in *value, consume it and return 1.  Otherwise return 0
   and leave both the input and *value untouched.
*/
private int is_digit (int *value)
{
	if ((thischar < '0') || ('9' < thischar))
		return (0);

	*value = thischar - '0';
	nextch ();
	return (1);
}

/* Largest accepted number is 2147483647, split as DIV10 * 10 + 7 */
#define MAXINTDIV10 214748364
#define MAXINTMOD10 8

/*
   Scan the rest of a decimal number whose first digit `firstdig' has
   already been consumed.  The lookahead token becomes NUMBER with the
   scanned value as attribute.
   If appending a digit would take the value past 2147483647, a lexical
   error is reported, the remaining digits are consumed and the
   attribute is 0 (not the partial value accumulated so far).
*/
private void scannumber (int firstdig)
	{ int dig;
	  int value = firstdig;
	  nexttoken = NUMBER;
	  while (is_digit (&dig))
	     { /* check before multiplying so the arithmetic cannot overflow */
	       if ((value > MAXINTDIV10) ||
		   ((value == MAXINTDIV10) && (MAXINTMOD10 <= dig)))
		  { lexical_error (nextline, nextcolumn, "too large number");
		    while (is_digit (&dig));
		    nextnval = 0;
		    return;
		  };
	       value = value * 10 + dig;
	     };
	  nextnval = value;
	};

/*
   Scan ':' or '::'; the current character is the first colon.
   The lookahead token becomes COLON or DOUBLECOLON accordingly.
*/
private void scancolons ()
{
	nextch ();			/* first colon */
	if (thischar != ':') {
		nexttoken = COLON;
		return;
	}

	nextch ();			/* second colon */
	nexttoken = DOUBLECOLON;
}

/*
   Deliver the single character token `sy': consume the current
   character and make `sy' the lookahead token.
*/
private void yieldsymbol (int sy)
{
	nextch ();
	nexttoken = sy;
}

/*
   Handle a character that cannot start any token: report it as a
   lexical error at the start position of the lookahead token, consume
   it and make UNDEFINED the lookahead token.
*/
private void yieldundefined ()
{
	int offending = thischar;

	lexical_error (nextline, nextcolumn, "Illegal character: '%c'",
		       offending);
	nextch ();
	nexttoken = UNDEFINED;
}

/*
   Scan the cut symbol '->'; the current character is '-'.
   A '-' that is not followed by '>' is treated as an illegal character.
*/
private void scancut ()
{
	if (nextchar != '>') {
		yieldundefined ();
		return;
	}

	nextch ();			/* '-' */
	nextch ();			/* '>' */
	nexttoken = CUT;
}

/*
   Advance the token stream by one token.
   The previous lookahead (token, position and attributes) becomes the
   current token; then layout is skipped and a new lookahead token is
   scanned from the input, recording its start position first.
*/
public void insymbol ()
{
	char first_letter;
	int first_digit;

	/* promote the lookahead to current token */
	thistoken = nexttoken;
	line = nextline;
	column = nextcolumn;
	sval = nextsval;
	nval = nextnval;

	/* position of the new lookahead is where the layout ends */
	may_skip_layout ();
	nextline = lexline;
	nextcolumn = lexcolumn;

	/* both tests consume the character they recognize */
	if (is_letter (&first_letter)) {
		scanidentifier (first_letter);
		return;
	}
	if (is_digit (&first_digit)) {
		scannumber (first_digit);
		return;
	}

	switch (thischar) {
	case '!':
		yieldsymbol (EXCLAMATIONMARK);
		break;
	case '"':
		scanstring ();
		break;
	case '$':
		yieldsymbol (DOLLAR);
		break;
	case '(':
		yieldsymbol (LEFTPARENTHESIS);
		break;
	case ')':
		yieldsymbol (RIGHTPARENTHESIS);
		break;
	case '*':
		yieldsymbol (STAR);
		break;
	case '+':
		yieldsymbol (PLUS);
		break;
	case ',':
		yieldsymbol (COMMA);
		break;
	case '-':
		scancut ();
		break;
	case '.':
		yieldsymbol (POINT);
		break;
	case ':':
		scancolons ();
		break;
	case ';':
		yieldsymbol (SEMICOLON);
		break;
	case '>':
		yieldsymbol (FLOW);
		break;
	case '^':
		yieldsymbol (UP);
		break;
	case '{':
		scanset ();
		break;
	case EOF:
		yieldsymbol (EOFSYMBOL);
		break;
	default:
		yieldundefined ();
		break;
	}
}

/*
   Initialize the lexer to read from `fd' and prime the token stream.
   The character window starts out holding two blanks that are skipped
   as layout; with the column counter starting at -1 the first real
   input character ends up at line 1, column 1.
   insymbol () is called twice: the first call scans the first token
   into the lookahead, the second promotes it to the current token and
   scans the one after it.
*/
public void init_lex (FILE *fd)
{
	in = fd;
	lex_errors = 0;

	/* two dummy blanks in front of the real input */
	thischar = ' ';
	nextchar = ' ';
	lexline = 1;
	lexcolumn = -1;

	insymbol ();
	insymbol ();
}
