/*
   File: lexer.c
   Defines a lexer for EAG3

   Copyright (C) 2008-2011 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: lexer.c,v 1.18 2013/03/13 11:58:37 marcs Exp $"
*/

/* global includes */
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_error.h>
#include <dcg_string.h>
#include <dcg_plist.h>
#include <dcg_plist_ops.h>

/* libeagbase includes */
#include "ebase_input.h"

/* local includes */
#include "eag_ds.h"
#include "options.h"
#include "globals.h"
#include "parser.h"
#include "lexemes.h"
#include "lexer.h"

/* Lexer administration */
#define MAXBUFLEN 1024
static char line_buffer[MAXBUFLEN];
static FILE *lex_fd;
static string lex_fname;
static int lex_line;
static int lex_col;
static int lex_predef_flag;

static void lexer_error (char *format, ...)
{ char buf[MAXSTRLEN];
  va_list arg_ptr;
  va_start (arg_ptr, format);
  vsprintf (buf, format, arg_ptr);
  va_end (arg_ptr);
  dcg_error (0, "File %s, line %d, col %d: %s", lex_fname, lex_line, lex_col + 1, buf);
}

static void lexer_error_at (int line, int col, char *format, ...)
{ char buf[MAXSTRLEN];
  va_list arg_ptr;
  va_start (arg_ptr, format);
  vsprintf (buf, format, arg_ptr);
  va_end (arg_ptr);
  dcg_error (0, "File %s, line %d, col %d: %s", lex_fname, line, col + 1, buf);
}

static void lexer_warning_at (int line, int col, char *format, ...)
{ char buf[MAXSTRLEN];
  va_list arg_ptr;
  va_start (arg_ptr, format);
  vsprintf (buf, format, arg_ptr);
  va_end (arg_ptr);
  dcg_warning (0, "File %s, line %d, col %d: %s", lex_fname, line, col + 1, buf);
}

/*
   Position interface
   The column returned is one higher than our position in the line
*/
void get_current_filename (string *rfname)
{ *rfname = lex_fname;
}

void get_current_position (int *rline, int *rcol)
{ *rline = lex_line;
  *rcol = lex_col + 1;
}

void get_erroneous_name (string *rname)
{ *rname = new_string ("<ERROR>");
}

int is_predefs_spec ()
{ return (lex_predef_flag);
}

/*
   We read on a per line basis
*/
static void read_next_line ()
{ char *rbuf;
  if (line_buffer[0] == '\0') return;
  lex_line++;
  lex_col = 0;
  rbuf = fgets (line_buffer, MAXBUFLEN - 1, lex_fd);
  if (rbuf == NULL) line_buffer[0] = '\0';	/* Mark EOF */
}

int end_of_file ()
{ return (line_buffer[0] == '\0');
}

static int end_of_line ()
{ return (line_buffer[lex_col] == '\n');
}

/*
   Recognition of character classes
*/
static char curr_char ()
{ return (line_buffer [lex_col]);
}

static int is_char (char ch)
{ if (line_buffer [lex_col] != ch) return (0);
  lex_col++;
  return (1);
}

static char get_next_char ()
{ if (end_of_line ()) return ('\0');
  return (line_buffer [lex_col++]);
}

static int is_white_space ()
{ if (end_of_line ())
    { read_next_line ();
      return (1);
    }
  if (is_char ('\r')) return (1);
  if (is_char ('\t')) return (1);
  return (is_char (' '));
}

static int is_digit (char *rch)
{ char ch = line_buffer [lex_col];
  if (('0' <= ch) && (ch <= '9'))
    { lex_col++;
      *rch = ch;
      return (1);
    };
  return (0);
}

static int is_octal_digit (int *val)
{ char ch = line_buffer [lex_col];
  if (('0' <= ch) && (ch <= '7'))
    *val = (int) (ch - '0');
  else return (0);
  lex_col++;
  return (1);
}

static int is_hex_digit (int *val)
{ char ch = line_buffer [lex_col];
  if (('0' <= ch) && (ch <= '9'))
    *val = (int) (ch - '0');
  else if (('A' <= ch) && (ch <= 'F'))
    *val = (int) (ch - 'A') + 10;
  else if (('a' <= ch) && (ch <= 'f'))
    *val = (int) (ch - 'a') + 10;
  else return (0);
  lex_col++;
  return (1);
}

static int is_small_letter (char *rch)
{ char ch = line_buffer [lex_col];
  if (('a' <= ch) && (ch <= 'z'))
    { lex_col++;
      *rch = ch;
      return (1);
    };
  return (0);
}

static int is_capital_letter (char *rch)
{ char ch = line_buffer [lex_col];
  if (('A' <= ch) && (ch <= 'Z'))
    { lex_col++;
      *rch = ch;
      return (1);
    };
  return (0);
}

static int char_is_extended_letter (char ch)
{ unsigned int ch_val = (unsigned int) ch;
  return ((128 <= ch_val) && (ch_val <= 255));
}

static int is_extended_letter (char *rch)
{ char ch = line_buffer [lex_col];
  if (char_is_extended_letter (ch))
    { lex_col++;
      *rch = ch;
      return (1);
    };
  return (0);
}

static int char_is_ornament_char (char ch)
{ return ((ch == '_') || (ch == '@') || (ch == '?') || (ch == '+') || (ch == '-'));
}

static int is_affix_nonterminal_ornament (char *rch)
{ if (is_char ('_')) *rch = '_';
  else if (is_char ('@')) *rch = '@';
  else if (is_char ('?')) *rch = '?';
  else return (0);
  return (1);
}

static int is_ornament (char *rch)
{ if (is_char ('-')) *rch = '-';
  else if (is_char ('+')) *rch = '+';
  else if (is_char ('_')) *rch = '_';
  else if (is_char ('@')) *rch = '@';
  else if (is_char ('?')) *rch = '?';
  else return (0);
  return (1);
}

static int is_letter (char *rch)
{ return (is_small_letter (rch) || is_capital_letter (rch));
}

static int is_letment (char *rch)
{ return (is_letter (rch) || is_digit (rch) ||
	  is_extended_letter (rch) || is_ornament (rch));
}

static int is_relation_letment (char *rch)
{ if (is_char ('_')) *rch = '_';
  else if (is_letter (rch)) ;
  else return (0);
  return (1);
}

static int is_affix_nonterminal_letment (char *rch)
{ return (is_capital_letter (rch) || is_extended_letter (rch) ||
	  is_affix_nonterminal_ornament (rch));
}

static int ch_is_affix_nonterminal_letment (char ch)
{ unsigned uval = (unsigned) ch;
  if (('A' <= ch) && (ch <= 'Z')) return (1);
  if ((128 <= uval) && (uval <= 255)) return (1);
  switch (ch)
    { case '_':
      case '@':
      case '?': return (1);
      default: break;
    };
  return (0);
}

static int is_affix_terminal_first_letment (char *rch)
{ char curr_ch = line_buffer[lex_col];
  char next_ch = line_buffer[lex_col + 1];
  if ((curr_ch == '-') || (curr_ch == '+'))
    { /* Do LL(2) lookahead to determine */
      if (('a' <= next_ch) && (next_ch <= 'z'))
	{ lex_col++;
          *rch = curr_ch;
	  return (1);
	};
      return (0);
    };
  return (is_small_letter (rch));
}

static int is_affix_terminal_letment (char *rch)
{ return (is_small_letter (rch) || is_digit (rch) ||
	  is_extended_letter (rch) || is_ornament (rch));
}

/*
   Do LL(2) lookahead where needed
*/
int ahead_special_affix_terminal ()
{ char curr_ch = line_buffer[lex_col];
  char next_ch = line_buffer[lex_col + 1];
  return (((curr_ch == '-') || (curr_ch == '+')) &&
	  (('a' <= next_ch ) && (next_ch <= 'z')));
}

static int ahead_rest_real_denoter ()
{ if (line_buffer[lex_col] == '.')
    return (('0' <= line_buffer[lex_col + 1]) && (line_buffer[lex_col + 1] <= '9'));
  if ((line_buffer[lex_col] != 'E') && (line_buffer[lex_col] != 'e')) return (0);
  if (('0' <= line_buffer[lex_col + 1]) && (line_buffer[lex_col + 1] <= '9')) return (1);
  if ((line_buffer[lex_col+1] != '+') && (line_buffer[lex_col + 1] != '-')) return (0);
  return (('0' <= line_buffer[lex_col + 2]) && (line_buffer[lex_col + 2] <= '9'));
}

int ahead_dot_number ()
{ int curr_col = lex_col;
  if (line_buffer[curr_col++] != '.') return (0);
  while ((line_buffer[curr_col] == ' ') ||
	 (line_buffer[curr_col] == '\t')) curr_col++;
  return (('0' <= line_buffer[curr_col]) && (line_buffer[curr_col] <= '9'));
}

/*
   Layout and comments
*/
static int is_wanted_variant ()
{ char *ptr;
  if (lex_col != 1) return (0);
  if (variants_option == string_nil) return (0);
  for (ptr = variants_option; *ptr; ptr++)
    if ((line_buffer[lex_col] == *ptr) && (line_buffer[lex_col] == ' '))
      { lex_col += 2;
	return (1);
      };
  return (0);
}

static int is_comment ()
{ if (!is_char ('#')) return (0);
  while (!end_of_line ())
    { if (is_wanted_variant ()) return (1);
      (void) get_next_char ();
    }
  return (1);
}

static void may_be_layout ()
{ while (is_white_space () || is_comment ()) ;
}

/*
   Recognition of fixed symbols
*/
static int my_ahead_symbol (char *sy, int *rcol)
{ char *sptr = sy;
  int col = lex_col;
  for (; *sptr; sptr++, col++)
    if (*sptr != line_buffer[col])
      return (0);
  *rcol = col;
  return (1);
}

int ahead_symbol (char *sy)
{ int col;
  return (my_ahead_symbol (sy, &col));
}

int is_symbol (char *sy)
{ int col;
  if (!my_ahead_symbol (sy, &col)) return (0);
  lex_col = col;
  may_be_layout ();
  return (1);
}

void should_be_symbol (char *sy)
{ if (is_symbol (sy)) return;
  lexer_error ("symbol %s expected", sy);
}

int is_keyword (char *keyword)
{ char nch;
  int col;
  if (!my_ahead_symbol (keyword, &col)) return (0);
  nch = line_buffer [col];
  if (('A' <= nch) && (nch <= 'Z')) return (0);
  if (('a' <= nch) && (nch <= 'z')) return (0);
  if (char_is_extended_letter (nch)) return (0);
  if (char_is_ornament_char (nch)) return (0);
  lex_col = col;
  may_be_layout ();
  return (1);
}

void should_be_keyword (char *sy)
{ if (is_keyword (sy)) return;
  lexer_error ("symbol %s expected", sy);
}

/*
   Pragmats
*/
int is_pragmat ()
{ int tcol, tlin, inum, warn;
  char my_buf[MAXBUFLEN];
  char *text;
  char nch;
  int mloc = 0;
  int more_parts = 1;
  if (!is_char ('|')) return (0);
  may_be_layout ();
  tlin = lex_line;
  tcol = lex_col;

  /* We need at least one small letter in a pragmat */
  if (!is_small_letter (&nch))
    { lexer_error_at (lex_line, tcol, "Pragmat expected");
      /* skip to end of line? */
      return (1);
    }
  while (more_parts)
    { /* store first char of part */
      my_buf [mloc++] = nch;
      while (is_small_letter (&nch))
        my_buf[mloc++] = nch;
      may_be_layout ();
      if ((more_parts = is_small_letter (&nch)))
	my_buf[mloc++] = ' ';
    };
  my_buf[mloc] = '\0';

  /* Pragmat scanned, now which one is it */
  if (streq (my_buf, "translate off"))
    translate_option = TranslationsOff;
  else if (streq (my_buf, "translate default"))
    translate_option = TranslationsDefault;
  else if (streq (my_buf, "translate"))
    { should_be_text_constant (&text);
      detach_string (&translate_fname);
      translate_fname = normalize_text_constant (text, &warn);
      translate_option = TranslationsByFile;
      if (warn) lexer_warning_at (tlin, tcol, "Superfluous escape sequence");
    }
  else if (streq (my_buf, "line"))
    { should_be_integer_number (&inum);
      if (inum >= 0)
	{ lex_line = inum;
	  lex_col = 1;
	};
    }
  else if (streq (my_buf, "separators"))
    { should_be_text_constant (&text);  
      separators_option = normalize_text_constant (text, &warn);
      if (warn) lexer_warning_at (tlin, tcol, "Superfluous escape sequence");
      detach_string ((void **) &text);
    }
  else if (streq (my_buf, "white space off"))
    white_space_option = new_string ("");
  else if (streq (my_buf, "white space"))
    { should_be_text_constant (&text);  
      if ((strlen (text) != 0) && (strchr (text, ' ') == NULL))
	lexer_error_at (tlin, tcol, "A space should be part of a non empty white space");
      else
	{ white_space_option = normalize_text_constant (text, &warn);
          if (warn) lexer_warning_at (tlin, tcol, "Superfluous escape sequence");
	};
      detach_string ((void **) &text);
      if (is_char ('*'))
	{ empty_white_space_option = 1;
	  may_be_layout ();
	};
    }
  else if (streq (my_buf, "partial parse"))
    partial_parse_option = 1;
  else if (streq (my_buf, "segment mode"))
    { input_mode_option = ParagraphInputMode;
      partial_parse_option = 1;
    }
  else if (streq (my_buf, "document mode"))
    input_mode_option = DocumentInputMode;
  else if (streq (my_buf, "paragraph mode"))
    input_mode_option = ParagraphInputMode;
  else if (streq (my_buf, "line mode"))
    input_mode_option = LineInputMode;
  else if (streq (my_buf, "invisible"))
    { should_be_symbol (":");
      should_be_invisible_nonterminal_list ();
      may_be_layout ();
    }
  else if (streq (my_buf, "closed triple db"))
    closed_triple_db_option = 1;
  else if (streq (my_buf, "long dependency delimiters"))
    long_dependency_delimiters_option = 1;
  else if (streq (my_buf, "short dependency delimiters"))
    long_dependency_delimiters_option = 0;
  else if (streq (my_buf, "hyphen convention on"))
    hyphen_convention_option = 1;
  else if (streq (my_buf, "hyphen convention off"))
    hyphen_convention_option = 0;
  else if (streq (my_buf, "hybrid parsing"))
    hybrid_parsing_option = 1;
  else if (streq (my_buf, "encoding"))
    { should_be_text_constant (&text);
      encoding_option = text;
      if (streq_nocase (text, "unicode") || streq_nocase (text, "utf8"))
        utf8_processing = 1;
    }
  else if (streq (my_buf, "radix"))
    { should_be_text_constant (&text);
      radix_option = text;
    }
  else if (streq (my_buf, "unicode") || streq (my_buf, "utf8"))
    utf8_processing = 1;
  else lexer_error_at (tlin, tcol, "Unknown pragmat");

  if (is_char ('|')) may_be_layout ();
  else lexer_error ("'|' expected");

  return (1);
}

/*
   Numbers
*/
void should_be_integer_number (int *inum)
{ int tcol = lex_col;
  real rnum;
  int kind;
  if (!is_number (&kind, inum, &rnum))
    { lexer_error ("Integer number expected");
      *inum = -1;
    };

  if (kind)
    { lexer_error_at (lex_line, tcol, "Integer number expected");
      *inum = -1;
    };
}

#define MAXINTDIV10 214748364
#define MAXINTMOD10 7
int is_number (int *kind, int *inum, real *rnum)
{ char my_buf[MAXBUFLEN];
  int tcol, ix;
  int mloc = 0;
  char dig;
  if (!is_digit (&dig)) return (0);
  tcol = lex_col;
  my_buf[mloc++] = dig;
  while (is_digit (&dig))
    my_buf[mloc++] = dig;
  if (!ahead_rest_real_denoter ())
    { /* We have an integer denoter, convert it to INT */
      int value = 0;
      for (ix = 0; ix < mloc; ix++)
	{ int dval = (int)(my_buf[ix] - '0');
	  if ((value > MAXINTDIV10) ||
	      ((value == MAXINTDIV10) && (dval > MAXINTMOD10)))
	    { lexer_error_at (lex_line, tcol, "Integer overflow");
	      break;
	    };
	  value = value * 10 + dval;
	};

      *inum = value;
      *kind = 0;
    }
  else
    { /* We have the first part of a real denoter, read the rest */
      int digs_after_period = 0;
      real value = 0.0;
      int exponent = 0;
      int exp_sign = 0;
      if (is_char ('.'))
        { /* Parse rest of digits and remember their count */
	  while (is_digit (&dig))
	    { my_buf[mloc++] = dig;
	      digs_after_period++;
	    };
	};

      /* Check for exponent */
      if (is_char ('E') || is_char ('e'))
        { if (is_char ('-')) exp_sign = 1;
	  else if (is_char ('+')) ;
	  while (is_digit (&dig))
	    { int dval = dig - '0';
	      tcol = lex_col;
	      if ((exponent > MAXINTDIV10) ||
	          ((exponent == MAXINTDIV10) && (dval > MAXINTMOD10)))
		{ lexer_error_at (lex_line, tcol, "Exponent overflow");
		  break;
		};
	      exponent = exponent * 10 + dval;
	    };
        };
      if (exp_sign) exponent = -exponent;
      exponent -= digs_after_period;

      /* Convert to real */
      for (ix = 0; ix < mloc; ix++)
	{ real dval = (real)(int)(my_buf[ix] - '0');
	  value = value * 10.0 + dval;
	};

      *rnum = value * pow (10.0, exponent);
      *kind = 1;
    };
  may_be_layout ();
  return (1);
}

/*
   Text constants
*/
void should_be_text_constant (string *text)
{ if (is_text_constant (text)) return;
  lexer_error ("text constant expected");
  *text = string_nil;
}

/*
   Some special chars in text constants
*/
static void add_hex_char (char *my_buf, int *mloc, int tloc)
{ int value = 0;
  int ix, dig;
  lex_col++;
  for (ix = 0; ix < 2; ix++)
    if (is_hex_digit (&dig))
      value = 16 * value + dig;
    else
      { lexer_error_at (lex_line, tloc, "premature end of hexadecimal char constant");
	break;
      }
  if (value == 0xAD)
    lexer_warning_at (lex_line, tloc, "this hexadecimal constant encodes for a soft hyphen");
  my_buf[*mloc] = value;
  (*mloc)++;
}

static void add_octal_char (char *my_buf, int *mloc, int tloc)
{ int value = 0;
  int ix, dig;
  lex_col++;
  for (ix = 0; ix < 3; ix++)
    if (is_octal_digit (&dig))
      value = 8 * value + dig;
    else
      { lexer_error_at (lex_line, tloc, "premature end of octal char constant");
	break;
      }
  if (value == 0xAD)
    lexer_warning_at (lex_line, tloc, "this octal constant encodes for a soft hyphen");
  my_buf[*mloc] = value;
  (*mloc)++;
}

static void add_utf8_char (char *my_buf, int *mloc, int tloc, int nr)
{ int ix, dig, hdr, nr_bytes;
  int value = 0;

  /* scan character */
  for (ix = 0; ix < nr; ix++)
    if (is_hex_digit (&dig))
      value = 16 * value + dig;
    else
      { lexer_error_at (lex_line, tloc, "premature end of utf8 char constant");
	break;
      }

  /* Encode character */
  if (value == 0xAD)
    lexer_warning_at (lex_line, tloc, "this utf8 constant encodes for a soft hyphen");

  if (value & 0xFFFE0000)
    { lexer_error_at (lex_line, tloc, "this unicode encoding is too large");
      value = 42;
      hdr = 0;
      nr_bytes = 1;
    }
  else if (value & 0x1F0000) { hdr = 0xF0; nr_bytes = 4; }
  else if (value & 0xF800) { hdr = 0xE0; nr_bytes = 3; }
  else if (value & 0x0780) { hdr = 0xC0; nr_bytes = 2; }
  else { hdr = 0; nr_bytes = 1; };

  do
    { nr_bytes--;
      my_buf[*mloc] = (char) (hdr | (value >> (nr_bytes * 6)));
      (*mloc)++;
      value &= ((1 << (nr_bytes * 6)) - 1);
      hdr = 0x80;
    }
  while (nr_bytes > 0);
}

/*
   Text constants are recognized with a predefined character conversion
   The following (C) escape sequences are directly recognized:
   \a BEL (ctrl-G)	\n (NL)  (ctrl-J)	\'	'	\ooo (Octal)
   \b BS  (ctrl-H)	\r (RET) (ctrl-M)	\"	"	\xhh (Hex)
   \f FF  (ctrl-L)	\t (TAB) (ctrl-I)

   Additionally \u+HHHH and \U+HHHHHHHH are recognized as proper UTF8 sequences
   provided that the utf8_processing is enabled.

   The other escape sequences (most notoriously \\ are left in the code:
   At this point we don't know yet whether a text constant is used as affix,
   terminal (with literal, prefix, infix and suffix markings) or as regular
   expression (with literal, prefix, infix, suffix and escaped operator markings).

   It is left to the output coding to get rid of superfluous escapes (and \\ never is).
*/
int is_text_constant (string *text)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  int tloc = lex_col;
  if (!is_char ('\"')) return (0);
  while (1)
    { if (end_of_line ())
        { lexer_error_at (lex_line, tloc, "unterminated text constant");
	  break;
	};
      if (is_char ('\"')) break;
      if (is_char ('\\'))
	{ if (end_of_line ())
	    { lexer_error_at (lex_line, tloc, "unterminated text constant");
	      break;
	    };

	  switch (curr_char ())
	    { case '\'': my_buf[mloc++] = '\''; lex_col++; break;
	      case '\"': my_buf[mloc++] = '\"'; lex_col++; break;
	      case 'a':	my_buf[mloc++] = '\a'; lex_col++; break;
	      case 'b': my_buf[mloc++] = '\b'; lex_col++; break;
	      case 'f': my_buf[mloc++] = '\f'; lex_col++; break;
	      case 'n': my_buf[mloc++] = '\n'; lex_col++; break;
	      case 'r': my_buf[mloc++] = '\r'; lex_col++; break;
	      case 't': my_buf[mloc++] = '\t'; lex_col++; break;
	      case 'x': add_hex_char (my_buf, &mloc, tloc); break;
	      case 'u': add_utf8_char (my_buf, &mloc, tloc, 4); break;
	      case 'U': add_utf8_char (my_buf, &mloc, tloc, 8); break;
	      case '0':
	      case '1':
	      case '2':
              case '3':
              case '4':
	      case '5':
	      case '6':
	      case '7': add_octal_char (my_buf, &mloc, tloc); break;
	      default:
		/* Keep as is (including \\) */
		my_buf[mloc++] = '\\';
		my_buf[mloc++] = curr_char ();
		lex_col++;
	        break;
	    };
	}
      else my_buf[mloc++] = get_next_char ();
    };
  my_buf[mloc] = '\0';
  *text = new_string (my_buf);
  may_be_layout ();
  return (1);
}

/*
   Name parts should start with a letter followed by letments
*/
int is_name_part (string *part)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  char mch;
  if (!is_letter (&mch)) return (0);
  my_buf[mloc++] = mch;
  while (is_letment (&mch))
    my_buf[mloc++] = mch;
  my_buf[mloc] = '\0';
  *part = new_string (my_buf);
  may_be_layout ();
  return (1);
}

/*
   Module names are simply identifier parts
*/
void should_be_module_name (string *mname)
{ if (is_module_name (mname)) return;
  get_erroneous_name (mname);
  lexer_error ("module name expected");
}

int is_module_name (string *mname)
{ return (is_name_part (mname));
}

/*
   Affix terminals should start with a small letter followed by small letments
   For linguistic application we should also allow for a leading - or +.
*/
int is_affix_terminal (string *aname)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  char mch;
  if (!is_affix_terminal_first_letment (&mch)) return (0);
  my_buf[mloc++] = mch;
  while (is_affix_terminal_letment (&mch))
    my_buf[mloc++] = mch;
  my_buf[mloc] = '\0';
  *aname = new_string (my_buf);
  may_be_layout ();
  return (1);
}

/*
   Affix nonterminals start with a capital letter followed by capital letments.
   We do not allow digits in affix nonterminals, although affix variables have
   an optional integer suffix.
*/
int valid_affix_nonterminal_name (string_list ids)
{ string id;
  char *ptr;
  if (ids -> size != 1) return (0);
  id = ids -> array[0];
  for (ptr = id; *ptr; ptr++)
    if (!ch_is_affix_nonterminal_letment (*ptr)) return (0);
  return (1);
}

void should_be_affix_name (string *aname)
{ if (is_affix_name (aname)) return;
  get_erroneous_name (aname);
  lexer_error ("affix nonterminal expected");
}

int is_affix_name (string *aname)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  char mch;
  if (!is_capital_letter (&mch)) return (0);
  my_buf[mloc++] = mch;
  while (is_affix_nonterminal_letment (&mch))
    my_buf[mloc++] = mch;
  my_buf[mloc] = '\0';
  *aname = new_string (my_buf);
  may_be_layout ();
  return (1);
}

void should_be_bold_name (string *bname)
{ if (is_bold_name (bname)) return;
  get_erroneous_name (bname);
  lexer_error ("bold name expected");
}

int is_bold_name (string *bname)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  char mch;
  if (!is_capital_letter (&mch)) return (0);
  my_buf[mloc++] = mch;
  while (is_capital_letter (&mch))
    my_buf[mloc++] = mch;
  my_buf[mloc] = '\0';
  *bname = new_string (my_buf);
  may_be_layout ();
  return (1);
}

void should_be_relation_name (string *rname)
{ if (is_relation_name (rname)) return;
  get_erroneous_name (rname);
  lexer_error ("relation name expected");
}

int is_relation_name (string *rname)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  char mch;
  if (!is_capital_letter (&mch)) return (0);
  my_buf[mloc++] = mch;
  while (is_relation_letment (&mch))
    my_buf[mloc++] = mch;
  my_buf[mloc] = '\0';
  *rname = new_string (my_buf);
  may_be_layout ();
  return (1);
}

void should_be_affix_variable_name (string *aname)
{ if (is_affix_variable_name (aname)) return;
  get_erroneous_name (aname);
  lexer_error ("affix variable expected");
}

int is_affix_variable_name (string *aname)
{ char my_buf[MAXBUFLEN];
  int mloc = 0;
  char mch;
  if (!is_capital_letter (&mch)) return (0);
  my_buf[mloc++] = mch;
  while (is_affix_nonterminal_letment (&mch))
    my_buf[mloc++] = mch;
  while (is_digit (&mch))
    my_buf[mloc++] = mch;
  my_buf[mloc] = '\0';
  *aname = new_string (my_buf);
  may_be_layout ();
  return (1);
}

/*
   Initialize the lexical scanner with a new file
*/
void init_lexer (FILE *fd, char *fname, int predef_flag)
{ if (fd == NULL) dcg_internal_error ("init_lexer");
  lex_predef_flag = predef_flag;
  lex_fd = fd;

  /* Reset token administration */
  lex_fname = new_string (fname);
  lex_line = 0;
  lex_col = 0;
  strcpy (line_buffer, "\n");
  may_be_layout ();
};

void finish_lexer ()
{ fclose (lex_fd);
}
