/*
   File: affix_rules.c
   Checks the affix rules

   Copyright (C) 2008-2011 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: affix_rules.c,v 1.29 2012/08/16 20:51:14 marcs Exp $"
*/

/* standard includes */
#include <stdio.h>
#include <string.h>

/* support lib includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>
#include <dcg_string.h>
#include <dcg_warshall.h>

/* libeagbase includes */
#include <ebase_input.h>

/* local includes */
#include "eag_ds.h"
#include "ast_utils.h"
#include "options.h"
#include "globals.h"
#include "grammars.h"
#include "affix_tree.h"
#include "lexer.h"
#include "lexemes.h"
#include "lattices.h"
#include "nullability.h"
#include "lc_rel.h"
#include "contsens.h"
#include "affix_rules.h"

/*
   All affix rules will be stored in one big list which can be indexed by affix_rule_nr.
   We need this indexing anyway as the warshall algorithm will be needed for chain rule
   detection.
*/
static int unique_affix_rule_nr;
static affix_tree affix_space;

/*
   Phase 0:
   Initialize list of all affix rules
*/
static void init_affix_rules ()
{ unique_affix_rule_nr = 0;
  affix_space = affix_tree_nil;
}

string new_anonymous_affix_name ()
{ string aname = dcg_new_fmtd_string ("_ANON__%d", unique_affix_rule_nr);
  unique_affix_rule_nr++;
  return (aname);
}

/*
   Affix rules are compatible if they have the same name, the
   same number of alternatives and if the alternatives are denoted
   by the same set of symbols, although we do accept permutations
*/
static int are_compatible_affix_elems (affix_element elem1, affix_element elem2)
{ if (elem1 -> tag != elem2 -> tag) return (0);
  switch (elem1 -> tag)
    { case TAGAffix_var:
	return (equal_string (elem1 -> Affix_var.vname, elem2 -> Affix_var.vname));
      case TAGAffix_term:
	return (equal_string (elem1 -> Affix_term.tname, elem2 -> Affix_term.tname));
      case TAGAffix_text:
	return (equal_string (elem1 -> Affix_text.text, elem2 -> Affix_text.text));
      case TAGAffix_inum:
	return (equal_int (elem1 -> Affix_inum.ival, elem2 -> Affix_inum.ival));
      case TAGAffix_rnum:
	return (equal_real (elem1 -> Affix_rnum.rval, elem2 -> Affix_rnum.rval));
      default: dcg_bad_tag (elem1 -> tag, "are_compatible_affix_elems");
    };
  return (0);
}

static int elem_is_among_elems (affix_element elem, affix_element_list elems)
{ int ix;
  for (ix = 0; ix < elems -> size; ix++)
    if (are_compatible_affix_elems (elem, elems -> array[ix]))
      return (1);
  return (0);
}

static int are_compatible_affix_alts (affix_alternative alt1, affix_alternative alt2)
{ affix_element_list elems1 = alt1 -> elems;
  affix_element_list elems2 = alt2 -> elems;
  int ix;
  if (alt1 -> tag != alt2 -> tag) return (0);
  if (elems1 -> size != elems2 -> size) return (0);
  if ((alt1 -> tag == TAGAffix_union) || (alt1 -> tag == TAGAffix_partition))
    { /* The order in a union or a partition is irrelevant */
      for (ix = 0; ix < elems1 -> size; ix++)
	if (!elem_is_among_elems (elems1 -> array[ix], elems2))
	  return (0);
      for (ix = 0; ix < elems2 -> size; ix++)
	if (!elem_is_among_elems (elems2 -> array[ix], elems1))
	  return (0);
    }
  else /* The order is important */
    { for (ix = 0; ix < elems1 -> size; ix++)
        if (!are_compatible_affix_elems (elems1 -> array[ix], elems2 -> array[ix]))
          return (0);
    };
  return (1);
}

static int alt_is_among_alts (affix_alternative alt, affix_alternative_list alts)
{ int ix;
  for (ix = 0; ix < alts -> size; ix++)
    if (are_compatible_affix_alts (alt, alts -> array[ix]))
      return (1);
  return (0);
}

static int are_compatible_affix_rules (affix_rule arule1, affix_rule arule2)
{ int ix;
  if (arule1 -> tag != arule2 -> tag) return (0);
  switch (arule1 -> tag)
    { case TAGAffix_synonym:
	return (equal_string (arule1 -> Affix_synonym.syn, arule2 -> Affix_synonym.syn));
      case TAGAffix_alts:
        { affix_alternative_list alts1 = arule1 -> Affix_alts.alts;
	  affix_alternative_list alts2 = arule2 -> Affix_alts.alts;
	  if (alts1 -> size != alts2 -> size) return (0);
	  for (ix = 0; ix < alts1 -> size; ix++)
	    if (!alt_is_among_alts (alts1 -> array[ix], alts2))
     	      return (0);
	  for (ix = 0; ix < alts2 -> size; ix++)
	    if (!alt_is_among_alts (alts2 -> array[ix], alts1))
	      return (0);
	  return (1);
        };
      default: break;
    };
  return (0);
}

/*
   Affix rule checking phase 1
   Collect all affix rule definitions into the local affix tree and all_affix_rules list.
   Register the affix rule number and grammar number in the affix rule definition. 

   Following phase 1, we can iterate over the list of all affix definitions.
*/
static void collect_affix_rules_for_grammar (grammar gra)
{ affix_rule_list arules = gra -> arules;
  int ix;

  /* Enter all affix rules into the global affix rule tree */
  for (ix = 0; ix < arules -> size; ix++)
    { affix_rule arule = arules -> array[ix];
      affix_rule old_arule;

      arule -> anr = all_affix_rules -> size;
      arule -> gnr = gra -> gnr;

      /* Enter the affix rules into the global affix tree: accept compatible grammar rules */
      old_arule = enter_affix_tree (&affix_space, arule);
      if (old_arule == affix_rule_nil)
        app_affix_rule_list (all_affix_rules, arule);
      else if (!strict && are_compatible_affix_rules (arule, old_arule))
	contsens_warning (gra, arule -> line, arule -> col,
			  "Affix rule %s is multiply declared with a compatible rule",
			  arule -> aname);
      else
	contsens_error (gra, arule -> line, arule -> col,
			"Multiple definition of affix rule %s", arule -> aname);
    };
}

/*
   Currently the affix name export is simply a check that if it occurs on the defines
   list that it is this subgrammar that does the export (or at least is compatible with it)
*/
static int affix_rule_is_exported (string aname, grammar gra)
{ affix_rule_list arules = gra -> arules;
  int ix;
  for (ix = 0; ix < arules -> size; ix++)
    if (streq (aname, arules -> array[ix] -> aname))
      return (1);
  return (0);
}

static void export_affix_rules_for_grammar (grammar gra)
{ /* Identify exported affix rules on pdefs */
  prule_list pdefs = gra -> exp_prules;
  int ix = 0;
  while (ix < pdefs -> size)
    { plhs lhs = pdefs -> array[ix] -> lhs;
      if (valid_affix_nonterminal_name (lhs -> rname_parts) && (lhs -> lhs_pars -> size == 0))
	{ /* name consisting of a valid single part without parameters */
	  string aname = lhs -> rname_parts -> array[0];
	  if (affix_rule_is_exported (aname, gra))
	    { /* Correct export, delete from pdefs */
	      del_prule_list (pdefs, ix);
	      continue;
	    };
	};
      ix++;
    };
}

static void collect_affix_rules ()
{ int ix;
  dcg_hint ("      registering affix rules");
  for (ix = 0; ix < all_grammars -> size; ix++)
    { grammar gra = all_grammars -> array[ix];
      collect_affix_rules_for_grammar (gra);
      export_affix_rules_for_grammar (gra);
    };
}

/*
   Affix rule checking phase 2
   Identify all affixes in the right hand sides of affix rules
   Additionally promote all single productions N :: N2 to synonym rules
*/
static affix_rule try_identify_affix_rule (grammar gra, int line, int col, string aname)
{ affix_rule adef = lookup_affix_tree (affix_space, aname);
  if (adef == affix_rule_nil)
    contsens_error (gra, line, col, "Affix rule %s can't be identified", aname);
  return (adef);
}

static void identify_affix_synonym_definition (affix_rule arule, grammar gra)
{ affix_rule syndef = try_identify_affix_rule (gra, arule -> line, arule -> col,
					       arule -> Affix_synonym.syn);
  arule -> Affix_synonym.syndef = syndef;
}

static void identify_affix_element (affix_element elem, grammar gra)
{ affix_rule vdef;
  if (elem -> tag != TAGAffix_var) return;	/* Only variables need to be identified */
  vdef = try_identify_affix_rule (gra, elem -> line, elem -> col, elem -> Affix_var.vname);
  elem -> Affix_var.vdef = vdef;
}

static void identify_affixes_in_affix_elements (affix_element_list elems, grammar gra)
{ int ix;
  for (ix = 0; ix < elems -> size; ix++)
    identify_affix_element (elems -> array[ix], gra);
}

static void identify_affixes_in_affix_alts (affix_alternative_list alts, grammar gra)
{ int ix;
  for (ix = 0; ix < alts -> size; ix++)
    identify_affixes_in_affix_elements (alts -> array[ix] -> elems, gra);
}

static void try_promote_single_productions (affix_rule arule, grammar gra)
{ affix_alternative_list alts = arule -> Affix_alts.alts;
  affix_alternative alt;
  affix_element_list elems;
  affix_element elem;
  if (alts -> size > 1) return;
  alt = alts -> array[0];
  if (alt -> tag != TAGAffix_single) return;
  elems = alts -> array[0] -> elems;
  if (elems -> size > 1) return;
  elem = elems -> array[0];
  if (elem -> tag != TAGAffix_var) return;

  /* We have a single affix production: update the rule */
  arule -> tag = TAGAffix_synonym;
  arule -> Affix_synonym.syn = attach_string (elem -> Affix_var.vname);
  arule -> Affix_synonym.syndef = elem -> Affix_var.vdef;
  contsens_warning (gra, arule -> line, arule -> col,
		    "Promoted single production %s :: %s to synonym rule",
		    arule -> aname, elem -> Affix_var.vname);
  detach_affix_alternative_list (&alts);
}

static void identify_affixes_in_affix_rule (affix_rule arule, grammar gra)
{ switch (arule -> tag)
    { case TAGAffix_synonym:
	identify_affix_synonym_definition (arule, gra);
	break;
      case TAGAffix_alts:
	identify_affixes_in_affix_alts (arule -> Affix_alts.alts, gra);
	try_promote_single_productions (arule, gra);
	break;
      case TAGAffix_prim:	/* done, by default */
      default: break;
    };
}

static void identify_affix_rules_in_grammar (grammar gra)
{ affix_rule_list arules = gra -> arules;
  int ix;
  for (ix = 0; ix < arules -> size; ix++)
    identify_affixes_in_affix_rule (arules -> array[ix], gra);
}
 
static void identify_affix_rules ()
{ int ix;
  dcg_hint ("      identifying in affix rules");
  for (ix = 0; ix < all_grammars -> size; ix++)
    identify_affix_rules_in_grammar (all_grammars -> array[ix]);
}

/*
   Phase 3: Detect all chain productions in the affix rule system
   Iterate over all affix rules to determine which rules contain
   a chain production i.e. a rule of the form N :: N2; or N = N2; 
   Note that this includes synonym definitions.

   Initially we determine the one step chain production as follows:
   rel [i,j] = 1 <=> affix rule i has a chain production to affix rule j.
   Using warshall, we then determine the transitive closure of this
   relation. Those rules who have a 1 on their diagonal entry in the
   transitive closure, contain a chain production in one or more steps
   to itself, which is erroneous.
*/

/* MS: the following function is obsolete because of the single affix production promotion */
static void mark_chain_productions_in_alts (affix_alternative_list alts, int aff_nr,
					    int asize, char *rel)
{ int ix;
  for (ix = 0; ix < alts -> size; ix++)
    { affix_element_list elts = alts -> array[ix] -> elems;
      affix_element elt;
      if (elts -> size != 1)	/* at least >1 marker or affix variable */
	continue;
      elt = elts -> array[0];
      if (elt -> tag == TAGAffix_var)
	{ /* We really have a chain production */
	  int cnr = elt -> Affix_var.vdef -> anr;
	  if ((0 <= cnr) && (cnr < asize))
	    rel[aff_nr * asize + cnr] = 1;
	  else dcg_internal_error ("mark_chain_productions_in_alts");
	};
    };
}

static void mark_chain_productions_in_affix_rule (affix_rule arule, int asize, char *rel)
{ int aff_nr = arule -> anr;
  switch (arule -> tag)
    { case TAGAffix_synonym:
	{ /* Synonyms are always chain productions */
	  affix_rule syn = arule -> Affix_synonym.syndef;
	  int syn_nr = syn -> anr;
	  rel[aff_nr * asize + syn_nr] = 1;
	}; break;
      case TAGAffix_alts:
	{ affix_alternative_list alts = arule -> Affix_alts.alts;
	  mark_chain_productions_in_alts (alts, aff_nr, asize, rel);
        };
      case TAGAffix_prim: break;
      default: dcg_bad_tag (arule -> tag, "mark_chain_productions_in_affix_rule");
    };
}

static void report_chain_affix_rules (int asize, char *trans)
{ int ix;
  for (ix = 0; ix < asize; ix++)
    { affix_rule arule = all_affix_rules -> array[ix];
      int anr = arule -> anr;
      if (trans[anr * asize + anr])
	contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
			       "Affix rule %s defines a chain production", arule -> aname);
    };
}

static void detect_primary_chain_productions ()
{ int ix;
  int asize = all_affix_rules -> size;
  char *rel = dcg_calloc (asize * asize, sizeof (char));
  char *trans;

  /* Mark the one step chain productions */
  for (ix = 0; ix < asize * asize; ix++) rel[ix] = 0;
  dcg_hint ("      detecting chain productions in affix rules");
  for (ix = 0; ix < asize; ix++)
    { affix_rule arule = all_affix_rules -> array[ix];
      mark_chain_productions_in_affix_rule (arule, asize, rel);
    };

  /* Report the multi step self chain productions */
  trans = dcg_warshall (asize, rel); 
  report_chain_affix_rules (asize, trans);

  /* Free the two arrays: we do not need them */
  dcg_detach ((void **) &rel);
  dcg_detach ((void **) &trans);
}

/*
   Phase 4: Resolution of all synonym types.
   Following this phase all syndefs point to their final equivalent
*/
static int is_a_synonym_affix_rule (affix_rule arule, affix_rule *sdef)
{ if (arule == affix_rule_nil) return (0);
  if (arule -> tag == TAGAffix_synonym)
    { *sdef = arule -> Affix_synonym.syndef;
      return (1);
    };
  return (0);
}

static void set_affix_synonym_def (affix_rule arule, affix_rule sdef)
{ if (arule -> tag == TAGAffix_synonym)
    arule -> Affix_synonym.syndef = sdef;
  else dcg_bad_tag (arule -> tag, "set_affix_synonym_def");
}
  
static affix_rule try_resolve_synonym_rule (affix_rule arule)
{ affix_rule syndef, syndef2;

  /* If we are not some synonym, we're done */
  if (!is_a_synonym_affix_rule (arule, &syndef))
    return (arule);

  /* Determine if we are a synonym of something else */
  /* Note: if we have a chain of synonyms, it is traversed only once */
  syndef2 = try_resolve_synonym_rule (syndef);
  set_affix_synonym_def (arule, syndef2);
  return (syndef2);
}

static void resolve_synonym_rules ()
{ int ix;
  dcg_hint ("      resolving synonym affix rules");
  for (ix = 0; ix < all_affix_rules -> size; ix++)
    (void) try_resolve_synonym_rule (all_affix_rules -> array[ix]);
}

/*
   Phase 5:
   Do a meta type check of the affix rules to determine whether an affix
   rule is a numeric type, a text type (including affix rules that define
   a context free grammar), a finite lattice type or a tree structure.

   We can impose an initial typing for most of the affix rules from
   the way that they are structured. We then take the transitive
   closure. If there are still affix rules for which we cannot
   determine the meta typing, we will treat them as tree types (with
   a warning to the user that he should make his intentions more clear)

   Note that we cannot determine whether conflicts arise for the two
   kinds of lattice affix rules.
*/
static void balance_arule_type (affix_rule arule, arule_kind *dkind, arule_kind kind, int *change)
{ if (*dkind == arule_unknown)
    { *dkind = kind;
      if (change != NULL) *change = 1;
    }
  else if (*dkind == arule_error) return;
  else if (*dkind == kind) return;
  else if (kind == arule_error)
    { *dkind = arule_error;
      if (change != NULL) *change = 1;
    }
  else
    { contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
			     "Conflicting affix rule kinds in definition of affix rule %s",
			     arule -> aname);
      *dkind = arule_error;
      if (change != NULL) *change = 1;
    };
}

static void initial_typecheck_alt (affix_rule arule, affix_alternative alt)
{ affix_element_list elems = alt -> elems;
  int ix;
  switch (alt -> tag)
    { case TAGAffix_sequence:
	balance_arule_type (arule, &arule -> kind, arule_tree, NULL);
	break;
      case TAGAffix_concat:
      case TAGAffix_empty:
	balance_arule_type (arule, &arule -> kind, arule_text, NULL);
	break;
      case TAGAffix_union:
      case TAGAffix_partition:
	balance_arule_type (arule, &arule -> kind, arule_lattice, NULL);
      case TAGAffix_single: break;
      default: dcg_bad_tag (alt -> tag, "initial_typecheck_alt");
     };

  /*
     Check for terminal strings or numbers in the elements
     Currently we do not allow rules of the form:

     BRR :: "tree" with NUMBER

     mixtures
  */
  for (ix = 0; ix < elems -> size; ix++)
    { switch (elems -> array[ix] -> tag)
	{ case TAGAffix_text: balance_arule_type (arule, &arule -> kind, arule_text, NULL); break;
	  case TAGAffix_inum: balance_arule_type (arule, &arule -> kind, arule_int,  NULL); break;
	  case TAGAffix_rnum: balance_arule_type (arule, &arule -> kind, arule_real, NULL); break;
	  default: break;
	};
    };
}

static void initial_typecheck_alts (affix_rule arule, affix_alternative_list alts)
{ int ix;
  for (ix = 0; ix < alts -> size; ix++)
    initial_typecheck_alt (arule, alts -> array[ix]);
}

static void initial_typecheck_prim (affix_rule arule)
{ if (streq (arule -> aname, "INT")) arule -> kind = arule_int;
  else if (streq (arule -> aname, "REAL")) arule -> kind = arule_real;
  else if (streq (arule -> aname, "TEXT")) arule -> kind = arule_text;
  else if (streq (arule -> aname, "ANY"))  arule -> kind = arule_any;
  else
    { contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
			     "Unknown primitive affix rule %s", arule -> aname);
      arule -> kind = arule_error;
    };
}

static void initial_typecheck_rule (affix_rule arule)
{ switch (arule -> tag)
    { case TAGAffix_alts: initial_typecheck_alts (arule, arule -> Affix_alts.alts); break;
      case TAGAffix_prim: initial_typecheck_prim (arule);
      default: break;
    };
}

static void check_valid_subkind (affix_rule arule, affix_alternative alt,
				 arule_kind kind, int *change)
{ switch (kind)
    { case arule_tree:
        balance_arule_type (arule, &arule -> kind, arule_tree, change);
        return;
      case arule_lattice:
      case arule_int:
      case arule_real:
      case arule_text:
	{ switch (arule -> kind)
	    { case arule_unknown:
	      case arule_error:
	      case arule_tree: return;
	      default:
	        balance_arule_type (arule, &arule -> kind, kind, change);
	    };
	};
      default: break;
    };
}

static void typecheck_alt (affix_rule arule, affix_alternative alt, int *change)
{ affix_element_list elems = alt -> elems;
  int ix;
  switch (alt -> tag)
    { case TAGAffix_sequence:
	balance_arule_type (arule, &arule -> kind, arule_tree, change);
	break;
      case TAGAffix_concat:
      case TAGAffix_empty:
	balance_arule_type (arule, &arule -> kind, arule_text, change);
	break;
      case TAGAffix_union:
      case TAGAffix_partition:
	balance_arule_type (arule, &arule -> kind, arule_lattice, change);
      case TAGAffix_single: break;
      default: dcg_bad_tag (alt -> tag, "typecheck_alt");
    };

  /* Check for terminal strings in the elements */
  for (ix = 0; ix < elems -> size; ix++)
    { affix_element elem = elems -> array[ix];
      switch (elem -> tag)
	{ case TAGAffix_var:
	    { arule_kind kind = elem -> Affix_var.vdef -> kind;
	      check_valid_subkind (arule, alt, kind, change);
	    };
	    break;
	  case TAGAffix_term:
	    { switch (arule -> kind)
	        { case arule_int:
	 	  case arule_real:
		  case arule_text:
    		    contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
			     "Conflicting affix rule kinds in definition of affix rule %s",
			     arule -> aname);
		    arule -> kind = arule_error;
		    *change = 1;
		  default: break;
	        };
	    };
	    break;
	  case TAGAffix_text:
	    balance_arule_type (arule, &arule -> kind, arule_text, change);
	    break;
	  case TAGAffix_inum:
	    balance_arule_type (arule, &arule -> kind, arule_int, change);
	    break;
	  case TAGAffix_rnum:
	    balance_arule_type (arule, &arule -> kind, arule_real, change);
	    break;
	  default: dcg_bad_tag (elem -> tag, "typecheck_alt");
        };
    };
}

static void typecheck_alts (affix_rule arule, affix_alternative_list alts, int *change)
{ int ix;
  for (ix = 0; ix < alts -> size; ix++)
    typecheck_alt (arule, alts -> array[ix], change);
}

static void typecheck_rule (affix_rule arule, int *change)
{ switch (arule -> tag)
    { case TAGAffix_synonym:
	{ affix_rule syndef = arule -> Affix_synonym.syndef;
	  balance_arule_type (arule, &arule -> kind, syndef -> kind, change);
	}; break;
      case TAGAffix_alts:
	typecheck_alts (arule, arule -> Affix_alts.alts, change);
      case TAGAffix_prim: break;
      default: dcg_bad_tag (arule -> tag, "typecheck_rule");
    };
}

static void typecheck_affix_rules ()
{ int asize = all_affix_rules -> size;
  int nr_passes = 2;
  int change, ix;
  
  /* Do an initial type check */
  dcg_hint ("      typechecking affix rules");
  for (ix = 0; ix < asize; ix++)
    initial_typecheck_rule (all_affix_rules -> array[ix]);

  /* Now iterate over the affix rules until we have a closure */
  do
    { change = 0;
      for (ix = 0; ix < asize; ix++)
	typecheck_rule (all_affix_rules -> array[ix], &change);
      nr_passes++;
    }
  while (change);

  /*
     Assign the tree type to all rules that are still unknown
  */
  for (ix = 0; ix < asize; ix++)
    { affix_rule arule = all_affix_rules -> array[ix];
      if ((arule -> tag == TAGAffix_alts) && (arule -> kind == arule_unknown))
	{ contsens_warning_by_gnr (arule -> gnr, arule -> line, arule -> col,
				   "Affix rule %s assumed to be a tree type", arule -> aname);
	  arule -> kind = arule_tree;
	};
    };

  /* Now reiterate over the affix rules until we have another closure */
  do
    { change = 0;
      for (ix = 0; ix < asize; ix++)
	typecheck_rule (all_affix_rules -> array[ix], &change);
      nr_passes++;
    }
  while (change);
  dcg_hint ("      needed %d pass%s for type check", nr_passes, (nr_passes == 1)?"":"es");
}

/*
   Phase 6: check some wellformedness
   Two alternatives of the same affix rule may not be identical
*/
static int same_element (affix_element elem1, affix_element elem2)
{ if (elem1 -> tag != elem2 -> tag) return (0);
  switch (elem1 -> tag)
    { case TAGAffix_var:
	return (equivalent_affix_rule (elem1 -> Affix_var.vdef, elem2 -> Affix_var.vdef));
      case TAGAffix_term:
	return (streq (elem1 -> Affix_term.tname, elem2 -> Affix_term.tname));
      case TAGAffix_text:
        return (streq (elem1 -> Affix_text.text, elem2 -> Affix_text.text));
      default: dcg_bad_tag (elem1 -> tag, "same_element");
    };
  return (0);
}

static int same_alternatives (affix_alternative alt1, affix_alternative alt2)
{ affix_element_list elems1 = alt1 -> elems;
  affix_element_list elems2 = alt2 -> elems;
  int ix;
  if (alt1 -> tag != alt2 -> tag) return (0);
  if (elems1 -> size != elems2 -> size) return (0);
  for (ix = 0; ix < elems1 -> size; ix++)
    if (!same_element (elems1 -> array[ix], elems2 -> array[ix]))
      return (0);
  return (1);
}

/*
   The check is two fold: when the affix kind is lattice, there may not be more than
   one alternative. In the other cases, no two alternatives may be the same (either
   directly or through equivalence).
*/
static void check_wellformed_alternatives (affix_rule arule, affix_alternative_list alts)
{ int ix, iy;
  if ((arule -> kind == arule_lattice) && (alts -> size > 1))
    contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
			  "Affix rules of the lattice kind may not have more than one alternative");
  else
    for (ix = 0; ix < alts -> size; ix++)
      for (iy = ix + 1; iy < alts -> size; iy++)
        if (same_alternatives (alts -> array[ix], alts -> array[iy]))
	  contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
				 "Alternative %d and alternative %d of affix rule %s are equal",
				 ix + 1, iy + 1, arule -> aname);
}

static void check_wellformedness_for_rule (affix_rule arule)
{ if (arule -> tag == TAGAffix_alts)
    check_wellformed_alternatives (arule, arule -> Affix_alts.alts);
}

static void check_wellformedness ()
{ int ix;
  dcg_hint ("      checking wellformedness conditions");
  for (ix = 0; ix < all_affix_rules -> size; ix++)
    check_wellformedness_for_rule (all_affix_rules -> array[ix]);
}

/*
   Following phase 6, the type check is complete:
   The following routines may then be used for type access and comparison
*/
affix_rule get_prime_affix_rule (affix_rule arule)
{ affix_rule sdef;
  if (arule == affix_rule_nil) return (arule);
  if (arule == AFFIX_ERROR) return (AFFIX_ERROR);
  if (is_a_synonym_affix_rule (arule, &sdef)) return (sdef);
  return (arule);
}

string get_affix_rule_name (affix_rule arule)
{ affix_rule sdef = get_prime_affix_rule (arule);
  return (sdef -> aname);
}

affix_alternative_list pick_rhs_affix_alts (affix_rule arule)
{ affix_rule sdef = get_prime_affix_rule (arule);
  if ((sdef == affix_rule_nil) || (sdef == AFFIX_ERROR))
    return (affix_alternative_list_nil);
  if (sdef -> tag == TAGAffix_alts)
    return (sdef -> Affix_alts.alts);
  return (affix_alternative_list_nil);
}

/*
   Affix rule equivalence
*/
int equivalent_affix_rule (affix_rule arule1, affix_rule arule2)
{ affix_rule sdef1 = get_prime_affix_rule (arule1);
  affix_rule sdef2 = get_prime_affix_rule (arule2);
  if (sdef1 == affix_rule_nil) return (0);
  if (sdef2 == affix_rule_nil) return (0);
  if (sdef1 == AFFIX_ERROR) return (0);
  if (sdef2 == AFFIX_ERROR) return (0);
  return (sdef1 == sdef2);
}

/*
   For resolution of the grammar rules we need to know the leftcorner
   relation for tree kinds and text kinds. Before we can actually
   determine the left corner relation, we simplify the affix rules:

   All affix rules of the text kind are promoted to a concatenation.
   Moreover, for the text kinds, we must be able to detect, whether
   an affix rule may produce empty or not.

   Currently, we do not check for affix nonterminals that always
   produce empty, since we cannot have delayed evaluation at the
   second level.
*/
static void check_empty_producing_text_terminals (int gnr, affix_element_list elems)
{ int ix;
  for (ix = 0; ix < elems -> size; ix++)
    { affix_element elem = elems -> array[ix];
      string text;
      int is_regexp;
      int termnr;
      nullability empty;
      if (elem -> tag != TAGAffix_text) continue;
      text = elem -> Affix_text.text;
      analyze_lexeme (gnr, elem -> line, elem -> col, !agfl_compatible, text, RegexpMatch,
		      &termnr, &is_regexp, &empty);
      if (empty != e_never_produces_empty)
	contsens_error_by_gnr (gnr, elem -> line, elem -> col, "This %s may match empty",
			       (is_regexp)?"regular expression":"terminal");
      elem -> Affix_text.termnr = termnr;
      elem -> Affix_text.is_regexp = is_regexp;
      elem -> empty = e_never_produces_empty;
    };
}

static void simplify_and_mark_affix_alternative (affix_rule arule, affix_alternative alt)
{ arule_kind kind = arule -> kind;
  switch (alt -> tag)
    { case TAGAffix_empty:
	if (kind != arule_text)   dcg_internal_error ("simplify_affix_alternative");
	if (alt -> elems -> size) dcg_internal_error ("simplify_affix_alternative");
	arule -> empty = e_may_produce_empty;
	alt -> tag = TAGAffix_concat;
	break;
      case TAGAffix_single:
	if (alt -> elems -> size != 1) dcg_internal_error ("simplify_affix_alternative");
	switch (kind)
	  { case arule_text:    alt -> tag = TAGAffix_concat; break;
	    case arule_lattice: alt -> tag = TAGAffix_union; break;
	    case arule_tree:    alt -> tag = TAGAffix_sequence; break;
	    case arule_int:
	    case arule_real:	arule -> empty = e_never_produces_empty; break;
	    default: dcg_bad_tag (kind, "simplify_affix_alternative");
	  };
      default: break;
    };
  if (alt -> tag == TAGAffix_concat)
    check_empty_producing_text_terminals (arule -> gnr, alt -> elems);
}

static void simplify_and_mark_affix_rules ()
{ int ix, iy;
  dcg_hint ("      simplifying affix rules");
  for (ix = 0; ix < all_affix_rules -> size; ix++)
    { affix_rule arule = all_affix_rules -> array[ix];
      affix_alternative_list alts;

      /* If a type is equivalent to TEXT, it may (but not always) produce empty */
      if (is_text_type (arule))
	{ arule -> empty = e_may_produce_empty;
	  arule -> npred = 1;
	};
      if (arule -> tag != TAGAffix_alts) continue;
      alts = arule -> Affix_alts.alts;
      for (iy = 0; iy < alts -> size; iy++)
        simplify_and_mark_affix_alternative (arule, alts -> array[iy]);
    };
}

/*
   Lookup and static storage of the primitive types INT, REAL, STRING
*/
static affix_rule int_arule;
static affix_rule real_arule;
static affix_rule text_arule;
static void create_primitive_type_refs ()
{ int_arule = lookup_affix_tree (affix_space, "INT");
  if (int_arule == affix_rule_nil)
    dcg_panic ("could not locate primitive type INT");
  real_arule = lookup_affix_tree (affix_space, "REAL");
  if (real_arule == affix_rule_nil)
    dcg_panic ("could not locate primitive type REAL");
  text_arule = lookup_affix_tree (affix_space, "TEXT");
  if (text_arule == affix_rule_nil)
    dcg_panic ("could not locate primitive type TEXT");
}

/*
   Try dump all affix rules
*/
static void try_dump_affix_rules ()
{ int ix, iy;
  if (!dump_affix_rules) return;
  for (ix = 0; ix < all_affix_rules -> size; ix++)
    { affix_rule arule = all_affix_rules -> array[ix];
      int_list lc_nrs = arule -> lc_nrs;
      dcg_eprint ("\t %d: %s (%s, ", ix, arule -> aname, string_from_arule_kind (arule -> kind));
      if (arule -> tag == TAGAffix_synonym)
	dcg_eprint ("synonym of %s", arule -> Affix_synonym.syndef -> aname);
      else
	switch (arule -> empty)
	  { case e_unknown: dcg_eprint ("unk"); break;
	    case e_may_produce_empty: dcg_eprint ("may"); break;
	    case e_always_produces_empty: dcg_eprint ("always"); break;
	    case e_never_produces_empty: dcg_eprint ("never");
	    default: break;
	  };
      dcg_eprint (")");

      if (lc_nrs != int_list_nil)
	{ dcg_eprint ("\tLC nrs:");
	  for (iy = 0; iy < lc_nrs -> size; iy++)
	    dcg_eprint ("%s %d", (iy)?",":"", lc_nrs -> array[iy]);
	};
      dcg_wlog ("");
    };
}

/*
   The driver for the affix rules check across all subgrammars.
   Since the primitive types are going to be needed in the subsequent
   type check of the rules, we preallocate them.
*/
void analyze_affix_rules ()
{ dcg_warning (0, "   analyzing affix rules...");
  init_affix_rules ();
  collect_affix_rules ();
  dcg_hint ("      collected %d affix rules", all_affix_rules -> size);
  identify_affix_rules (); 
  dcg_panic_if_errors ();
  detect_primary_chain_productions ();
  dcg_panic_if_errors ();
  resolve_synonym_rules ();
  typecheck_affix_rules ();
  dcg_panic_if_errors ();
  check_wellformedness ();
  dcg_panic_if_errors ();
  simplify_and_mark_affix_rules ();
  dcg_panic_if_errors ();
  determine_nullability_in_affix_rules ();
  dcg_panic_if_errors ();
  determine_lc_relation_in_affix_rules ();
  create_primitive_type_refs ();
  try_dump_affix_rules ();
}

/*
   Support for rule analysis

   Two affix rules are similar if
   a) they are equivalent (meaning that they are the same modulo synonym definitions)
   b) they are both of the text kind (because the meta check can only be done at runtime
   d) they are both of the real kind 
   c) they are both of the int kind 
   e) they are both of the lattice kind and have a non empty intersection
*/
int similar_affix_rule (affix_rule arule1, affix_rule arule2)
{ affix_rule sdef1 = get_prime_affix_rule (arule1);
  affix_rule sdef2 = get_prime_affix_rule (arule2);
  if (sdef1 == affix_rule_nil) return (0);
  if (sdef2 == affix_rule_nil) return (0);
  if (sdef1 == AFFIX_ERROR) return (0);
  if (sdef2 == AFFIX_ERROR) return (0);
  if (sdef1 == sdef2) return (1);
  if ((arule1 -> kind == arule_text) && (arule2 -> kind == arule_text))
    return (1);
  if ((arule1 -> kind == arule_real) && (arule2 -> kind == arule_real))
    return (1);
  if ((arule1 -> kind == arule_int) && (arule2 -> kind == arule_int))
    return (1);
  if ((arule1 -> kind == arule_lattice) && (arule2 -> kind == arule_lattice))
    return (non_empty_intersection (arule1, arule2));
  return (0);
}

int similar_signatures (signature sig1, signature sig2)
{ int ix;
  if (sig1 -> size != sig2 -> size) return (0);
  for (ix = 0; ix < sig1 -> size; ix++)
    if (!similar_affix_rule (sig1 -> array[ix], sig2 -> array[ix]))
      return (0);
  return (1);
}

/*
   Primitive types are INT, REAL or TEXT
*/
static int is_primitive_type (affix_rule arule, string tname)
{ affix_rule rhs_rule;
  if ((arule == affix_rule_nil) || (arule == AFFIX_ERROR)) return (0);
  rhs_rule = get_prime_affix_rule (arule);
  return ((rhs_rule -> tag == TAGAffix_prim) && streq (rhs_rule -> aname, tname));
}

affix_rule get_primitive_type (arule_kind kind)
{ switch (kind)
    { case arule_error:		return (AFFIX_ERROR);
      case arule_int:  		return (int_arule);
      case arule_real: 		return (real_arule);
      case arule_text: 		return (text_arule);
      case arule_unknown:
      case arule_lattice:
      case arule_tree:
      case arule_any:		break;
      default: dcg_bad_tag (kind, "get_primitive_type");
    };
  return (affix_rule_nil);
}

/*
   Primitive type recognition
*/
int is_a_primitive_type (affix_rule arule)
{ affix_rule rhs_rule;
  if ((arule == affix_rule_nil) || (arule == AFFIX_ERROR)) return (0);
  rhs_rule = get_prime_affix_rule (arule);
  return (rhs_rule -> tag == TAGAffix_prim);
}

int is_integer_type (affix_rule arule)
{ return (is_primitive_type (arule, "INT"));
}

int is_real_type (affix_rule arule)
{ return (is_primitive_type (arule, "REAL"));
}

int is_text_type (affix_rule arule)
{ return (is_primitive_type (arule, "TEXT"));
}

/*
   When the affix rule system is checked, we can identify affix instantiations
   Perhaps MAXBUFLEN should move to some general header file: the lexer.c needs it as well.
*/
#define MAXBUFLEN 1024
affix_rule identify_affix_instance (string vname)
{ char my_buf[MAXBUFLEN];
  int len = strlen (vname);
  char *ptr = my_buf + len - 1;
  affix_rule arule;
  strcpy (my_buf, vname);
  while (('0' <= *ptr) && (*ptr <= '9')) ptr--;
  ptr++;
  *ptr = '\0';
  arule = lookup_affix_tree (affix_space, my_buf);
  return (get_prime_affix_rule (arule));
}
