/*
   File: lookahead.c
   Calculates first and follow sets for every nonterminal.
   Determines director sets for alternatives (this does include the
   LCiN alternatives in the red_N () functions).

   Copyright (C) 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: lookahead.c,v 1.5 2012/11/14 16:19:46 marcs Exp $"
*/


/* standard includes */
#include <stdio.h>
#include <string.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>
#include <dcg_string.h>

/* libeagbase includes */
#include <ebase_ds.h>
#include <ebase_input.h>

/* local includes */
#include "eag_ds.h"
#include "ast_utils.h"
#include "options.h"
#include "globals.h"
#include "lookahead.h"

/*
   Functions to dump transition sets
*/
/*
   Log a readable description of a single transition pair through
   dcg_wlog. Terminals, lexicon nonterminals and regular expressions
   use the number of the pair as index into the corresponding global
   table, so that the origin (or the canonic rule name) can be shown.
   A class that is not listed here is passed to dcg_bad_tag.
*/
static void dump_transition_pair (trans_pair pair)
{ int nr = pair -> number;
  switch (pair -> class)
    { /* Classes that are fully described by a fixed text */
      case TransError:      dcg_wlog ("Error");  break;
      case TransWord:       dcg_wlog ("$WORD");  break;
      case TransOther:      dcg_wlog ("$OTHER"); break;
      case TransAny:        dcg_wlog ("$ANY");   break;
      case TransWhiteSpace: dcg_wlog ("$<WS>");  break;
      case TransEndOfText:  dcg_wlog ("$EOT");   break;

      /* Classes whose number indexes one of the global tables */
      case TransTerminal:
        { terminal found = all_terminals -> array[nr];
          dcg_wlog ("Terminal %d ('%s')", nr, found -> origin);
          break;
        }
      case TransLexNont:
        { lex_nont lnont = all_lex_nonts -> array[nr];
          rule defining_rule = all_syntax_rules -> array[lnont -> rule_nr];
          dcg_wlog ("%s", defining_rule -> rspec -> canonic_name);
          break;
        }
      case TransMatch:
        { nfa match_nfa = all_regexp_nfas -> array[nr];
          dcg_wlog ("$MATCH %d ('%s')", nr, match_nfa -> origin);
          break;
        }
      case TransSkip:
        { nfa skip_nfa = all_regexp_nfas -> array[nr];
          dcg_wlog ("$SKIP %d ('%s')", nr, skip_nfa -> origin);
          break;
        }
      default:
        dcg_bad_tag (pair -> class, "dump_transition_pair");
    }
}

/*
   Log a whole transition set: "<>" for a nil set, "[]" for a set
   without elements and otherwise an empty log line followed by
   the description of every pair in the set, in set order.
*/
static void dump_transition_set (trans_pair_list set)
{ int nr;
  if (set == trans_pair_list_nil)
    { dcg_wlog ("<>");
      return;
    }
  if (set -> size == 0)
    { dcg_wlog ("[]");
      return;
    }

  /* A set with at least one element */
  dcg_wlog ("");
  nr = 0;
  while (nr < set -> size)
    { dump_transition_pair (set -> array[nr]);
      nr++;
    }
}

/*
   The initial determination of the first sets actually does two things:
   it initializes the first sets of the alternatives and rules, and it
   determines the first sets of those members that correspond to a trellis
   transition at runtime (terminal, lexicon nonterminal and any of the
   special quasi nonterminals: $WORD, $MATCH, $SKIP, $OTHER, $ANY and $EOT).
*/
/*
   Map the name of a quasi nonterminal onto its transition class.
   The names are tried in the order of the table below; a name that
   is not in the table yields TransError.
*/
static int class_from_name (char *rname)
{ static const struct
    { char *name;
      int class;
    } quasi_classes[] =
    { { "$WORD",  TransWord },
      { "$MATCH", TransMatch },
      { "$SKIP",  TransSkip },
      { "$OTHER", TransOther },
      { "$ANY",   TransAny },
      { "$EOT",   TransEndOfText }
    };
  int nr_classes = (int) (sizeof (quasi_classes) / sizeof (quasi_classes[0]));
  int nr;
  for (nr = 0; nr < nr_classes; nr++)
    if (streq (rname, quasi_classes[nr].name))
      return (quasi_classes[nr].class);
  return (TransError);
}

/*
   Yield the term number of the regular expression argument of a call.
   This only applies when the call has exactly one argument and that
   argument is tagged TAGRegexp; in every other case 0 is returned.
*/
static int regexp_nr_from_args (affix_term_list args)
{ if (args -> size == 1)
    { affix_term only_arg = args -> array[0];
      if (only_arg -> tag == TAGRegexp)
        return (only_arg -> Regexp.termnr);
    }
  return (0);
}

/*
   Set the initial first transition of a member that is a call.
   - A call to a defined rule whose kind is r_lexicon gets a
     TransLexNont transition, numbered by the lnr of the rule's spec.
   - A call to a quasi rule with a single name part that is recognized
     by class_from_name gets a transition of that class; its number is
     the regexp number taken from the arguments (0 if there is none).
   - Calls to anonymous options, anonymous groups and external rules
     are left without a first transition.
   Any other rule tag is passed to dcg_bad_tag.
*/
static void determine_initial_first_set_from_call (member m)
{ rule rdef = m -> Res_call.rdef;
  spec rspec = rdef -> rspec;
  string_list rname_parts = rspec -> rname_parts;
  affix_term_list args = m -> Res_call.args;
  switch (rdef -> tag)
    { case TAGDefs:
        if (rspec -> rkind == r_lexicon)
          m -> first = new_trans_pair (TransLexNont, rspec -> lnr);
        break;
      case TAGQuasi_rule:
        { int class, number;
          if (rname_parts -> size != 1) return;
          class = class_from_name (rname_parts -> array[0]);
          if (class == TransError) return;
          number = regexp_nr_from_args (args);
          m -> first = new_trans_pair (class, number);
        };
        /* Leave the switch explicitly instead of falling into the next cases */
        break;
      case TAGAnonymous_option:
      case TAGAnonymous_group:
      case TAGExt_rule:
        /* These calls do not correspond to a single transition */
        break;
      default: dcg_bad_tag (rdef -> tag, "determine_initial_first_set_from_call");
    };
}

/*
   Set the initial first transition of a terminal member: a regular
   expression terminal gets class TransMatch, any other terminal gets
   class TransTerminal. The number is the term number of the member.
*/
static void determine_initial_first_set_from_terminal (member m)
{ int number = m -> Res_term.termnr;
  int class = TransTerminal;
  if (m -> Res_term.is_regexp)
    class = TransMatch;
  m -> first = new_trans_pair (class, number);
}

/*
   Dispatch the initial first set determination on the kind of member.
   Calls and terminals may receive a first transition; guards and
   operators never do. Any other tag is passed to dcg_bad_tag.
*/
static void determine_initial_first_set_in_member (member m)
{ switch (m -> tag)
    { case TAGRes_call:
        determine_initial_first_set_from_call (m);
        break;
      case TAGRes_term:
        determine_initial_first_set_from_terminal (m);
        /* Leave the switch explicitly instead of falling into the next cases */
        break;
      case TAGRes_guard:
      case TAGOp:
        /* Nothing to determine for these members */
        break;
      default: dcg_bad_tag (m -> tag, "determine_initial_first_set_in_member");
    };
}

/*
   Determine the initial first transitions of the member(s) in a
   free word order group: either its single member or every member
   of the group. Any other tag is passed to dcg_bad_tag.
*/
static void determine_initial_first_sets_in_fwo (fwo_group fwo)
{ member_list mems;
  int nr;
  switch (fwo -> tag)
    { case TAGSingle:
        determine_initial_first_set_in_member (fwo -> Single.mem);
        break;
      case TAGFwo:
        mems = fwo -> Fwo.mems;
        for (nr = 0; nr < mems -> size; nr++)
          determine_initial_first_set_in_member (mems -> array[nr]);
        break;
      default:
        dcg_bad_tag (fwo -> tag, "determine_initial_first_sets_in_fwo");
    }
}

/*
   Give the alternative a fresh, empty first set and determine the
   initial first transitions of all its members.
*/
static void determine_initial_first_sets_in_alt (alternative alt)
{ fwo_group_list fwos = alt -> members;
  int nr = 0;
  alt -> first = new_trans_pair_list ();
  while (nr < fwos -> size)
    { determine_initial_first_sets_in_fwo (fwos -> array[nr]);
      nr++;
    }
}

/* Initialize the first sets of every alternative in the group */
static void determine_initial_first_sets_in_group (group grp)
{ alternative_list group_alts = grp -> alts;
  int alt_nr = 0;
  while (alt_nr < group_alts -> size)
    { determine_initial_first_sets_in_alt (group_alts -> array[alt_nr]);
      alt_nr++;
    }
}

/* Initialize the first sets in the group of every definition */
static void determine_initial_first_sets_in_defs (definition_list defs)
{ int def_nr = 0;
  while (def_nr < defs -> size)
    { determine_initial_first_sets_in_group (defs -> array[def_nr] -> grp);
      def_nr++;
    }
}

/* Give a rule a fresh, empty first set and a fresh, empty follow set */
static void give_rule_empty_lookahead_sets (rule srule)
{ srule -> first = new_trans_pair_list ();
  srule -> follow = new_trans_pair_list ();
}

/*
   Initialize the first and follow set of a rule and the first sets
   of everything in its body. Rules that always produce empty and
   external rules are left untouched. Any unexpected rule tag is
   passed to dcg_bad_tag.
*/
static void determine_initial_first_sets_in_rule (rule srule)
{ if (srule -> empty == e_always_produces_empty)
    return;
  switch (srule -> tag)
    { case TAGExt_rule:
        return;
      case TAGDefs:
        give_rule_empty_lookahead_sets (srule);
        determine_initial_first_sets_in_defs (srule -> Defs.defs);
        break;
      case TAGAnonymous_option:
        give_rule_empty_lookahead_sets (srule);
        determine_initial_first_sets_in_group (srule -> Anonymous_option.grp);
        break;
      case TAGAnonymous_group:
        give_rule_empty_lookahead_sets (srule);
        determine_initial_first_sets_in_group (srule -> Anonymous_group.grp);
        break;
      default:
        dcg_bad_tag (srule -> tag, "determine_initial_first_sets_in_rule");
    }
}

/*
   Initialize the first sets for all syntax rules. Afterwards make
   sure that the root rule has a first and a follow set, even when
   the pass over the rules left them nil.
*/
static void determine_initial_first_sets ()
{ int rule_nr = 0;
  while (rule_nr < all_syntax_rules -> size)
    { determine_initial_first_sets_in_rule (all_syntax_rules -> array[rule_nr]);
      rule_nr++;
    }

  /* The root rule may have been skipped by the loop above */
  if (root_rule -> first == trans_pair_list_nil)
    root_rule -> first = new_trans_pair_list ();
  if (root_rule -> follow == trans_pair_list_nil)
    root_rule -> follow = new_trans_pair_list ();
}

/*
   Two functions to aid in accumulating first and follow sets.
*/
/*
   Insert a pair into a set that is kept ordered on (class, number).
   If an equal pair is already present nothing happens. Otherwise
   the pair is attached, inserted at its ordered position and
   *change is set to 1; *change is never reset here.
*/
static void add_pair_to_trans_set (trans_pair pair, trans_pair_list set, int *change)
{ int pos = 0;
  while (pos < set -> size)
    { trans_pair curr = set -> array[pos];
      int same_class = (curr -> class == pair -> class);
      int curr_is_smaller = (curr -> class < pair -> class) ||
                            (same_class && (curr -> number < pair -> number));
      if (!curr_is_smaller)
        { /* Either the pair is already present or this is the insertion point */
          if (same_class && (curr -> number == pair -> number))
            return;
          break;
        }
      pos++;
    }

  /* Not present yet: insert the (class, number) pair and report the growth */
  ins_trans_pair_list (set, pos, attach_trans_pair (pair));
  *change = 1;
}

/*
   Add every pair of alt_set to set, one by one and in the order
   of alt_set; *change is set by add_pair_to_trans_set on growth.
*/
static void add_set_to_trans_set (trans_pair_list alt_set, trans_pair_list set, int *change)
{ int nr = 0;
  while (nr < alt_set -> size)
    { add_pair_to_trans_set (alt_set -> array[nr], set, change);
      nr++;
    }
}

/*
   In subsequent passes, the first sets of alternatives and rules will
   grow until they reach a fixed point.
*/
/*
   Add what a member can start with to the given set.
   A member with its own first transition contributes that pair.
   Otherwise only a call contributes: the first set of the called
   rule, unless that rule always produces empty. A called rule
   without a first set is reported as an internal error.
*/
static void accumulate_first_set_in_member (member m, trans_pair_list set, int *change)
{ rule callee;

  /* A member with a transition of its own */
  if (m -> first != trans_pair_nil)
    { add_pair_to_trans_set (m -> first, set, change);
      return;
    }

  /* From here on only calls can contribute */
  if (m -> tag != TAGRes_call)
    return;
  callee = m -> Res_call.rdef;
  if (callee -> empty == e_always_produces_empty)
    return;

  /* Consistency check: the called rule must have a first set */
  if (callee -> first == trans_pair_list_nil)
    dcg_internal_error ("accumulate_first_set_in_member");

  add_set_to_trans_set (callee -> first, set, change);
}

/*
   Add to the set what a free word order group can start with:
   the contribution of its single member or of each of its members.
   Any other tag is passed to dcg_bad_tag.
*/
static void accumulate_first_sets_in_fwo (fwo_group fwo, trans_pair_list set, int *change)
{ member_list mems;
  int nr;
  switch (fwo -> tag)
    { case TAGSingle:
        accumulate_first_set_in_member (fwo -> Single.mem, set, change);
        break;
      case TAGFwo:
        mems = fwo -> Fwo.mems;
        for (nr = 0; nr < mems -> size; nr++)
          accumulate_first_set_in_member (mems -> array[nr], set, change);
        break;
      default:
        dcg_bad_tag (fwo -> tag, "accumulate_first_sets_in_fwo");
    }
}

/*
   Let the first set of the alternative grow with the contributions
   of its leading members, up to and including the first member whose
   empty flag is not set. Then add the first set of the alternative
   to the given set.
*/
static void accumulate_first_sets_in_alt (alternative alt, trans_pair_list set, int *change)
{ fwo_group_list fwos = alt -> members;
  int nr = 0;
  while (nr < fwos -> size)
    { accumulate_first_sets_in_fwo (fwos -> array[nr], alt -> first, change);

      /* Members behind a member that is not flagged empty do not contribute */
      if (!fwos -> array[nr] -> empty)
        break;
      nr++;
    }
  add_set_to_trans_set (alt -> first, set, change);
}

/* Accumulate the first sets of every alternative of the group into the set */
static void accumulate_first_sets_in_group (group grp, trans_pair_list set, int *change)
{ alternative_list group_alts = grp -> alts;
  int alt_nr = 0;
  while (alt_nr < group_alts -> size)
    { accumulate_first_sets_in_alt (group_alts -> array[alt_nr], set, change);
      alt_nr++;
    }
}

/* Accumulate the first sets of the group of every definition into the set */
static void accumulate_first_sets_in_defs (definition_list defs, trans_pair_list set, int *change)
{ int def_nr = 0;
  while (def_nr < defs -> size)
    { accumulate_first_sets_in_group (defs -> array[def_nr] -> grp, set, change);
      def_nr++;
    }
}

/*
   Do one accumulation pass over the body of a rule, adding to the
   first set of the rule itself. Rules that always produce empty and
   external rules are skipped. Any unexpected rule tag is passed to
   dcg_bad_tag.
*/
static void accumulate_first_sets_in_rule (rule srule, int *change)
{ if (srule -> empty == e_always_produces_empty)
    return;
  switch (srule -> tag)
    { case TAGExt_rule:
        return;
      case TAGDefs:
        accumulate_first_sets_in_defs (srule -> Defs.defs, srule -> first, change);
        return;
      case TAGAnonymous_option:
        accumulate_first_sets_in_group (srule -> Anonymous_option.grp, srule -> first, change);
        return;
      case TAGAnonymous_group:
        accumulate_first_sets_in_group (srule -> Anonymous_group.grp, srule -> first, change);
        return;
      default:
        dcg_bad_tag (srule -> tag, "accumulate_first_sets_in_rule");
    }
}

/* One pass of the first set accumulation over all syntax rules */
static void accumulate_first_sets (int *change)
{ int rule_nr = 0;
  while (rule_nr < all_syntax_rules -> size)
    { accumulate_first_sets_in_rule (all_syntax_rules -> array[rule_nr], change);
      rule_nr++;
    }
}

/*
   When dump_properties is set, print for every syntax rule its rule
   type and canonic name, followed by a dump of its first set.
*/
static void try_dump_first_sets ()
{ int rule_nr;
  if (dump_properties)
    for (rule_nr = 0; rule_nr < all_syntax_rules -> size; rule_nr++)
      { rule curr_rule = all_syntax_rules -> array[rule_nr];
        spec curr_spec = curr_rule -> rspec;
        dcg_eprint ("First set of %s %s is: ",
                    string_from_rule_type (curr_spec -> rtype),
                    curr_spec -> canonic_name);
        dump_transition_set (curr_rule -> first);
      }
}

/*
   Determine the first sets: initialize them and then repeat the
   accumulation pass until a pass no longer changes any set. The
   number of passes is reported and the sets are dumped on request.
*/
static void determine_first_sets ()
{ int nr_passes = 0;
  int change;
  dcg_hint ("      determining first sets");
  determine_initial_first_sets ();

  /* Iterate until the fixed point is reached; at least one pass is made */
  for (;;)
    { change = 0;
      nr_passes++;
      accumulate_first_sets (&change);
      if (!change) break;
    }

  dcg_hint ("      needed %d pass%s for first set calculation",
            nr_passes, (nr_passes == 1)?"":"es");
  try_dump_first_sets ();
}

/*
   The follow set calculation starts by adding the $EOT transition
   to the follow set of the start rule.
*/
/*
   Append a new end of text transition (class TransEndOfText,
   number 0) to the follow set of the root rule.
*/
static void determine_initial_follow_set ()
{ trans_pair end_of_text;
  end_of_text = new_trans_pair (TransEndOfText, 0);
  app_trans_pair_list (root_rule -> follow, end_of_text);
}

/*
   Check if this member is a call to a syntax rule in the grammar.
   Returns 1 and stores the called rule in *called_rule if it is;
   returns 0 and leaves *called_rule untouched otherwise.
   Not accepted are members that are no call at all, calls to rules
   that always produce empty, calls to external or quasi rules and
   calls to rules of kind r_lexicon or r_fact.
*/
static int member_is_a_proper_call (member m, rule *called_rule)
{ rule callee;
  spec callee_spec;

  /* It must be a call to begin with */
  if (m -> tag != TAGRes_call)
    return (0);
  callee = m -> Res_call.rdef;

  /* Rules that always produce empty, external rules and quasi rules do not count */
  if (callee -> empty == e_always_produces_empty)
    return (0);
  if ((callee -> tag == TAGExt_rule) || (callee -> tag == TAGQuasi_rule))
    return (0);

  /* Neither do lexicon nonterminals and facts */
  callee_spec = callee -> rspec;
  if ((callee_spec -> rkind == r_lexicon) || (callee_spec -> rkind == r_fact))
    return (0);

  /* Everything else is a proper call */
  *called_rule = callee;
  return (1);
}

/*
   We have a rule of the form:
   srule -> alpha, called_rule, beta

   Add the first of beta to the follow of called_rule.
   Add the follow of srule to the follow of called_rule if beta may produce empty.
*/
/*
   Add what member m can start with to the follow set of called_rule:
   either the first transition of the member itself or, when m is a
   proper call, the first set of the rule it calls. Other members
   contribute nothing.
*/
static void add_first_of_member (rule called_rule, member m, int *change)
{ rule callee;
  if (m -> first != trans_pair_nil)
    { add_pair_to_trans_set (m -> first, called_rule -> follow, change);
      return;
    }
  if (!member_is_a_proper_call (m, &callee))
    return;
  add_set_to_trans_set (callee -> first, called_rule -> follow, change);
}

/*
   Add what a free word order group can start with to the follow set
   of called_rule: the contribution of its single member or of each
   of its members. Any other tag is passed to dcg_bad_tag.
*/
static void add_first_of_fwo (rule called_rule, fwo_group fwo, int *change)
{ member_list fwo_mems;
  int nr;
  switch (fwo -> tag)
    { case TAGSingle:
        add_first_of_member (called_rule, fwo -> Single.mem, change);
        break;
      case TAGFwo:
        fwo_mems = fwo -> Fwo.mems;
        for (nr = 0; nr < fwo_mems -> size; nr++)
          add_first_of_member (called_rule, fwo_mems -> array[nr], change);
        break;
      default:
        dcg_bad_tag (fwo -> tag, "add_first_of_fwo");
    }
}

/*
   Add to the follow set of called_rule what the members from index
   'from' onwards can start with. The walk stops after the first
   member whose empty flag is not set. When the walk runs off the
   end of the alternative, the follow set of srule is added as well.
*/
static void add_followers (rule called_rule, fwo_group_list members, int from, rule srule,
                           int *change)
{ int nr = from;
  while (nr < members -> size)
    { fwo_group next_fwo = members -> array[nr];
      add_first_of_fwo (called_rule, next_fwo, change);
      if (!next_fwo -> empty)
        return;
      nr++;
    }

  /* All of the following members may produce empty (or there were none) */
  add_set_to_trans_set (srule -> follow, called_rule -> follow, change);
}

/*
   Let the follow sets of the rules called from one free word order
   group of an alternative of srule grow.

   srule:   the rule in whose alternative the group occurs
   fwo:     the group under inspection
   members: all groups of the alternative
   from:    index in members of the first group following fwo
   change:  set to 1 (through the helpers) when a follow set grows

   For a single member that is a proper call, the followers are taken
   from the rest of the alternative. For members in free word order,
   every proper call additionally receives the firsts of all the other
   members of the same group. Any other tag is passed to dcg_bad_tag.
*/
static void accumulate_follow_sets_for_calls (rule srule, fwo_group fwo,
                                              fwo_group_list members, int from, int *change)
{ rule called_rule;
  switch (fwo -> tag)
    { case TAGSingle:
        if (member_is_a_proper_call (fwo -> Single.mem, &called_rule))
          add_followers (called_rule, members, from, srule, change);
        break;
      case TAGFwo:
        { member_list mems = fwo -> Fwo.mems;
          int ix;
          for (ix = 0; ix < mems -> size; ix++)
            { int iy;
              if (!member_is_a_proper_call (mems -> array[ix], &called_rule))
                continue;

              /*
                 If one of the members in free word order is a call,
                 add the first of the other members (because they may
                 follow) and the firsts from the rest of the alternative
                 (because this call may also be the last in free word order).
                 Note that the other members are indexed by iy, not ix.
              */
              for (iy = 0; iy < mems -> size; iy++)
                if (ix != iy)
                  add_first_of_member (called_rule, mems -> array[iy], change);
              add_followers (called_rule, members, from, srule, change);
            };
        }; break;
      default: dcg_bad_tag (fwo -> tag, "accumulate_follow_sets_for_calls");
    };
}

/*
   Inspect every group of the alternative for calls; the groups
   behind it (from its index + 1) are passed as possible followers.
*/
static void accumulate_follow_sets_in_alt (rule srule, alternative alt, int *change)
{ fwo_group_list fwos = alt -> members;
  int nr = 0;
  while (nr < fwos -> size)
    { accumulate_follow_sets_for_calls (srule, fwos -> array[nr], fwos, nr + 1, change);
      nr++;
    }
}

/* Accumulate follow sets for the calls in every alternative of the group */
static void accumulate_follow_sets_in_group (rule srule, group grp, int *change)
{ alternative_list group_alts = grp -> alts;
  int alt_nr = 0;
  while (alt_nr < group_alts -> size)
    { accumulate_follow_sets_in_alt (srule, group_alts -> array[alt_nr], change);
      alt_nr++;
    }
}

/* Accumulate follow sets for the calls in the group of every definition */
static void accumulate_follow_sets_in_defs (rule srule, definition_list defs, int *change)
{ int def_nr = 0;
  while (def_nr < defs -> size)
    { accumulate_follow_sets_in_group (srule, defs -> array[def_nr] -> grp, change);
      def_nr++;
    }
}

/*
   Do one follow set accumulation pass over the body of a rule.
   Rules that always produce empty and external rules are skipped.
   Any unexpected rule tag is passed to dcg_bad_tag.
*/
static void accumulate_follow_sets_in_rule (rule srule, int *change)
{ if (srule -> empty == e_always_produces_empty)
    return;
  switch (srule -> tag)
    { case TAGExt_rule:
        return;
      case TAGDefs:
        accumulate_follow_sets_in_defs (srule, srule -> Defs.defs, change);
        return;
      case TAGAnonymous_option:
        accumulate_follow_sets_in_group (srule, srule -> Anonymous_option.grp, change);
        return;
      case TAGAnonymous_group:
        accumulate_follow_sets_in_group (srule, srule -> Anonymous_group.grp, change);
        return;
      default:
        dcg_bad_tag (srule -> tag, "accumulate_follow_sets_in_rule");
    }
}

/* One pass of the follow set accumulation over all syntax rules */
static void accumulate_follow_sets (int *change)
{ int rule_nr = 0;
  while (rule_nr < all_syntax_rules -> size)
    { accumulate_follow_sets_in_rule (all_syntax_rules -> array[rule_nr], change);
      rule_nr++;
    }
}

/*
   When dump_properties is set, print for every syntax rule its rule
   type and canonic name, followed by a dump of its follow set.
*/
static void try_dump_follow_sets ()
{ int rule_nr;
  if (dump_properties)
    for (rule_nr = 0; rule_nr < all_syntax_rules -> size; rule_nr++)
      { rule curr_rule = all_syntax_rules -> array[rule_nr];
        spec curr_spec = curr_rule -> rspec;
        dcg_eprint ("Follow set of %s %s is: ",
                    string_from_rule_type (curr_spec -> rtype),
                    curr_spec -> canonic_name);
        dump_transition_set (curr_rule -> follow);
      }
}

/*
   Determine the follow sets: seed the follow set of the root rule
   and then repeat the accumulation pass until a pass no longer
   changes any set. The number of passes is reported and the sets
   are dumped on request.
*/
static void determine_follow_sets ()
{ int nr_passes = 0;
  int change;
  dcg_hint ("      determining follow sets");
  determine_initial_follow_set ();

  /* Iterate until the fixed point is reached; at least one pass is made */
  for (;;)
    { change = 0;
      nr_passes++;
      accumulate_follow_sets (&change);
      if (!change) break;
    }

  dcg_hint ("      needed %d pass%s for follow set calculation",
            nr_passes, (nr_passes == 1)?"":"es");
  try_dump_follow_sets ();
}

/*
   Entry point of this module: determine the first sets and after
   that the follow sets (the follow set calculation uses the first
   sets of the called rules).
*/
void determine_lookahead_sets ()
{
  /* The order matters: first sets before follow sets */
  determine_first_sets ();
  determine_follow_sets ();
}
