/*
   File: lc_rel.c
   Determines the left corner relation for affix rules and syntax rules
   Additionally, all nasty left corners (hidden cyclicity) are found. 

   Copyright (C) 2011 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
          
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
      
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
  
   CVS ID: "$Id: lc_rel.c,v 1.6 2013/01/03 10:54:41 marcs Exp $"
*/

/* standard includes */
#include <stdio.h>
#include <string.h>

/* support lib includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>
#include <dcg_string.h>
#include <dcg_warshall.h>   

/* local includes */
#include "eag_ds.h"
#include "options.h"
#include "globals.h"
#include "contsens.h"
#include "affix_rules.h"
#include "ast_utils.h"
#include "lc_rel.h"

/*
  This module determines the left corner relation for affix rules and
  syntax rules. A rule A is a left corner of rule B iff

     B =>+ alpha.A.beta where alpha =>* empty

  Note that rules which are a left corner of themselves are left recursive
  by definition, which should be forbidden if we are going to generate top down
  parsers.

  We will call a rule A to be a nasty left corner of rule B iff

     B =>+ alpha.A.beta where alpha =>* empty and beta =>* empty

  Note that rules that are a nasty left corner of themselves are rules
  with a (hidden) cyclicity, which we cannot handle in any parser.

  As an example, the following meta grammar should be flagged as
  a pathological case

   A :: B, C.
   B :: A, C;
        "ppp".
   C :: "qqq"; .

   Note that the direct lc relation is stored as a matrix:
   rule lc_rnr is a left corner in one step of rule ix iff rel[ix * asize + lc_rnr]
*/
static void determine_direct_lc_relation_in_affix_rules (char **ret_direct_lc, char **ret_nasty_lc)
{ /*
     Build the direct (one step) left corner matrix and the direct nasty
     left corner matrix over all affix rules. Both are char arrays of
     nr_rules * nr_rules cells, stored row by row:
       lc_matrix[row * nr_rules + col] is set when the rule with number col
       is a direct left corner of the rule at position row.
     Both matrices are handed back through the two result pointers.
  */
  int nr_rules = all_affix_rules -> size;
  int nr_cells = nr_rules * nr_rules;
  char *nasty_matrix = (char *) dcg_calloc (nr_cells, sizeof (char));
  char *lc_matrix = (char *) dcg_calloc (nr_cells, sizeof (char));
  int cell, row;

  /* Start from two empty relations */
  for (cell = 0; cell < nr_cells; cell++)
    { lc_matrix[cell] = 0;
      nasty_matrix[cell] = 0;
    };

  for (row = 0; row < nr_rules; row++)
    { affix_rule caller = all_affix_rules -> array[row];
      affix_alternative_list alt_list;
      int is_text, alt_nr;

      /*
         Only rules of the text, tree, int and real kinds take part in the
         relation; only for the text kind the nasty relation is recorded.
      */
      is_text = (caller -> kind == arule_text);
      if (!is_text && (caller -> kind != arule_tree) &&
          (caller -> kind != arule_int) && (caller -> kind != arule_real))
        continue;

      /* Only rules defined by alternatives are inspected */
      if (caller -> tag != TAGAffix_alts)
        continue;

      alt_list = caller -> Affix_alts.alts;
      for (alt_nr = 0; alt_nr < alt_list -> size; alt_nr++)
        { affix_element_list members = alt_list -> array[alt_nr] -> elems;
          int pos;
          for (pos = 0; pos < members -> size; pos++)
            { affix_element member = members -> array[pos];
              int may_vanish = 0;

              if (member -> tag == TAGAffix_text)
                may_vanish = (member -> empty != e_never_produces_empty);
              else if (member -> tag == TAGAffix_var)
                { /* Resolve the variable to its prime affix rule */
                  affix_rule callee = get_prime_affix_rule (member -> Affix_var.vdef);
                  int col = callee -> anr;

                  /* The relation only holds between rules of the same kind */
                  if (callee -> kind == caller -> kind)
                    { /* Only rules with alternatives are marked as left corner */
                      if (callee -> tag == TAGAffix_alts)
                        lc_matrix[row * nr_rules + col] |= 1;
                      may_vanish = (callee -> empty != e_never_produces_empty);

                      if (is_text)
                        { /* Skip over every later member that is not marked never empty */
                          int later = pos + 1;
                          while ((later < members -> size) &&
                                 (members -> array[later] -> empty != e_never_produces_empty))
                            later++;

                          /* No never empty member remains: nasty left corner */
                          if (later >= members -> size)
                            nasty_matrix[row * nr_rules + col] |= 1;
                        };
                    };
                };

              /* Members behind one that cannot vanish are no left corners */
              if (!may_vanish)
                break;
            };
        };
    };

  *ret_direct_lc = lc_matrix;
  *ret_nasty_lc = nasty_matrix;
}

static void annotate_affix_rules_with_lc_nrs (char *trans_lc, char *trans_nasty_lc)
{ /*
     Store in every affix rule of the tree, text, int or real kind the list
     of column numbers set in its row of trans_lc, and report the rules
     that have their own diagonal cell set in one of the two matrices.
     Both matrices are indexed as [row * nr_rules + col].
  */
  int nr_rules = all_affix_rules -> size;
  int row;
  for (row = 0; row < nr_rules; row++)
    { affix_rule arule = all_affix_rules -> array[row];
      int row_base = row * nr_rules;
      int relevant = (arule -> kind == arule_tree) || (arule -> kind == arule_text) ||
                     (arule -> kind == arule_int) || (arule -> kind == arule_real);
      if (!relevant)
        continue;

      if (arule -> tag == TAGAffix_alts)
        { int col;
          arule -> lc_nrs = new_int_list ();
          for (col = 0; col < nr_rules; col++)
            if (trans_lc[row_base + col])
              app_int_list (arule -> lc_nrs, col);
        }
      else
        arule -> lc_nrs = int_list_nil;

      /* Left recursion is only reported for text rules when td_parser is set */
      if (td_parser && (arule -> kind == arule_text) && trans_lc[row_base + row])
        contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
                               "Affix rule %s is left recursive", arule -> aname);

      /* A rule that is a nasty left corner of itself is always reported */
      if (trans_nasty_lc [row_base + row])
        contsens_error_by_gnr (arule -> gnr, arule -> line, arule -> col,
                               "Affix rule %s contains a hidden cyclic production",
                               arule -> aname);
    };
}

/*
   We no longer want the LC* relation as we need the LC+ relation for optimized
   parsing after the reduce step. For the get_ leftcorners, we must remember to
   incorporate the rules themselves
*/
void determine_lc_relation_in_affix_rules ()
{ /*
     Driver for the affix rules: collect the one step relations, pass both
     through dcg_warshall (presumably the transitive closure, to be confirmed
     against the dcg library) and annotate the affix rules with the result.
  */
  int nr_rules = all_affix_rules -> size;
  char *one_step_lc;
  char *one_step_nasty;
  char *closure_lc;
  char *closure_nasty;

  dcg_hint ("      determining affix rule leftcorner relation");
  determine_direct_lc_relation_in_affix_rules (&one_step_lc, &one_step_nasty);
  closure_lc = dcg_warshall (nr_rules, one_step_lc);
  closure_nasty = dcg_warshall (nr_rules, one_step_nasty);
  annotate_affix_rules_with_lc_nrs (closure_lc, closure_nasty);

  /* None of the four arrays is needed after the annotation */
  dcg_detach ((void **) &one_step_lc);
  dcg_detach ((void **) &one_step_nasty);
  dcg_detach ((void **) &closure_lc);
  dcg_detach ((void **) &closure_nasty);
}

/*
   Determine the direct (one step) left corner relation between syntax rules
   Rule lc_rnr is a direct lc of rule rnr <=> rel[rnr * size + lc_rnr]

   Note that we must check the rest of the alternative to determine whether
   a left corner is also a nasty left corner.
*/
static void determine_direct_lcs_in_member (member m, fwo_group_list fwol, int rest,
                                            int rnr, char *rel, char *nasty_rel)
{ /*
     Record the rule called by member m as a direct left corner of rule rnr.
     rest is the index in fwol of the first group following the one that
     holds m; when none of the groups from rest onwards has a false empty
     field, the called rule is recorded as a nasty left corner as well.
  */
  int nr_rules = all_syntax_rules -> size;
  rule callee;
  int cell, pos;

  /* Only calls can introduce a left corner */
  if (m -> tag != TAGRes_call)
    return;

  /* External rules, quasi rules and always empty rules are ignored */
  callee = m -> Res_call.rdef;
  if ((callee -> tag == TAGExt_rule) || (callee -> tag == TAGQuasi_rule) ||
      (callee -> empty == e_always_produces_empty))
    return;

  /* We have a left corner */
  cell = rnr * nr_rules + callee -> rnr;
  rel[cell] = 1;

  /* A following group that is not empty rules out a nasty left corner */
  for (pos = rest; pos < fwol -> size; pos++)
    if (!fwol -> array[pos] -> empty)
      return;

  /* We have a nasty left corner */
  nasty_rel[cell] = 1;
}

static void determine_direct_lcs_in_alt (alternative alt, int rnr, char *rel, char *nasty_rel)
{ /*
     Walk the groups of the alternative from left to right and collect the
     left corners of their members; the walk ends behind the first group
     whose empty field is false.
  */
  fwo_group_list groups = alt -> members;
  int pos = 0;
  while (pos < groups -> size)
    { fwo_group cur = groups -> array[pos];
      int after = pos + 1;

      if (cur -> tag == TAGSingle)
        determine_direct_lcs_in_member (cur -> Single.mem, groups, after, rnr, rel, nasty_rel);
      else if (cur -> tag == TAGFwo)
        { /* Every member of the group is inspected with the same rest */
          member_list mems = cur -> Fwo.mems;
          int nr;
          for (nr = 0; nr < mems -> size; nr++)
            determine_direct_lcs_in_member (mems -> array[nr], groups, after,
                                            rnr, rel, nasty_rel);
        }
      else
        { dcg_bad_tag (cur -> tag, "determine_direct_lcs_in_alt");
        };

      if (!cur -> empty)
        return;
      pos = after;
    };
}

static void determine_direct_lcs_in_group (group grp, int rnr, char *rel, char *nasty_rel)
{ /* Collect the direct left corners of rule rnr from every alternative of the group */
  alternative_list alt_list = grp -> alts;
  int alt_nr = 0;
  while (alt_nr < alt_list -> size)
    { determine_direct_lcs_in_alt (alt_list -> array[alt_nr], rnr, rel, nasty_rel);
      alt_nr++;
    };
}

static void determine_direct_lcs_in_defs (definition_list defs, int rnr, char *rel, char *nasty_rel)
{ /* Collect the direct left corners of rule rnr from the group of every definition */
  int def_nr = 0;
  while (def_nr < defs -> size)
    { group body = defs -> array[def_nr] -> grp;
      determine_direct_lcs_in_group (body, rnr, rel, nasty_rel);
      def_nr++;
    };
}

static void determine_direct_lcs_in_rule (rule drule, char *rel, char *nasty_rel)
{ /*
     Collect the direct left corners of one syntax rule. Rules that always
     produce empty are skipped, as are rules with any tag other than the
     three handled below.
  */
  int row = drule -> rnr;

  /* Do not check predicates/semipredicates */
  if (drule -> empty == e_always_produces_empty)
    return;

  if (drule -> tag == TAGDefs)
    determine_direct_lcs_in_defs (drule -> Defs.defs, row, rel, nasty_rel);
  else if (drule -> tag == TAGAnonymous_option)
    determine_direct_lcs_in_group (drule -> Anonymous_option.grp, row, rel, nasty_rel);
  else if (drule -> tag == TAGAnonymous_group)
    determine_direct_lcs_in_group (drule -> Anonymous_group.grp, row, rel, nasty_rel);
}

static void determine_direct_lc_relation_for_syntax_rules (char **ret_lc, char **ret_nasty_lc)
{ int size = all_syntax_rules -> size;
  char *nasty_rel = (char *) dcg_calloc (size * size, sizeof (char));
  char *rel = (char *) dcg_calloc (size * size, sizeof (char));
  int ix;
  for (ix = 0; ix < size * size; ix++) rel[ix] = 0;
  for (ix = 0; ix < size * size; ix++) nasty_rel[ix] = 0;
  for (ix = 0; ix < size; ix++)
    determine_direct_lcs_in_rule (all_syntax_rules -> array[ix], rel, nasty_rel);

  *ret_nasty_lc = nasty_rel;
  *ret_lc = rel;
}

char *rules_lc_rel;
static void check_for_nasty_rules (char *direct_nasty_lc_rel, char *nasty_lc_rel)
{ /*
     Report every syntax rule that has its own diagonal cell set in
     rules_lc_rel (only when td_parser is set) or in nasty_lc_rel.
     When dump_properties is set, the row of rules_lc_rel is logged for
     every rule that is not always empty, and the row of
     direct_nasty_lc_rel for every rule reported as cyclic.
     All matrices are indexed as [row * nr_rules + col].
  */
  int nr_rules = all_syntax_rules -> size;
  int row, col;
  for (row = 0; row < nr_rules; row++)
    { rule cur = all_syntax_rules -> array[row];
      prule proto = cur -> rspec -> pr;
      plhs head = proto -> lhs;
      int row_base = row * nr_rules;
      int want_lc_dump = dump_properties && (cur -> empty != e_always_produces_empty);

      /* Maybe also dump the left corner relation */
      if (want_lc_dump)
        { dcg_wlog ("Left corners of rule %s", cur -> rspec -> canonic_name);
          for (col = 0; col < nr_rules; col++)
            if (rules_lc_rel[row_base + col])
              dcg_wlog ("      %s", all_syntax_rules -> array[col] -> rspec -> canonic_name);
        };

      if (td_parser && rules_lc_rel[row_base + row])
        contsens_error_by_gnr (proto -> gnr, head -> line, head -> col,
                               "Rule %s is left recursive", cur -> rspec -> canonic_name);

      /* The rest of the loop body only concerns rules with a hidden cycle */
      if (!nasty_lc_rel [row_base + row])
        continue;
      contsens_error_by_gnr (proto -> gnr, head -> line, head -> col,
                             "Rule %s contains a hidden cyclic production",
                             cur -> rspec -> canonic_name);

      if (!dump_properties)
        continue;
      dcg_wlog ("Nasty left corners of rule %s", cur -> rspec -> canonic_name);
      for (col = 0; col < nr_rules; col++)
        if (direct_nasty_lc_rel[row_base + col])
          dcg_wlog ("      %s", all_syntax_rules -> array[col] -> rspec -> canonic_name);
    };
}

void determine_lc_relation_in_rules ()
{ /*
     Driver for the syntax rules: collect the one step relations, pass both
     through dcg_warshall (presumably the transitive closure, to be confirmed
     against the dcg library) and report the offending rules. The result
     for the plain left corner relation stays available in rules_lc_rel.
  */
  int nr_rules = all_syntax_rules -> size;
  char *one_step_lc;
  char *one_step_nasty;
  char *closure_nasty;

  dcg_hint ("      determining syntax rule leftcorner relation");
  determine_direct_lc_relation_for_syntax_rules (&one_step_lc, &one_step_nasty);
  closure_nasty = dcg_warshall (nr_rules, one_step_nasty);
  rules_lc_rel = dcg_warshall (nr_rules, one_step_lc);
  check_for_nasty_rules (one_step_nasty, closure_nasty);

  /* Everything except rules_lc_rel is released again */
  dcg_detach ((void **) &one_step_lc);
  dcg_detach ((void **) &one_step_nasty);
  dcg_detach ((void **) &closure_nasty);
}
