/*
   File: lattices.c
   Analyzes lattice construction

   Copyright (C) 2011 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: lattices.c,v 1.18 2012/08/16 20:51:15 marcs Exp $"
*/

/* standard includes */
#include <stdio.h>
#include <string.h>

/* support lib includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>
#include <dcg_string.h>
#include <dcg_plist.h>
#include <dcg_plist_ops.h>
#include <dcg_warshall.h>

/* libeagbase includes */
#include <ebase_ds.h>

/* local includes */
#include "eag_ds.h"
#include "options.h"
#include "globals.h"
#include "contsens.h"
#include "element_tree.h"

/*
   During the first phase of the analysis, the affix domain subset relation is calculated
   in the usual way, namely by calculating the one step subset relation and then taking
   its reflexive transitive closure. As a side effect, the array of all affix elements
   is set up.

   Following this calculation, the 'element of' relation between affix terminals and
   affix nonterminals is calculated. We need these relations to determine whether an
   affix expression is a subset of a specific affix nonterminal.
*/
/* Lookup tree, keyed by name, of all lattice elements collected so far */
static element_tree element_tree_space;

/* Start the analysis with an empty element lookup tree */
static void init_lattices ()
{
  element_tree_space = element_tree_nil;
}

/*
   Register the affix terminal 'elem' as a lattice element.
   The terminal name is entered in the element tree. The first time a
   name is seen, a new element is created, numbered with the current
   size of all_elements and appended to that list. In all cases the
   terminal is linked to the element that carries its name.
*/
static void collect_lattice_element (affix_element elem)
{
  string term_name = elem->Affix_term.tname;
  element_tree node = enter_element_tree (&element_tree_space, term_name);

  if (node->def == element_nil) {
    /* First occurrence of this name: create, number and store the element */
    element fresh = new_element (attach_string (term_name));
    fresh->enr = all_elements->size;	/* self ref */
    app_element_list (all_elements, fresh);
    node->def = fresh;
  }

  /* Remember which element this terminal denotes */
  elem->Affix_term.edef = node->def;
}

/*
   Record in the asize x asize matrix 'rel' that the affix rule defining
   the variable 'elem' is a subset of the affix rule numbered 'anr'.
*/
static void mark_subset_relation_for_variable (int anr, int asize, char *rel, affix_element elem)
{
  int var_nr = elem->Affix_var.vdef->anr;
  int cell = anr * asize + var_nr;

  rel[cell] = 1;		/* var is subset of rule */
}

/*
   Process one element in the right hand side of the affix rule 'anr':
   an affix terminal is collected as a lattice element, an affix variable
   is entered in the subset relation 'rel'. Any other tag is reported
   through dcg_bad_tag.
*/
static void mark_subset_relation_in_element (int anr, int asize, char *rel, affix_element elem)
{
  if (elem->tag == TAGAffix_term)
    collect_lattice_element (elem);
  else if (elem->tag == TAGAffix_var)
    mark_subset_relation_for_variable (anr, asize, rel, elem);
  else
    dcg_bad_tag (elem->tag, "mark_subset_relation_in_element");
}

/*
   Enter the one step subset relation of affix rule 'arule' in the
   asize x asize matrix 'rel'. A cell rel[a * asize + b] that is set
   means that rule b is a subset of rule a.
*/
static void mark_subset_relation_in_affix_rule (affix_rule arule, int asize, char *rel)
{
  int rule_nr = arule->anr;
  char *row = rel + rule_nr * asize;

  /* Every rule is a subset of itself */
  row[rule_nr] = 1;

  if (arule->tag == TAGAffix_synonym) {
    /* A synonym and its definition are subsets of each other */
    int syn_nr = arule->Affix_synonym.syndef->anr;
    row[syn_nr] = 1;			/* syn is subset of rule */
    rel[syn_nr * asize + rule_nr] = 1;	/* rule is subset of syn */
  } else if (arule->tag == TAGAffix_alts) {
    /* Only the elements of the first alternative are inspected */
    affix_element_list elems = arule->Affix_alts.alts->array[0]->elems;
    int nr;
    for (nr = 0; nr < elems->size; nr++)
      mark_subset_relation_in_element (rule_nr, asize, rel, elems->array[nr]);
  } else {
    dcg_bad_tag (arule->tag, "mark_subset_relation_in_affix_rule");
  }
}

/* subset_relation[a * asize + b] is set when affix rule b is a subset of affix rule a */
static char *subset_relation;
/* element_relation[a * esize + e] is set when element e is an element of affix rule a */
static char *element_relation;

/*
   Build the one step subset relation over all lattice affix rules
   (collecting the lattice elements on the way) and store its closure,
   as computed by dcg_warshall, in subset_relation.

   NOTE(review): the one step matrix is not released here. Since
   construct_lattices detaches both the argument and the result of
   dcg_warshall, this buffer presumably leaks -- confirm against the
   dcg_warshall implementation.
*/
static void deduce_subset_relation ()
{
  int nr_rules = all_affix_rules->size;
  int nr_cells = nr_rules * nr_rules;
  char *one_step = dcg_calloc (nr_cells, sizeof (char));
  int idx;

  dcg_hint ("      collecting lattice elements");

  /* Clear the matrix explicitly, although dcg_calloc should have done so */
  for (idx = 0; idx < nr_cells; idx++)
    one_step[idx] = 0;

  /* Only lattice rules take part in the relation */
  for (idx = 0; idx < nr_rules; idx++) {
    affix_rule arule = all_affix_rules->array[idx];
    if (arule->kind == arule_lattice)
      mark_subset_relation_in_affix_rule (arule, nr_rules, one_step);
  }

  /* Now take the transitive closure */
  dcg_hint ("      collected %d lattice elements", all_elements -> size);
  dcg_hint ("      deducing lattice subset relation");
  subset_relation = dcg_warshall (nr_rules, one_step);
}

/*
   If 'elem' is an affix terminal, mark its element as an element of
   every affix rule that the rule numbered 'anr' is a subset of
   (according to subset_relation). Other kinds of elements are ignored.
*/
static void add_elements_to_relation (int anr, affix_element elem)
{
  int nr_rules, nr_elts, elt_nr, super;

  /* Only affix terminals contribute elements */
  if (elem->tag != TAGAffix_term) return;

  nr_rules = all_affix_rules->size;
  nr_elts = all_elements->size;
  elt_nr = elem->Affix_term.edef->enr;

  /* Visit every rule and pick those that rule anr is a subset of */
  for (super = 0; super < nr_rules; super++) {
    if (!subset_relation[super * nr_rules + anr]) continue;
    element_relation[super * nr_elts + elt_nr] = 1;
  }
}

/*
   Allocate and fill element_relation: for every lattice affix rule
   with alternatives, the affix terminals in its first alternative are
   entered as elements of all rules that this rule is a subset of.
*/
static void deduce_element_relation ()
{
  int nr_rules = all_affix_rules->size;
  int nr_elts = all_elements->size;
  int nr_cells, idx;

  dcg_hint ("      deducing lattice element relation");

  /* Allocate at least one column, to cope with a universe without lattices */
  if (nr_elts == 0) nr_elts = 1;
  nr_cells = nr_rules * nr_elts;
  element_relation = dcg_calloc (nr_cells, sizeof (char));
  for (idx = 0; idx < nr_cells; idx++)
    element_relation[idx] = 0;

  for (idx = 0; idx < nr_rules; idx++) {
    affix_rule arule = all_affix_rules->array[idx];
    if (arule->kind == arule_lattice && arule->tag == TAGAffix_alts) {
      affix_element_list elems = arule->Affix_alts.alts->array[0]->elems;
      int nr;
      for (nr = 0; nr < elems->size; nr++)
        add_elements_to_relation (arule->anr, elems->array[nr]);
    }
  }
}

/*
   When dump_lattices is set, log for every lattice affix rule the rules
   it is a subset of, and for every lattice element the rules it is an
   element of. Does nothing otherwise.
*/
static void try_dump_lattice_relations ()
{
  int nr_rules, nr_elts, subj, super;

  if (!dump_lattices) return;
  nr_rules = all_affix_rules->size;
  nr_elts = all_elements->size;

  /* Dump lattice nonterminals */
  dcg_wlog ("Dump of all lattice affix rules");
  for (subj = 0; subj < nr_rules; subj++) {
    affix_rule arule = all_affix_rules->array[subj];
    const char *sep = "";	/* nothing before the first name, ", " afterwards */
    if (arule->kind != arule_lattice) continue;
    dcg_eprint ("%5d: %s is a subset of ", subj, arule->aname);
    for (super = 0; super < nr_rules; super++) {
      if (!subset_relation[super * nr_rules + subj]) continue;
      dcg_eprint ("%s%s", sep, all_affix_rules->array[super]->aname);
      sep = ", ";
    }
    dcg_wlog ("");
  }

  /* Dump elements */
  dcg_wlog ("Dump of all lattice elements");
  for (subj = 0; subj < nr_elts; subj++) {
    element elt = all_elements->array[subj];
    const char *sep = "";
    dcg_eprint ("%5d: %s is an element of ", subj, elt->name);
    for (super = 0; super < nr_rules; super++) {
      if (!element_relation[super * nr_elts + subj]) continue;
      dcg_eprint ("%s%s", sep, all_affix_rules->array[super]->aname);
      sep = ", ";
    }
    dcg_wlog ("");
  }
}

/*
   The interference matrix reflects which lattice elements interfere
   with each other, either by occurring together in the right hand
   side of an affix nonterminal (directly or indirectly) or by
   occurring together in a formal or actual affix expression (directly
   or indirectly), i.e. in an affix expression of the form
   'elt1 | elt2 | VRULE', elt1 and elt2 interfere with each other and
   with all elements in the right hand side of VRULE. In most cases
   this is an overspecification. However, we know from linguistic
   (or kees) grammars that we need this safety.
*/
/* Symmetric esize x esize matrix: a set cell means the two elements interfere */
static char* interference_matrix;

/*
   Allocate the interference matrix and mark every pair of different
   elements that belong to the same lattice affix rule (according to
   element_relation) as interfering, in both directions.
*/
static void create_initial_interference_matrix ()
{
  int nr_rules = all_affix_rules->size;
  int nr_elts = all_elements->size;
  int rule_ix, hi, lo, cell;

  dcg_hint ("      setting up initial lattice element interference matrix");

  /* Allocate at least one cell, to cope with a universe without lattices */
  if (nr_elts == 0) nr_elts = 1;
  interference_matrix = (char *) dcg_calloc (nr_elts * nr_elts, sizeof (char));
  for (cell = 0; cell < nr_elts * nr_elts; cell++)
    interference_matrix[cell] = 0;

  for (rule_ix = 0; rule_ix < nr_rules; rule_ix++) {
    affix_rule arule = all_affix_rules->array[rule_ix];
    char *members;
    if (arule->kind != arule_lattice) continue;

    /* Row of the element relation belonging to this affix nonterminal */
    members = element_relation + rule_ix * nr_elts;
    for (hi = 0; hi < nr_elts; hi++) {
      if (!members[hi]) continue;

      /* Pair this element with every lower numbered element of the rule */
      for (lo = 0; lo < hi; lo++) {
        if (members[lo]) {
          interference_matrix[hi * nr_elts + lo] = 1;
          interference_matrix[lo * nr_elts + hi] = 1;
        }
      }
    }
  }
}

/*
   Pass 1 of the lattice construction: reset the element tree, deduce
   the subset and element relations, set up the initial interference
   matrix and dump the relations when requested.
*/
void analyze_lattices_in_affix_rules ()
{
  dcg_warning (0, "   analyzing lattices in affix rules...");

  /* The order matters: each step uses the results of the previous ones */
  init_lattices ();
  deduce_subset_relation ();
  deduce_element_relation ();
  create_initial_interference_matrix ();

  try_dump_lattice_relations ();
}

/*
   Following the construction of the relations and the collection of the elements
   we are able to identify affix terminals and then determine whether they are an
   element of an affix domain pertaining to an affix nonterminal.
*/
/* Look up the name 'ename' in the tree of collected lattice elements */
element identify_lattice_element (string ename)
{
  element found = lookup_element_tree (element_tree_space, ename);
  return (found);
}

/* Yield the element_relation entry for element 'elt' and affix rule 'arule' */
int element_in_affix_rule (element elt, affix_rule arule)
{
  int row = arule->anr;
  int col = elt->enr;
  return (element_relation[row * all_elements->size + col]);
}

/* Yield the subset_relation entry telling whether 'subset' is a subset of 'arule' */
int subset_of_affix_rule (affix_rule subset, affix_rule arule)
{
  int row = arule->anr;
  int col = subset->anr;
  return (subset_relation[row * all_affix_rules->size + col]);
}

/* Yield 1 if some lattice element belongs to both affix rules, 0 otherwise */
int non_empty_intersection (affix_rule arule1, affix_rule arule2)
{
  int nr;
  for (nr = 0; nr < all_elements->size; nr++) {
    element candidate = all_elements->array[nr];
    if (!element_in_affix_rule (candidate, arule1)) continue;
    if (element_in_affix_rule (candidate, arule2)) return (1);
  }
  return (0);
}

/*
   Mark 'elt1' as interfering, in both directions, with every element
   of affix rule 'arule' other than elt1 itself. Each marked pair is
   reported through dcg_hint.
*/
void add_element_to_interference_matrix (element elt1, affix_rule arule)
{
  int nr_elts = all_elements->size;
  int other;

  for (other = 0; other < nr_elts; other++) {
    element elt2;

    /* Skip elements outside the rule, and never pair an element with itself */
    if (!element_relation[arule->anr * nr_elts + other]) continue;
    if (other == elt1->enr) continue;

    elt2 = all_elements->array[other];
    dcg_hint ("Adding non trivial interference between %s and %s", elt1 -> name, elt2 -> name);
    interference_matrix[elt1->enr * nr_elts + other] = 1;
    interference_matrix[other * nr_elts + elt1->enr] = 1;
  }
}

/*
   Let every element of affix rule 'arule1' interfere with the
   elements of affix rule 'arule'.
*/
void add_arule_to_interference_matrix (affix_rule arule1, affix_rule arule)
{
  int nr_elts = all_elements->size;
  int nr;

  for (nr = 0; nr < nr_elts; nr++) {
    /* Only the elements of arule1 take part */
    if (element_relation[arule1->anr * nr_elts + nr])
      add_element_to_interference_matrix (all_elements->array[nr], arule);
  }
}

/*
   Pass 2 of the lattice construction.
   The interference matrix between the affix elements has been set up.
   The closure of this interference matrix then describes a graph
   whose connected components correspond to the super domains
   of our lattice construction.
*/
/*
   When dump_lattices is set, log 'matrix' row by row under the heading
   'title'. The matrix is read as a square matrix with all_elements->size
   rows and columns.
*/
static void try_dump_matrix (char *matrix, char *title)
{
  int dim, row, col;

  if (!dump_lattices) return;
  dim = all_elements->size;

  dcg_wlog ("%s:", title);
  for (row = 0; row < dim; row++) {
    char *cells = matrix + row * dim;
    dcg_eprint ("%3d:", row);
    for (col = 0; col < dim; col++)
      dcg_eprint ("%2d", cells[col]);
    dcg_wlog ("");
  }
}

/*
   Partition all lattice elements into super domains and give every
   element its singleton lattice value.

   'closure' is the closure of the interference matrix, read as a square
   matrix over all elements. Every element that has no domain yet opens
   a new domain, which receives the element itself and all elements
   related to it in 'closure'. Each member gets as log_value its position
   in the domain, i.e. the bit that represents it in a lattice value.

   The element itself is always added to its own domain, whether or not
   closure[ix][ix] is set. The interference matrix never sets its
   diagonal, so a closure that is transitive but not reflexive would
   leave an element that interferes with nothing without a domain, and
   the value calculation below would then dereference a nil domain.
*/
static void allocate_super_domains (char *closure)
{ int esize = all_elements -> size;
  int ix, iy;
  for (ix = 0; ix < esize; ix++)
    { element elt = all_elements -> array[ix];
      domain new_dom;
      if (elt -> dom != domain_nil) continue;	/* Already in a domain */

      /* Open a new domain, numbered with its index in all_domains */
      new_dom = new_domain (init_int_list (esize));
      new_dom -> dnr = all_domains -> size;
      app_domain_list (all_domains, new_dom);
      for (iy = 0; iy < esize; iy++)
	{ element elt2;
	  if ((iy != ix) && !closure [ix * esize + iy])
	    continue;

	  /* An element can only be a member of one domain */
	  elt2 = all_elements -> array[iy];
	  if (elt2 -> dom != domain_nil)
	    dcg_internal_error ("allocate_super_domains");
	  elt2 -> log_value = new_dom -> elts -> size;
	  app_int_list (new_dom -> elts, iy);
	  elt2 -> dom = new_dom;
	};
    };

  /* Now, we know the number of elements in each domain */
  for (iy = 0; iy < all_domains -> size; iy++)
    { domain dom = all_domains -> array[iy];
      dom -> width = dom -> elts -> size / 64 + 1;	/* nr of 64 bit words */
    };

  /* Calculate for each element its singleton value */
  for (ix = 0; ix < esize; ix++)
    { element elt = all_elements -> array[ix];
      int lval = elt -> log_value;
      domain dom = elt -> dom;
      affix_value value;

      if (dom -> width <= 1)
	{ /* Small lattice: a single word with the bit of this element set */
	  value = new_Small_lattice (-1, dom -> dnr, 0);
	  value -> Small_lattice.slat |= (u_int64_const (1) << lval);
	}
      else
	{ /* Large lattice: dom -> width zeroed words, then set our bit */
	  u_int64_list il = init_u_int64_list (dom -> width);
	  for (iy = 0; iy < dom -> width; iy++)
	    app_u_int64_list (il, u_int64_const (0));
	  value = new_Large_lattice (-1, dom -> dnr, il);
	  value -> Large_lattice.llat -> array[lval/64] |= (u_int64_const (1) << (lval % 64));
	};
      elt -> value = value;
    };
}

/*
   Give every lattice affix rule the domain of its elements. All
   elements of one rule must share the same domain; a mismatch is
   reported as an internal error.
*/
static void assign_domains_to_affix_nonterminals ()
{
  int nr_elts = all_elements->size;
  int rule_ix, elt_ix;

  for (rule_ix = 0; rule_ix < all_affix_rules->size; rule_ix++) {
    affix_rule arule = all_affix_rules->array[rule_ix];
    if (arule->kind != arule_lattice) continue;

    for (elt_ix = 0; elt_ix < nr_elts; elt_ix++) {
      element elt;
      if (!element_relation[arule->anr * nr_elts + elt_ix]) continue;

      /* The first element decides, all following ones must agree */
      elt = all_elements->array[elt_ix];
      if (arule->dom == domain_nil)
        arule->dom = elt->dom;
      else if (arule->dom != elt->dom)
        dcg_internal_error ("assign_domains_to_affix_nonterminals");
    }
  }
}

/*
   Give every lattice affix rule its lattice value: a small or large
   lattice (depending on the width of its domain) in which the bits of
   all elements of the rule are set.

   NOTE(review): the domain of the rule is dereferenced unchecked; a
   lattice rule without any element would presumably still have a nil
   domain here -- confirm that this is excluded earlier.
*/
static void assign_values_to_affix_nonterminals ()
{
  int nr_elts = all_elements->size;
  int rule_ix, nr;

  for (rule_ix = 0; rule_ix < all_affix_rules->size; rule_ix++) {
    affix_rule arule = all_affix_rules->array[rule_ix];
    affix_value value;
    domain dom;
    int is_small;
    if (arule->kind != arule_lattice) continue;

    dom = arule->dom;
    is_small = (dom->width <= 1);

    /* Start from the empty lattice value */
    if (is_small)
      value = new_Small_lattice (-1, dom->dnr, 0);
    else {
      u_int64_list words = init_u_int64_list (dom->width);
      for (nr = 0; nr < dom->width; nr++)
        app_u_int64_list (words, u_int64_const (0));
      value = new_Large_lattice (-1, dom->dnr, words);
    }

    /* Set the bit of every element of this rule */
    for (nr = 0; nr < nr_elts; nr++) {
      int bit;
      if (!element_relation[arule->anr * nr_elts + nr]) continue;
      bit = all_elements->array[nr]->log_value;
      if (is_small)
        value->Small_lattice.slat |= (u_int64_const (1) << bit);
      else
        value->Large_lattice.llat->array[bit / 64] |= (u_int64_const (1) << (bit % 64));
    }

    arule->value = value;
  }
}

/*
   Log one lattice value and end the line: a small lattice as a single
   hexadecimal word, a large lattice as a bracketed list of its words
   from the highest index down to index 0.
*/
static void dump_lattice_bits (affix_value value)
{
  u_int64_list words;
  int nr;

  if (value->tag == TAGSmall_lattice) {
    dcg_wlog ("0x%llx", value -> Small_lattice.slat);
    return;
  }

  words = value->Large_lattice.llat;
  dcg_eprint ("[ ");
  for (nr = words->size - 1; 0 <= nr; nr--)
    dcg_eprint ("0x%llx%s", words->array[nr], (nr)?", ":"");
  dcg_wlog (" ]");
}

/*
   When dump_lattices is set, log the members of every domain, followed
   by the value of every lattice affix rule and of every lattice
   element. Does nothing otherwise.
*/
static void try_dump_domains ()
{
  int ix, iy;
  if (!dump_lattices) return;

  /* Per domain: element number and (log value) of each member */
  for (ix = 0; ix < all_domains->size; ix++) {
    int_list elts = all_domains->array[ix]->elts;
    dcg_eprint ("Domain %2d (%3d):", ix, elts -> size);
    for (iy = 0; iy < elts->size; iy++) {
      element elt = all_elements->array[elts->array[iy]];
      if (iy % 16 == 15)
        dcg_eprint ("\n                ");
      dcg_eprint (" %3d (%3d)", elt -> enr, elt -> log_value);
    }
    dcg_wlog ("");
  }

  /* Values of the lattice affix rules */
  for (ix = 0; ix < all_affix_rules->size; ix++) {
    affix_rule arule = all_affix_rules->array[ix];
    if (arule->kind != arule_lattice) continue;
    dcg_eprint ("Affix rule %s of domain %d has value: ", arule -> aname, arule -> dom -> dnr);
    dump_lattice_bits (arule->value);
  }

  /* Values of the lattice elements */
  for (ix = 0; ix < all_elements->size; ix++) {
    element elt = all_elements->array[ix];
    dcg_eprint ("Element %s of domain %d has value: ", elt -> name, elt -> dom -> dnr);
    dump_lattice_bits (elt->value);
  }
}

/*
   Pass 2 of the lattice construction: take the closure of the
   interference matrix, allocate the super domains from it, assign
   domains and values to the lattice affix rules, dump the results when
   requested, and release both the closure and the interference matrix.
*/
void construct_lattices ()
{
  int nr_elts = all_elements->size;
  char *closure;

  /* The matrix has at least one cell, to cope with a universe without lattices */
  if (nr_elts == 0) nr_elts = 1;
  closure = dcg_warshall (nr_elts, interference_matrix);

  try_dump_matrix (interference_matrix, "Interference matrix");
  try_dump_matrix (closure, "Closure of interference matrix");

  allocate_super_domains (closure);
  assign_domains_to_affix_nonterminals ();
  assign_values_to_affix_nonterminals ();
  try_dump_domains ();

  /* Neither matrix is needed any longer */
  dcg_detach ((void **) &closure);
  dcg_detach ((void **) &interference_matrix);
}
