/*
   File: affix_values.c
   Collects all used actual affix values
   Also administers the formal domains

   Copyright (C) 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: affix_values.c,v 1.9 2013/03/13 10:07:34 marcs Exp $"
*/

/* standard includes */
#include <stdio.h>
#include <math.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_error.h>

/* libeagbase includes */
#include <ebase_version.h>
#include <ebase_ds.h>
#include <ebase_hash.h>

/* libeaglex includes */
#include <ebase_vocabulary.h>

/* Local includes */
#include "options.h"
#include "globals.h"
#include "affix_values.h"

/*
   For affix type lookup, we use three int_lists instead of
   a list of structs so that we do not have to code this stuff.

   We use this hash lookup to recognize the parts of actual affix
   values. In these, affix terminals and nonterminals from lattices
   may occur as well as numbers (INT and REAL), texts and terminal
   markers from enumerable tree types. For the latter, we need a
   lookup that can handle sequences of affix terminals.
*/
/*
   Hash table for names of domain elements, affix nonterminals and
   marker sequences. Every bucket is an int_list of entry numbers;
   an entry number indexes the parallel lists type_kinds, type_nrs
   and type_eltnrs.
*/
static int_list_list type_hash_container;
/* Hash table for collected actual affix values; buckets hold indices into all_actuals */
static int_list_list affix_hash_container;
/* Number of buckets in type_hash_container */
static int type_hash_size;
/* Per entry: TYPE_ELEMENT, TYPE_AFFIX_NONTERMINAL or TYPE_MARKER */
static int_list type_kinds;
/* Per entry: index into all_rt_domains (elements) or into all_rt_types (nonterminals, markers) */
static int_list type_nrs;
/*
   Per entry: index into the domain's elts (elements), index into the
   tree type's alts (markers) or -1 (affix nonterminals)
*/
static int_list type_eltnrs;
/* Counters of newly collected actual affix values per tag, printed by report_affix_values */
static int int_affixes;
static int text_affixes;
static int real_affixes;
static int small_lattice_affixes;
static int large_lattice_affixes;

/*
   Create both hash tables and the three parallel entry lists,
   and reset the statistics counters.

   The type hash gets one bucket for every element of every domain
   plus one for every runtime type; the affix hash gets
   affix_hash_size buckets. All buckets start out empty.
*/
static void allocate_hash_containers ()
{
  int nr_buckets = all_rt_types -> size;
  int nr;

  /* Add the number of elements of every domain to the type count */
  for (nr = 0; nr < all_rt_domains -> size; nr++)
    nr_buckets += all_rt_domains -> array[nr] -> elts -> size;
  type_hash_size = nr_buckets;

  type_hash_container = init_int_list_list (type_hash_size);
  nr = 0;
  while (nr < type_hash_size)
    {
      app_int_list_list (type_hash_container, new_int_list ());
      nr++;
    }

  /* Parallel lists describing every registered entry */
  type_kinds = init_int_list (type_hash_size);
  type_nrs = init_int_list (type_hash_size);
  type_eltnrs = init_int_list (type_hash_size);

  affix_hash_container = init_int_list_list (affix_hash_size);
  nr = 0;
  while (nr < affix_hash_size)
    {
      app_int_list_list (affix_hash_container, new_int_list ());
      nr++;
    }

  /* No affix values collected yet */
  int_affixes = 0;
  text_affixes = 0;
  real_affixes = 0;
  small_lattice_affixes = 0;
  large_lattice_affixes = 0;
}

/*
   Enter every element of every runtime domain into the type hash
   as a TYPE_ELEMENT entry, keyed on the element name. The entry
   records the domain number and the element number within the domain.
*/
static void enter_domain_elements ()
{
  int dom_nr;
  for (dom_nr = 0; dom_nr < all_rt_domains -> size; dom_nr++)
    {
      rt_element_list members = all_rt_domains -> array[dom_nr] -> elts;
      int elt_nr;
      for (elt_nr = 0; elt_nr < members -> size; elt_nr++)
        {
          int slot = ebs_hash_text (members -> array[elt_nr] -> name, type_hash_size);

          /* The new entry number is the current length of the parallel lists */
          app_int_list (type_hash_container -> array[slot], type_kinds -> size);
          app_int_list (type_kinds, TYPE_ELEMENT);
          app_int_list (type_nrs, dom_nr);
          app_int_list (type_eltnrs, elt_nr);
        }
    }
}

/*
   Enter the name of runtime type rt into the type hash as a
   TYPE_AFFIX_NONTERMINAL entry.

   rt     the runtime type whose name is registered
   rt_nr  its index in all_rt_types, stored as the type number

   The element number of such an entry is always -1.
*/
static void register_affix_nonterminal (rt_type rt, int rt_nr)
{
  int slot = ebs_hash_text (rt -> name, type_hash_size);

  /* The new entry number is the current length of the parallel lists */
  app_int_list (type_hash_container -> array[slot], type_kinds -> size);
  app_int_list (type_kinds, TYPE_AFFIX_NONTERMINAL);
  app_int_list (type_nrs, rt_nr);
  app_int_list (type_eltnrs, -1);
}

/*
   Check whether the marker sequence elems clashes with an entry
   that is already present in bucket.

   Returns 1 if the bucket holds a domain element whose name equals
   a one marker sequence, or a tree type alternative with exactly the
   same marker texts. Affix nonterminal entries are never considered
   a clash. Returns 0 when nothing in the bucket clashes.
*/
static int elements_collide (rt_elem_list elems, int_list bucket)
{
  int pos;
  for (pos = 0; pos < bucket -> size; pos++)
    {
      int entry = bucket -> array[pos];
      int entry_kind = type_kinds -> array[entry];
      int entry_type = type_nrs -> array[entry];
      int entry_elt = type_eltnrs -> array[entry];

      if (entry_kind == TYPE_ELEMENT)
        {
          rt_domain dom = all_rt_domains -> array[entry_type];
          rt_element candidate = dom -> elts -> array[entry_elt];

          /* A domain element can only clash with a single marker */
          if ((elems -> size == 1) &&
              streq (elems -> array[0] -> Marker.txt, candidate -> name))
            return (1);
        }
      else if (entry_kind == TYPE_MARKER)
        {
          rt_type owner = all_rt_types -> array[entry_type];
          rt_elem_list other = owner -> Tree_type.alts -> array[entry_elt] -> elems;

          if (elems -> size == other -> size)
            {
              /* Walk along both sequences until the first difference */
              int nr = 0;
              while ((nr < elems -> size) &&
                     streq (elems -> array[nr] -> Marker.txt,
                            other -> array[nr] -> Marker.txt))
                nr++;
              if (nr == elems -> size) return (1);
            }
        }
      else if (entry_kind != TYPE_AFFIX_NONTERMINAL)
        dcg_bad_tag (entry_kind, "elements_collide");
    }
  return (0);
}

/*
   Enter the marker sequence of every alternative of tree type rt
   into the type hash as a TYPE_MARKER entry.

   rt     the tree type whose alternatives are registered
   rt_nr  its index in all_rt_types, stored as the type number

   The hash is taken over the concatenation of all marker texts of an
   alternative. An alternative whose marker sequence is already present
   is reported through dcg_error and not registered. An element that is
   not a marker is an internal error.
*/
static void register_markers (rt_type rt, int rt_nr)
{ rt_alt_list alts = rt -> Tree_type.alts;
  int ix, iy;
  for (ix = 0; ix < alts -> size; ix++)
    { rt_alt alt = alts -> array[ix];
      int marker = alt -> marker;
      rt_elem_list elems = alt -> elems;
      int_list bucket;
      unsigned int elems_hash = 0;
      int hash;
      for (iy = 0; iy < elems -> size; iy++)
	{ rt_elem elem = elems -> array[iy];
	  if (elem -> tag != TAGMarker)
	    dcg_internal_error ("register_markers");
	  elems_hash = ebs_hash_continued_text (elem -> Marker.txt, elems_hash);
	};

      /*
         Reduce in unsigned arithmetic: converting to int first could
         give a negative bucket index for a hash with its top bit set.
      */
      hash = (int) (elems_hash % (unsigned int) type_hash_size);

      bucket = type_hash_container -> array[hash];
      if (elements_collide (elems, bucket))
	{ dcg_error (0, "Marker %d of type %s is not unique", marker, rt -> name);
	  continue;
	};

      /*
         Register these elements. Each of the three parallel lists must
         receive exactly one value per entry, so that the entry number
         pushed into the bucket stays valid for all of them.
         NOTE(review): the lookups use the stored alt number as an index
         into Tree_type.alts; this presumes alt -> marker equals the
         position of the alternative -- confirm.
      */
      app_int_list (bucket, type_kinds -> size);
      app_int_list (type_kinds, TYPE_MARKER);
      app_int_list (type_nrs, rt_nr);		/* type number */
      app_int_list (type_eltnrs, marker);	/* alt number */
    };
}

/* Indices in all_rt_types of the types named INT, REAL and TEXT */
static int rt_int_nr, rt_real_nr, rt_text_nr;

/*
   Walk over all runtime types that have their lexgen flag set.
   Remember the type numbers of INT, REAL and TEXT, register
   lattice types as affix nonterminals and register the marker
   sequences of tree types. Other kinds of types are not registered.
*/
static void enter_affix_nonterminals_and_markers ()
{
  int type_nr;
  for (type_nr = 0; type_nr < all_rt_types -> size; type_nr++)
    {
      rt_type current = all_rt_types -> array[type_nr];
      if (current -> lexgen)
        {
          /* Remember where the predefined types live */
          if (streq (current -> name, "INT")) rt_int_nr = type_nr;
          else if (streq (current -> name, "REAL")) rt_real_nr = type_nr;
          else if (streq (current -> name, "TEXT")) rt_text_nr = type_nr;

          if (current -> tag == TAGLattice_type)
            register_affix_nonterminal (current, type_nr);
          else if (current -> tag == TAGTree_type)
            register_markers (current, type_nr);
        }
    }
}

/*
   Set up the lookup administration: allocate the hash tables first,
   then enter the domain elements and finally the affix nonterminals
   and tree type markers.
*/
void prepare_affix_values ()
{
  /* The tables must exist before anything is entered */
  allocate_hash_containers ();

  enter_domain_elements ();
  enter_affix_nonterminals_and_markers ();
}

/*
   After the preparation, we should be able to determine the
   type (or affix domain) from an affix terminal, nonterminal
   or tree type marker. We have also stored the ids of the
   standard types INT, REAL and TEXT, so that we can collect
   with each set of actual parameters, a set of corresponding
   formal parameters. For lattice elements (or affix nonterminals),
   we are not able to definitively decide on the formal parameters
   but at least we know the affix domain it is coming from.
*/
/* Yield the type number recorded for the type named INT */
int int_formal ()
{
  return rt_int_nr;
}

/* Yield the type number recorded for the type named REAL */
int real_formal ()
{
  return rt_real_nr;
}

/* Yield the type number recorded for the type named TEXT */
int text_formal ()
{
  return rt_text_nr;
}

/*
   Lookup of an affix element, affix nonterminal or single marker
*/
int lookup_value_string (char *str, int *kind, int *type_nr, int *elt_nr)
{ int hash = ebs_hash_text (str, type_hash_size);
  int_list bucket = type_hash_container -> array[hash];
  int ix;
  for (ix = 0; ix < bucket -> size; ix++)
    { int my_nr = bucket -> array[ix];
      int my_kind = type_kinds -> array[my_nr];
      int my_type_nr = type_nrs -> array[my_nr];
      int my_elt_nr = type_eltnrs -> array[my_nr];
      switch (my_kind)
	{ case TYPE_ELEMENT:
	    { rt_domain my_domain = all_rt_domains -> array[my_type_nr];
	      rt_element my_element = my_domain -> elts -> array[my_elt_nr];
	      if (streq (str, my_element -> name))
		{ *kind = my_kind;
		  *type_nr = my_type_nr;
		  *elt_nr = my_elt_nr;
		  return (1);
		};
	    }; break;
	  case TYPE_AFFIX_NONTERMINAL:
	    { rt_type my_type = all_rt_types -> array[my_type_nr];
	      if (streq (str, my_type -> name))
		{ *kind = my_kind;
		  *type_nr = my_type_nr;
		  *elt_nr = my_elt_nr;
		  return (1);
		};
	    }; break;
	  case TYPE_MARKER:
	    { rt_type my_type = all_rt_types -> array[my_type_nr];
	      rt_elem_list my_elems = my_type -> Tree_type.alts -> array[my_elt_nr] -> elems;
	      if ((my_elems -> size == 1) && streq (str, my_elems -> array[0] -> Marker.txt))
		{ *kind = my_kind;
		  *type_nr = my_type_nr;
		  *elt_nr = my_elt_nr;
		  return (1);
		};
	    }; break;
	  default: dcg_bad_tag (my_kind, "lookup_value_string");
	};
    };

  /* Return unknown */
  *kind = TYPE_UNKNOWN;
  return (0);
}

/*
   Lookup a sequence of markers
*/
/*
   list     the sequence of marker texts to identify
   kind     receives TYPE_MARKER when found, TYPE_UNKNOWN otherwise
   type_nr  receives the type number of the tree type found
   elt_nr   receives the alternative number found

   Returns 1 if a registered tree type alternative has exactly this
   marker sequence (same length, every position equal), 0 otherwise.
   When nothing is found only *kind is written.
*/
int lookup_value_string_list (string_list list, int *kind, int *type_nr, int *elt_nr)
{ unsigned int list_hash = 0;
  int_list bucket;
  int hash, ix;
  for (ix = 0; ix < list -> size; ix++)
    list_hash = ebs_hash_continued_text (list -> array[ix], list_hash);

  /*
     Reduce in unsigned arithmetic: converting to int first could
     give a negative bucket index for a hash with its top bit set.
  */
  hash = (int) (list_hash % (unsigned int) type_hash_size);
  bucket = type_hash_container -> array[hash];
  for (ix = 0; ix < bucket -> size; ix++)
    { int my_nr = bucket -> array[ix];
      int my_kind = type_kinds -> array[my_nr];
      int my_type_nr = type_nrs -> array[my_nr];
      int my_elt_nr = type_eltnrs -> array[my_nr];
      if (my_kind == TYPE_MARKER)
	{ rt_type my_type = all_rt_types -> array[my_type_nr];
	  rt_elem_list my_elems = my_type -> Tree_type.alts -> array[my_elt_nr] -> elems;
	  int equal = 1;
	  int iy;
	  if (my_elems -> size != list -> size) continue;

	  /* Every position must agree, one equal marker is not enough */
	  for (iy = 0; iy < my_elems -> size; iy++)
	    if (!streq (my_elems -> array[iy] -> Marker.txt, list -> array[iy]))
	      { equal = 0;
		break;
	      };
	  if (equal)
	    { *kind = my_kind;
	      *type_nr = my_type_nr;
	      *elt_nr = my_elt_nr;
	      return (1);
	    };
	};
    };

  /* Return unknown */
  *kind = TYPE_UNKNOWN;
  return (0);
}

/*
   Affix value collection
*/
/* Multiplier used to mix the domain number into a lattice hash */
#define SOME_PRIME 5693

/*
   Compute the bucket number in the affix hash for value.

   Texts, ints and reals are handed to the matching ebs_hash function.
   For lattices the low and high 32 bit halves of every 64 bit word are
   summed, SOME_PRIME times the domain number is added and the sum is
   hashed as an int. The sum is unsigned, so it simply wraps around.
   An unknown tag is reported through dcg_bad_tag.
*/
static int calculate_affix_hash (affix_value value)
{
  unsigned int folded = 0;

  switch (value -> tag)
    {
    case TAGText_value:
      return (ebs_hash_text (value -> Text_value.text, affix_hash_size));

    case TAGInt_value:
      return (ebs_hash_int (value -> Int_value.ival, affix_hash_size));

    case TAGReal_value:
      return (ebs_hash_real (value -> Real_value.rval, affix_hash_size));

    case TAGSmall_lattice:
      folded += (unsigned int) (value -> Small_lattice.slat >> 32);
      folded += (unsigned int) value -> Small_lattice.slat;
      folded += (unsigned int) (SOME_PRIME * value -> Small_lattice.dom);
      return (ebs_hash_int (folded, affix_hash_size));

    case TAGLarge_lattice:
      {
        u_int64_list words = value -> Large_lattice.llat;
        int nr;
        for (nr = 0; nr < words -> size; nr++)
          {
            u_int64 word = words -> array[nr];
            folded += (unsigned int) word;
            folded += (unsigned int) (word >> 32);
          }
        folded += (unsigned int) (SOME_PRIME * value -> Large_lattice.dom);
        return (ebs_hash_int (folded, affix_hash_size));
      }

    default:
      dcg_bad_tag (value -> tag, "calculate_affix_hash");
    }
  return (0);
}

/*
   Bump the statistics counter that belongs to the tag of value.
   An unknown tag is reported through dcg_bad_tag.
*/
static void collect_affix_stats (affix_value value)
{
  if (value -> tag == TAGText_value)
    text_affixes++;
  else if (value -> tag == TAGInt_value)
    int_affixes++;
  else if (value -> tag == TAGReal_value)
    real_affixes++;
  else if (value -> tag == TAGSmall_lattice)
    small_lattice_affixes++;
  else if (value -> tag == TAGLarge_lattice)
    large_lattice_affixes++;
  else
    dcg_bad_tag (value -> tag, "collect_affix_stats");
}

/*
   Yield the index of value in all_actuals, adding it when it is
   not yet present.

   A value that equals an already collected one (equal_affix_value)
   gets the index of that one. Otherwise the statistics are updated,
   the value is attached and appended to all_actuals, and its new
   index is entered into the hash bucket and returned.
*/
int collect_actual_parameter (affix_value value)
{
  int_list chain = affix_hash_container -> array[calculate_affix_hash (value)];
  int fresh_idx;
  int pos = 0;

  /* Search the bucket for an equal value */
  while (pos < chain -> size)
    {
      int known_idx = chain -> array[pos];
      if (equal_affix_value (all_actuals -> array[known_idx], value))
        return (known_idx);
      pos++;
    }

  /* Not seen before: append it at the end of all_actuals */
  collect_affix_stats (value);
  fresh_idx = all_actuals -> size;
  app_int_list (chain, fresh_idx);
  app_affix_value_list (all_actuals, attach_affix_value (value));
  return (fresh_idx);
}

/*
   Yield the index of the critical text crit in all_critical_texts,
   adding it when the vocabulary does not know it yet.

   An info value of 0 in the vocabulary denotes a new entry; the
   critical vector has a fake entry at index 0.
*/
int collect_critical_parameter (char *crit)
{
  int *slot = ebs_enter_into_vocabulary (critical_text_vocabulary, crit);

  /* Known text: its index was stored on first entry */
  if (*slot != 0)
    return (*slot);

  /* New text: it will live at the current end of the list */
  *slot = all_critical_texts -> size;
  app_string_list (all_critical_texts, attach_string (crit));
  return (*slot);
}

/*
   Report final statistics
*/
void report_affix_values ()
{
  /* Total number of distinct actual affix values */
  dcg_wlog ("   collected %d different affix values", all_actuals -> size);

  /* Break down per kind of value */
  dcg_wlog ("      %d INT affixes, %d REAL affixes, %d TEXT affixes",
            int_affixes,
            real_affixes,
            text_affixes);
  dcg_wlog ("      %d small lattice values, %d large lattice values",
            small_lattice_affixes,
            large_lattice_affixes);

  /* One less than the list size is reported for the critical texts */
  dcg_wlog ("   collected %d critical text parameters",
            all_critical_texts -> size - 1);
}
