/*
   File: affix_values.c
   Maintains the administration of all affix values

   Copyright 2007 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: affix_values.c,v 1.10 2007/08/01 14:18:30 marcs Exp $"
*/

/* system includes */
#include <stdio.h>
#include <string.h>

/* libabase includes */
#include <abase_repr.h>
#include <abase_error.h>
#include <abase_memalloc.h>
#include <abase_fileutil.h>
#include <abase_warshall.h>

/* local includes */
#include "options.h"
#include "dyn_array.h"
#include "affix_values.h"

/*
   We have to maintain an administration of all INT, TEXT and set affix values
   in such a way, that each affix value is stored only once.

   The basic administration will therefore use hashing techniques for lookup.
   Once a value has been stored, its index in the INT, TEXT or set affix
   administration is used as its identification. Extension to set affix
   values with more than 32 bits is easy (We need an extra field for that).

   The hash lists themselves are simple dynamic integer arrays which index
   the all_{int,string,set}_affixes for access.
*/
/*
   One entry of the set affix administration. Union affixes created by
   register_new_union_affix are stored in the same record type.
*/
typedef struct set_affix_rec
{ char *affix_name;		/* name; for a union the member names joined by '|' */
  int coder_number;		/* number passed at registration; -1 for a union */
  /* int length */
  Bitset32 affix_value;		/* bitset value; for a union the OR of the member values */
  int lhs;			/* nonzero once the affix has been registered with lhs set */
  int_array lhs_idxs;		/* indices recorded by non-lhs registrations and by union
				   formals; read as domain numbers by the closure code */
} *set_affix;

/*
  Uncomment when longer set affixes are needed

  typedef struct long_set_affix_rec
  { char *affix_name;
    int coder_number;
    int length;
    Bitset32 *affix_value;
    int lhs;
    int_array lhs_idxs
  } *long_set_affix;
*/

/*
   Value tables and their hash containers. The index of a value in its
   table is the identification handed back to callers; each hash container
   holds, per bucket, a list of indices into the corresponding table.
   set_hash_container is shared by named set affixes and union affixes.
*/
static int_array all_int_affixes;
static hash_container int_hash_container;
static text_array all_text_affixes;
static hash_container text_hash_container;
static ptr_array all_set_affixes;
static hash_container set_hash_container;

/*
   First create the hash functions
*/
/*
   Hash an INT affix value.
   The value is reinterpreted as unsigned and reduced modulo the table
   size, so the result is never negative and always below hsize.
   hsize must be nonzero.
*/
static int hash_int (int nr, int hsize)
{
  unsigned int key = (unsigned int) nr;
  unsigned int table_size = (unsigned int) hsize;

  return (int) (key % table_size);
}

/* String equality helper: nonzero when strcmp reports s1 and s2 as equal */
#define streq(s1,s2) (strcmp((s1),(s2)) == 0)
/*
   Hash a '\0' terminated string.
   Every character is folded in as 131 * acc + character, after which the
   accumulator is masked with 0x7ffffff; the final accumulator is reduced
   modulo hsize. hsize must be nonzero.
*/
static int hash_text (char *s, int hsize)
{
  unsigned int acc = 0;
  char *cursor = s;

  while (*cursor != '\0')
    {
      acc = 131 * acc + (unsigned int) (*cursor);
      acc &= 0x7ffffff;
      cursor++;
    }
  return (int) (acc % (unsigned int) hsize);
}

/*
   Hash a union of set affixes given by their indices in all_set_affixes.
   The member names are folded in order with a '|' between consecutive
   members, using the same step as hash_text, so the result equals the
   hash_text value of the joined name built by new_union_name.
*/
static int hash_union_int_array (int_array arr, int hsize)
{
  unsigned int acc = 0;
  int member;

  for (member = 0; member < arr -> size; member++)
    {
      set_affix entry = (set_affix) all_set_affixes -> array[arr -> array[member]];
      char *cursor;

      /* Every member but the first is preceded by the separator */
      if (member > 0)
        acc = (131 * acc + ((unsigned int) ('|'))) & 0x7ffffff;
      for (cursor = entry -> affix_name; *cursor != '\0'; cursor++)
        acc = (131 * acc + ((unsigned int) (*cursor))) & 0x7ffffff;
    }
  return (int) (acc % (unsigned int) hsize);
}

/*
   Registry of INT affixes
*/
/*
   Register an INT affix value and return its index in all_int_affixes.
   A value that is already present yields the index of the existing entry;
   otherwise the value is appended and its new index is added to the
   hash bucket.
*/
int register_new_int_affix (int value)
{
  hash_list bucket = int_hash_container[hash_int (value, affix_hash_size)];
  int new_index;
  int pos = 0;

  /* Search the bucket for an earlier registration of this value */
  while (pos < bucket -> size)
    {
      int candidate = bucket -> array[pos];
      if (all_int_affixes -> array[candidate] == value)
        return (candidate);
      pos++;
    }

  /* Not found: append the value and remember its index in the bucket */
  new_index = all_int_affixes -> size;
  app_int_array (all_int_affixes, value);
  app_hash_list (bucket, new_index);
  return (new_index);
}

/*
   Registry of TEXT affixes
*/
/*
   Register a TEXT affix value and return its index in all_text_affixes.
   An equal string that is already present yields the existing index;
   otherwise a copy made with abs_new_string is appended, so the caller
   keeps ownership of the string it passed in.
*/
int register_new_text_affix (char *value)
{
  hash_list bucket = text_hash_container[hash_text (value, affix_hash_size)];
  char *copy;
  int new_index;
  int pos = 0;

  /* Search the bucket for an equal string */
  while (pos < bucket -> size)
    {
      int candidate = bucket -> array[pos];
      if (streq (value, all_text_affixes -> array[candidate]))
        return (candidate);
      pos++;
    }

  /* Not found: store a private copy and remember its index in the bucket */
  new_index = all_text_affixes -> size;
  copy = abs_new_string (value, "register_new_text_affix");
  app_text_array (all_text_affixes, copy);
  app_hash_list (bucket, new_index);
  return (new_index);
}

/*
   Lookup and registry of set affixes
*/
/*
   Look up a set affix by name.
   Returns its index in all_set_affixes, or -1 when no entry with that
   name has been registered.
*/
int lookup_set_affix (char *affix_name)
{
  hash_list bucket = set_hash_container[hash_text (affix_name, affix_hash_size)];
  int pos = 0;

  while (pos < bucket -> size)
    {
      int candidate = bucket -> array[pos];
      set_affix entry = (set_affix) (all_set_affixes -> array[candidate]);
      if (streq (affix_name, entry -> affix_name))
        return (candidate);
      pos++;
    }
  return (-1);
}

/*
   Register a set affix and return its index in all_set_affixes.

   When the name is already registered, the coder number and value must
   match the stored ones, otherwise -1 is returned. On a match a set lhs
   marks the entry as lhs, and a clear lhs adds prev_lhs to the entry's
   lhs_idxs (without duplicates).

   When the name is new, a fresh entry is allocated. The affix_name
   pointer is stored as passed, it is not copied.
*/
int register_new_set_affix (char *affix_name, int coder_number, Bitset32 affix_value,
			    int lhs, int prev_lhs)
{
  hash_list bucket = set_hash_container[hash_text (affix_name, affix_hash_size)];
  set_affix entry;
  int entry_index;
  int pos;

  for (pos = 0; pos < bucket -> size; pos++)
    {
      entry_index = bucket -> array[pos];
      entry = (set_affix) (all_set_affixes -> array[entry_index]);
      if (!streq (affix_name, entry -> affix_name))
        continue;

      /* Known name: refuse a registration that contradicts the stored one */
      if (entry -> coder_number != coder_number) return (-1);
      if (entry -> affix_value != affix_value) return (-1);

      if (lhs)
        entry -> lhs = 1;
      else
        app_uniq_int_array (entry -> lhs_idxs, prev_lhs);
      return (entry_index);
    }

  /* Unknown name: build a new entry and enter it in table and bucket */
  entry = (set_affix) abs_malloc (sizeof (struct set_affix_rec), "register_new_set_affix");
  entry -> affix_name = affix_name;
  entry -> coder_number = coder_number;
  entry -> affix_value = affix_value;
  entry -> lhs = lhs;
  entry -> lhs_idxs = new_int_array ();
  if (!lhs)
    app_int_array (entry -> lhs_idxs, prev_lhs);

  entry_index = all_set_affixes -> size;
  app_ptr_array (all_set_affixes, (void *) entry);
  app_hash_list (bucket, entry_index);
  return (entry_index);
}

/*
   For union affixes, we need a special equality and name generation (and hash function)
*/
/*
   Check whether affix_name is exactly the names of the affixes in idxs
   joined by '|', without building the joined string.
   Returns 1 on an exact match and 0 otherwise.
*/
static int equal_union_name (int_array idxs, char *affix_name)
{
  char *cursor = affix_name;
  int last = idxs -> size - 1;
  int member;

  for (member = 0; member < idxs -> size; member++)
    {
      set_affix entry = (set_affix) all_set_affixes -> array[idxs -> array[member]];
      char *part = entry -> affix_name;

      /* The member name must appear literally at the cursor; a '\0' in
         affix_name mismatches here, so the cursor never runs past the end */
      while (*part != '\0')
        {
          if (*part != *cursor) return (0);
          part++;
          cursor++;
        }

      /* Every member but the last must be followed by the separator */
      if (member == last) continue;
      if (*cursor != '|') return (0);
      cursor++;
    }

  /* A match requires that nothing is left of the stored name */
  return (*cursor == '\0');
}

/*
   Build the name of a union affix: the names of the affixes in idxs,
   in order, joined by '|'.
   Returns a string allocated with abs_malloc; the caller owns it.
   An empty idxs yields an empty string.
*/
static char *new_union_name (int_array idxs)
{ int ix;
  int total_len = 0;
  char *new_name, *ptr;

  /* Each member needs its name plus one byte: a '|' or the final '\0' */
  for (ix = 0; ix < idxs -> size; ix++)
    { int aff_nr = idxs -> array[ix];
      set_affix s = (set_affix) all_set_affixes -> array[aff_nr];
      total_len += (int) strlen (s -> affix_name) + 1;
    };

  /* With no members nothing was counted, yet the terminator is still
     written below: reserve room for it to avoid writing past the block */
  if (total_len == 0) total_len = 1;

  new_name = abs_malloc (total_len, "new_union_name");
  for (ix = 0, ptr = new_name; ix < idxs -> size; ix++)
    { int aff_nr = idxs -> array[ix];
      set_affix s = (set_affix) all_set_affixes -> array[aff_nr];
      char *sptr = s -> affix_name;
      if (ix) *ptr++ = '|';
      while (*sptr) *ptr++ = *sptr++;
    };
  *ptr = '\0';
  return (new_name);
}

/*
   Compute the value of a union affix: the bitwise OR of the values of
   all affixes in idxs (0 for an empty idxs).
*/
static Bitset32 new_union_value (int_array idxs)
{
  Bitset32 combined = 0;
  int member = 0;

  while (member < idxs -> size)
    {
      set_affix entry = (set_affix) all_set_affixes -> array[idxs -> array[member]];
      combined |= entry -> affix_value;
      member++;
    }
  return (combined);
}

/*
   Register the union of the set affixes in idxs and return its index in
   all_set_affixes.

   The union lives in the same table and hash container as the named set
   affixes; an existing entry whose name equals the joined member names
   is reused, and formal is added to its lhs_idxs (without duplicates).
   Otherwise a new entry is created with coder number -1, the OR of the
   member values, lhs cleared and formal as its only lhs_idxs element.
*/
int register_new_union_affix (int_array idxs, int formal)
{
  hash_list bucket = set_hash_container[hash_union_int_array (idxs, affix_hash_size)];
  set_affix entry;
  int entry_index;
  int pos = 0;

  /* Search the bucket for an entry carrying the joined name */
  while (pos < bucket -> size)
    {
      entry_index = bucket -> array[pos];
      entry = (set_affix) (all_set_affixes -> array[entry_index]);
      if (equal_union_name (idxs, entry -> affix_name))
        {
          app_uniq_int_array (entry -> lhs_idxs, formal);
          return (entry_index);
        }
      pos++;
    }

  /* Not found: build the union entry and enter it in table and bucket */
  entry = (set_affix) abs_malloc (sizeof (struct set_affix_rec), "register_new_union_affix");
  entry -> affix_name = new_union_name (idxs);
  entry -> coder_number = -1;
  entry -> affix_value = new_union_value (idxs);
  entry -> lhs = 0;
  entry -> lhs_idxs = new_int_array ();
  app_int_array (entry -> lhs_idxs, formal);

  entry_index = all_set_affixes -> size;
  app_ptr_array (all_set_affixes, (void *) entry);
  app_hash_list (bucket, entry_index);
  return (entry_index);
}

/*
   Type checking of affixes and the creation of the union of affixes
*/
char *affix_name_from_index (int affix_index)
{ switch (affix_index)
    { case FormalINT:	return ("INT");
      case FormalTEXT:	return ("TEXT");
      case FormalSET:	return ("<SET>");
      case FormalUNION:	return ("<UNION>");
      case FormalERROR:	return ("<ERROR>");
      default:
	if ((0 <= affix_index) && (affix_index < all_set_affixes -> size))
	  { set_affix s = (set_affix) all_set_affixes -> array[affix_index];
	    return (s -> affix_name);
	  };
    };
  abs_bug ("affix_name_from_index", "Illegal index %d", affix_index);
  return (NULL);
}

Bitset32 affix_value_from_index (int affix_index)
{ if ((0 <= affix_index) && (affix_index < all_set_affixes -> size))
    { set_affix s = (set_affix) all_set_affixes -> array[affix_index];
      return (s -> affix_value);
    };

  abs_bug ("affix_value_from_index", "Illegal index %d", affix_index);
  return (0);
}

/*
   Calculate the closure of the set affixes so that
   we can implement a typecheck of the dat file
*/
/*
   State filled in by calculate_affix_values_closure:
   nr_lif_affixes          number of set affixes when the closure was taken,
                           also the row length of both matrices below
   affix_in_domain         size * size matrix, entry [a * size + d] is 1 when
                           a == d or d occurs in the lhs_idxs of affix a
   affix_in_domain_closure result of abs_warshall on affix_in_domain
*/
static int nr_lif_affixes;
static char *affix_in_domain;
static char *affix_in_domain_closure;
/*
   Build the "affix belongs to domain" relation over all set affixes
   registered so far and take its closure with abs_warshall.
   Records the current number of set affixes in nr_lif_affixes; the
   relation is a row-major matrix with one row per affix.
*/
void calculate_affix_values_closure ()
{
  int count = all_set_affixes -> size;
  int row, col;

  nr_lif_affixes = count;
  affix_in_domain = (char *) abs_calloc (count * count, sizeof (char),
					 "calculate_affix_values_closure");

  /* First pass: start from the identity relation */
  for (row = 0; row < count; row++)
    {
      char *cells = affix_in_domain + row * count;
      for (col = 0; col < count; col++)
        cells[col] = (col == row);
    }

  /* Second pass: affix `row' is in every domain listed in its lhs_idxs */
  for (row = 0; row < count; row++)
    {
      set_affix entry = (set_affix) all_set_affixes -> array[row];
      int_array domains = entry -> lhs_idxs;
      for (col = 0; col < domains -> size; col++)
        affix_in_domain[row * count + domains -> array[col]] = 1;
    }

  /* Close the relation */
  affix_in_domain_closure = abs_warshall (count, affix_in_domain);
}

/*
   Typechecking of set affixes
*/
/*
   Read the closed domain relation: returns the matrix cell for
   (affix_nr, domain_nr). Both numbers are used unchecked as indices, and
   calculate_affix_values_closure must have run before.
*/
int affix_belongs_to_domain (int affix_nr, int domain_nr)
{
  int cell = affix_nr * nr_lif_affixes + domain_nr;

  return (affix_in_domain_closure[cell]);
}

/*
   Generation of statistic information for various output like the info file
*/
/* Number of distinct INT affix values registered so far */
int nr_of_int_affixes ()
{
  int count = all_int_affixes -> size;

  return (count);
}

/* Number of distinct TEXT affix values registered so far */
int nr_of_text_affixes ()
{
  int count = all_text_affixes -> size;

  return (count);
}

/* Number of entries in the set affix table (named set affixes and unions) */
int nr_of_set_affixes ()
{
  int count = all_set_affixes -> size;

  return (count);
}

/*
   Generation of debugging output i.e. the info file
*/
/* Write the INT affix table to info: a header and one "index: value" line per entry */
static void generate_int_affix_values_info (FILE *info)
{
  int entry = 0;

  fprintf (info, "INT affix table:\n");
  while (entry < all_int_affixes -> size)
    {
      fprintf (info, "%4d: %d\n", entry, all_int_affixes -> array[entry]);
      entry++;
    }
}

/* Write the TEXT affix table to info: a header and one "index: 'text'" line per entry */
static void generate_text_affix_values_info (FILE *info)
{
  int entry = 0;

  fprintf (info, "TEXT affix table:\n");
  while (entry < all_text_affixes -> size)
    {
      fprintf (info, "%4d: '%s'\n", entry, all_text_affixes -> array[entry]);
      entry++;
    }
}

/*
   Write the set affix table to info. Each line shows the index, the
   affix name, "lhs" or "rhs" according to the lhs flag, and the comma
   separated contents of lhs_idxs between brackets.
*/
static void generate_set_affix_values_info (FILE *info)
{
  int entry_nr;

  fprintf (info, "Set affix table:\n");
  for (entry_nr = 0; entry_nr < all_set_affixes -> size; entry_nr++)
    {
      set_affix entry = (set_affix) all_set_affixes -> array[entry_nr];
      int_array idxs = entry -> lhs_idxs;
      int pos = 0;

      fprintf (info, "%4d: %s %s [", entry_nr, entry -> affix_name,
               (entry -> lhs)?"lhs":"rhs");
      while (pos < idxs -> size)
        {
          /* Separator goes before every element but the first */
          if (pos != 0) fprintf (info, ",");
          fprintf (info, " %d", idxs -> array[pos]);
          pos++;
        }
      fprintf (info, " ]\n");
    }
}

/* Write all three affix tables to info, in the order INT, TEXT, set */
void generate_affix_values_info (FILE *info)
{
  generate_int_affix_values_info (info);
  generate_text_affix_values_info (info);
  generate_set_affix_values_info (info);
}

/*
   Dumping of affix values
*/
void dump_affix_value (FILE *dump, int fpar, int act)
{ switch (fpar)
    { case FormalINT:
	fprintf (dump, "%d", all_int_affixes -> array[act]);
	break;
      case FormalTEXT:
	fprintf (dump, "%s", all_text_affixes -> array[act]);
	break;
      default:
	{ set_affix affix = (set_affix) all_set_affixes -> array[act];
	  fprintf (dump, "%s", affix -> affix_name);
	};
    };
}

/*
   Binary save the affix values
   We also save the nr of set affixes found in the lif
   which is needed to check for recompilation
*/
/*
   Write one set affix record to bf: name, bitset value, lhs flag and the
   lhs_idxs array, in that order. The coder_number is not written.
   is_new_set_affix and bin_ignore_set_affix read the fields back in the
   same order, so the sequence below defines the record layout.
*/
static void bin_save_set_affix (BinFile bf, set_affix affix)
{ abs_bin_save_string (bf, affix -> affix_name);
  abs_bin_save_u_int (bf, affix -> affix_value);
  abs_bin_save_int (bf, affix -> lhs);
  bin_save_int_array (bf, affix -> lhs_idxs);
}

/*
   Write the complete affix administration to bf: nr_lif_affixes, the
   number of set affixes, every set affix record, and then the INT and
   TEXT tables.
*/
void bin_save_affix_values (BinFile bf)
{
  int entry = 0;

  /* Header: the two counts that has_new_lif_affix_values reads first */
  abs_bin_save_int (bf, nr_lif_affixes);
  abs_bin_save_int (bf, all_set_affixes -> size);

  while (entry < all_set_affixes -> size)
    {
      bin_save_set_affix (bf, (set_affix) all_set_affixes -> array[entry]);
      entry++;
    }

  bin_save_int_array (bf, all_int_affixes);
  bin_save_text_array (bf, all_text_affixes);
}

/*
   Compare the next set affix record in bf with affix.
   Returns 1 as soon as the name, value or lhs flag differs; in that case
   the rest of the record is left unread. Otherwise the result of
   bin_cmp_int_array on lhs_idxs is returned (presumably nonzero when the
   stored array differs; confirm against its definition).
*/
static int is_new_set_affix (BinFile bf, set_affix affix)
{ int ival;
  u_int uval;
  char *sval;
  int name_differs;

  /* The loaded name is released on both outcomes: decide first, free,
     then return, so a mismatch no longer leaks the string */
  abs_bin_load_string (bf, &sval);
  name_differs = !streq (sval, affix -> affix_name);
  abs_free (sval, "is_new_set_affix");
  if (name_differs) return (1);

  abs_bin_load_u_int (bf, &uval);
  if (uval != affix -> affix_value) return (1);
  abs_bin_load_int (bf, &ival);
  if (ival != affix -> lhs) return (1);
  return (bin_cmp_int_array (bf, affix -> lhs_idxs));
}

/*
   Skip one set affix record in bf: a string, an unsigned int, an int and
   then a count followed by that many ints. The loaded string is freed;
   all other values are discarded.
*/
static void bin_ignore_set_affix (BinFile bf)
{
  char *name;
  u_int value;
  int scratch;
  int remaining;

  abs_bin_load_string (bf, &name);
  abs_free (name, "bin_ignore_set_affix");
  abs_bin_load_u_int (bf, &value);
  abs_bin_load_int (bf, &scratch);

  /* The trailing array: element count first, then the elements */
  abs_bin_load_int (bf, &remaining);
  while (remaining > 0)
    {
      abs_bin_load_int (bf, &scratch);
      remaining--;
    }
}

/* Skip an int array in bf: read the element count, then read and discard that many ints */
static void bin_ignore_int_array (BinFile bf)
{
  int remaining;
  int scratch;

  abs_bin_load_int (bf, &remaining);
  while (remaining > 0)
    {
      abs_bin_load_int (bf, &scratch);
      remaining--;
    }
}

/* Skip a text array in bf: read the element count, then load and free that many strings */
static void bin_ignore_text_array (BinFile bf)
{
  int remaining;
  char *text;

  abs_bin_load_int (bf, &remaining);
  while (remaining > 0)
    {
      abs_bin_load_string (bf, &text);
      abs_free (text, "bin_ignore_text_array");
      remaining--;
    }
}


/*
   Compare the affix values saved in bf with the current administration.
   Returns 1 when the stored nr_lif_affixes differs from the current one
   or when one of the first nr_lif_affixes set affix records differs; in
   those cases the remainder of the saved data is left unread.
   Returns 0 otherwise, after skipping the remaining set affix records
   and the INT and TEXT tables.
*/
int has_new_lif_affix_values (BinFile bf)
{
  int stored_lif_count;
  int stored_set_count;
  int entry = 0;

  abs_bin_load_int (bf, &stored_lif_count);
  if (stored_lif_count != nr_lif_affixes) return (1);
  abs_bin_load_int (bf, &stored_set_count);

  /* Records that have a counterpart in the current table are compared */
  while (entry < nr_lif_affixes)
    {
      if (is_new_set_affix (bf, (set_affix) all_set_affixes -> array[entry]))
        return (1);
      entry++;
    }

  /* Whatever was saved beyond those is skipped */
  while (entry < stored_set_count)
    {
      bin_ignore_set_affix (bf);
      entry++;
    }
  bin_ignore_int_array (bf);
  bin_ignore_text_array (bf);
  return (0);
}

/*
   Initialization stuff
*/
/*
   Set up the affix administration: create the three value tables and,
   for each of them, a hash container of affix_hash_size buckets holding
   an empty hash list per bucket.
*/
void init_affix_values ()
{
  int bucket = 0;

  all_int_affixes = init_int_array (affix_hash_size);
  int_hash_container = (hash_container)
	abs_calloc (affix_hash_size, sizeof (hash_list), "init_affix_values");

  all_text_affixes = init_text_array (affix_hash_size);
  text_hash_container = (hash_container)
	abs_calloc (affix_hash_size, sizeof (hash_list), "init_affix_values");

  all_set_affixes = init_ptr_array (affix_hash_size);
  set_hash_container = (hash_container)
	abs_calloc (affix_hash_size, sizeof (hash_list), "init_affix_values");

  /* Give every bucket of every container its own empty list */
  while (bucket < affix_hash_size)
    {
      int_hash_container[bucket] = new_hash_list ();
      text_hash_container[bucket] = new_hash_list ();
      set_hash_container[bucket] = new_hash_list ();
      bucket++;
    }
}
