/*
   File: entries.c
   Maintains the administration of all parts of speech i.e. all
   combinations of a nonterminal id x actual parameters.

   Copyright 2007 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: entries.c,v 1.7 2009/02/19 16:27:33 marcs Exp $"
*/

/* system includes */
#include <stdio.h>
#include <string.h>

/* libabase includes */
#include <abase_repr.h>
#include <abase_error.h>
#include <abase_memalloc.h>
#include <abase_fileutil.h>

/* liblexicon includes */
#include <lxcn_fact_table.h>
#include <lxcn_vocabulary.h>

/* local includes */
#include "options.h"
#include "dyn_array.h"
#include "affix_values.h"
#include "nonterminals.h"
#include "entries.h"
#include "globals.h"

/*
   An entry pairs a call (identified by its call id) with the
   frequency accumulated for that call.
   'entry' is a pointer to such a record.
*/
struct entry_rec
{ int call_id;		/* id of the call; handed to dump_call when dumping */
  int frequency;	/* sum of all frequencies registered for this call */
};
typedef struct entry_rec *entry;

/*
   Storage of all entries and entry lists.
   all_entries is a growable vector of entry records, of which
   my_nr_of_entries are in use out of my_room_of_entries allocated.
   all_entry_lists holds the entry lists; each list is an int_array
   with indices into all_entries.
   Note: entry list 0 is reserved
*/
#define DEFAULT_ENTRIES 16384
static struct entry_rec *all_entries;
static int my_nr_of_entries;
static int my_room_of_entries;
static ptr_array all_entry_lists;

/*
   Deliver the entry list designated by *info_ptr.
   A zero *info_ptr means that no list has been allocated yet (list 0
   is reserved): a fresh empty list is then appended to all_entry_lists
   and its index is stored back through info_ptr.
*/
static int_array create_or_locate_entry_list (int *info_ptr)
{ int list_nr = *info_ptr;
  int_array fresh;
  if (list_nr != 0)
    return ((int_array) all_entry_lists -> array[list_nr]);

  /* The new list lands at the current end of the list vector */
  fresh = new_int_array ();
  list_nr = all_entry_lists -> size;
  app_ptr_array (all_entry_lists, (void *) fresh);
  *info_ptr = list_nr;
  return (fresh);
}

/*
   Append a new entry (call_id, frequency) at the end of all_entries.
   When the vector is full its room is doubled first.
*/
static void create_new_entry (int call_id, int frequency)
{ entry slot;
  if (my_nr_of_entries == my_room_of_entries)
    { /* No free slot left: double the allocated room */
      my_room_of_entries *= 2;
      all_entries = (entry) abs_realloc ((void *) all_entries,
                                         my_room_of_entries * sizeof (struct entry_rec),
                                         "create_new_entry");
    }
  slot = all_entries + my_nr_of_entries;
  slot -> call_id = call_id;
  slot -> frequency = frequency;
  my_nr_of_entries++;
}

/*
   Register a call with its frequency in the entry list belonging to
   *info_ptr (the list is created on first use).
   If the list already has an entry for call_id, the frequency is added
   to that entry; otherwise a new entry is created and appended.
*/
void register_new_entry (int *info_ptr, int call_id, int frequency)
{ int_array list = create_or_locate_entry_list (info_ptr);
  int pos = 0;
  while (pos < list -> size)
    { entry candidate = all_entries + list -> array[pos];
      if (candidate -> call_id == call_id)
        { /* Known call: just accumulate */
          candidate -> frequency += frequency;
          return;
        }
      pos++;
    }

  /* Unknown call: the entry created next gets index my_nr_of_entries */
  app_int_array (list, my_nr_of_entries);
  create_new_entry (call_id, frequency);
}

/* Deliver the number of entries created so far */
int nr_of_entries ()
{ int total = my_nr_of_entries;
  return (total);
}

/*
   Deliver the size of the entry list vector.
   The reserved list 0 is part of this count.
*/
int nr_of_entry_lists ()
{ int total = all_entry_lists -> size;
  return (total);
}

/*
  Generate info file
*/
/*
   Write a readable overview of all entry lists to info: one line per
   list, holding the list number followed by a (call id,frequency)
   pair for every entry in the list.
*/
void generate_entries_info (FILE *info)
{ int list_nr;
  fprintf (info, "Entry lists:\n");

  /* List 0 is reserved, hence numbering starts at 1 */
  for (list_nr = 1; list_nr < all_entry_lists -> size; list_nr++)
    { int_array list = (int_array) all_entry_lists -> array[list_nr];
      int pos = 0;
      fprintf (info, "%8d:", list_nr);
      while (pos < list -> size)
        { int entry_nr = list -> array[pos];
          fprintf (info, " (%d,%d)", all_entries[entry_nr].call_id,
                   all_entries[entry_nr].frequency);
          pos++;
        }
      fprintf (info, "\n");
    }
}

/*
  Entering into the global vocabularies and fact tables
*/
/*
   Enter a lexeme into the vocabulary that belongs to lexeme_marker.
   If no vocabulary exists yet for this marker, a new one is created
   and registered in all_lexeme_markers / all_vocabularies.
   The result is the info pointer delivered by
   lxcn_enter_into_vocabulary for this lexeme.
*/
int *enter_into_lexicon (char *lexeme_ptr, int lexeme_marker)
{ Vocabulary new_voc = NULL;
  int *new_info_ptr;
  int ix;
  for (ix = 0; ix < all_lexeme_markers -> size; ix++)
    if (all_lexeme_markers -> array[ix] == lexeme_marker)
      { Vocabulary old_voc = (Vocabulary) all_vocabularies -> array[ix];
        int *old_info_ptr = lxcn_enter_into_vocabulary (&old_voc, lexeme_ptr);

        /* The root is passed by address, so the callee is able to replace
           it; store it back so that such a change is not lost */
        all_vocabularies -> array[ix] = (void *) old_voc;
        return (old_info_ptr);
      }

  /* We have encountered a new marker */
  new_info_ptr = lxcn_enter_into_vocabulary (&new_voc, lexeme_ptr);
  app_int_array (all_lexeme_markers, lexeme_marker);
  app_ptr_array (all_vocabularies, (void *) new_voc);
  return (new_info_ptr);
}

/*
   Check whether the fact stored in facts at position index agrees with
   key. key[0] holds the number of critical values, key[1] onwards the
   values themselves; the stored fact has its values consecutively from
   position index. Delivers 1 if all values agree, 0 otherwise.
*/
static int equal_fact (int_array facts, int index, int *key)
{ int nr_crits = key[0];
  int nr = 0;
  while (nr < nr_crits)
    { int stored = facts -> array[index + nr];
      int wanted = key[nr + 1];		/* values follow the count */
      if (stored != wanted)
        return (0);
      nr++;
    }
  return (1);
}

/*
   Locate or create the fact with the given key in fact table fact_nr.
   key[0] holds the number of critical values, key[1] onwards the values.
   Every fact occupies nr_crits + 1 ints in its hash bucket: the critical
   values followed by one info slot, which starts out as 0.
   Delivers a pointer to the info slot of the fact.
   NOTE(review): the pointer refers into the bucket's array; presumably
   it becomes stale once the bucket grows - confirm against app_int_array.
*/
#define DEFAULT_HASH_LEN 16
int *enter_into_fact_table (int fact_nr, int *key)
{ int bucket_nr = lxcn_hash_search_key (key, all_hash_sizes[fact_nr]);
  int_array bucket = all_fact_tables[fact_nr][bucket_nr];
  int nr_crits = key[0];
  int stride = nr_crits + 1;
  int at, ix;

  /* First fact hashing to this bucket: allocate the bucket */
  if (bucket == NULL)
    { bucket = init_int_array (DEFAULT_HASH_LEN);
      all_fact_tables[fact_nr][bucket_nr] = bucket;
    }

  /* Walk over the facts in the bucket looking for the key */
  at = 0;
  while (at < bucket -> size)
    { if (equal_fact (bucket, at, key))
        return (&bucket -> array[at + nr_crits]);
      at += stride;
    }

  /* Not present: append the critical values and a zeroed info slot */
  for (ix = 1; ix <= nr_crits; ix++)
    app_int_array (bucket, key[ix]);
  app_int_array (bucket, 0);
  return (&bucket -> array[bucket -> size - 1]);
}

/*
  Generate dump file
*/
/*
   Write key to dump between double quotes.
   Tab, newline, carriage return and double quote are written as the
   two character sequences \t, \n, \r and \"; every other character
   is copied unchanged.
   NOTE(review): a backslash in key is copied as is, so it cannot be
   told apart from an escape in the output - confirm this is intended.
*/
static void dump_string (FILE *dump, char *key)
{ char *cursor;
  fputc ('\"', dump);
  for (cursor = key; *cursor != '\0'; cursor++)
    { char ch = *cursor;
      if (ch == '\t') fputs ("\\t", dump);
      else if (ch == '\n') fputs ("\\n", dump);
      else if (ch == '\r') fputs ("\\r", dump);
      else if (ch == '\"') fputs ("\\\"", dump);
      else fputc (ch, dump);
    }
  fputc ('\"', dump);
}

/*
   Dump all entries of entry list el, one per line: a tab, the call
   (through dump_call) and, only when it exceeds 1, a tab and the
   frequency.
*/
static void dump_entry_list (FILE *dump, int_array el)
{ int pos = 0;
  while (pos < el -> size)
    { int entry_nr = el -> array[pos];
      int freq;
      fprintf (dump, "\t");
      dump_call (dump, all_entries[entry_nr].call_id);
      freq = all_entries[entry_nr].frequency;
      if (freq > 1)
        fprintf (dump, "\t%d", freq);
      fprintf (dump, "\n");
      pos++;
    }
}

static void dump_vocabulary_entry (void *arg, char *key, int info)
{ FILE *dump = (FILE *) arg;
  int_array el = (int_array) all_entry_lists -> array[info];
  dump_string (dump, key);
  fprintf (dump, ": ");
  dump_entry_list (dump, el);
}

void dump_vocabulary_entries (FILE *dump)
{ int ix;
  for (ix = 0; ix < all_lexeme_markers -> size; ix++)
    { int marker = all_lexeme_markers -> array[ix];
      Vocabulary voc = (Vocabulary) all_vocabularies -> array[ix];
      void *arg = (void *) dump;
      fprintf (dump, "Vocabulary with marker %d\n", marker);
      lxcn_iterate_over_vocabulary (voc, dump_vocabulary_entry, arg);
    };
}

/*
   Dump all facts stored in one hash bucket (call_list) of a fact table.
   Every fact occupies nr_crits + 1 ints: its critical values followed
   by the number of its entry list. For each fact the critical values
   are written, separated by ", ", then ": " and the entry list.
   formals and criticals are indexed by formal position; criticals tells
   which formals are critical, formals tells whether a critical value
   is a text (an index into crit_text_vector) or a plain number.
*/
static void dump_fact_call_list (FILE *dump, int_array formals, int_array criticals,
                                 int nr_crits, int_array call_list)
{ int index;
  for (index = 0; index < call_list -> size; index += (nr_crits + 1))
    { int_array el = (int_array) all_entry_lists -> array[call_list -> array[index + nr_crits]];
      int ix, cidx;
      for (ix = 0, cidx = 0; ix < formals -> size; ix++)
        if (criticals -> array[ix])
          { /* Only the critical values are stored, so the value must be
               fetched by its rank among the criticals (cidx) and not by
               its formal position (ix) */
            int crit_idx = call_list -> array[index + cidx];
            if (cidx) fprintf (dump, ", ");
            if (formals -> array[ix] == FormalTEXT)
              fprintf (dump, "\"%s\"", crit_text_vector -> array[crit_idx]);
            else fprintf (dump, "%d ", crit_idx);
            cidx++;
          }
      fprintf (dump, ": ");
      dump_entry_list (dump, el);
    }
}

/*
   Dump all facts of fact table fact_nr. table is the hash container
   of the fact table and size its number of buckets; unused buckets
   are skipped.
*/
static void dump_fact_calls (FILE *dump, int fact_nr, hash_container table, int size)
{ int nont_id = nont_id_from_fact_nr (fact_nr);
  int_array formals = formals_from_nonterminal (nont_id);
  int_array criticals = criticals_from_nonterminal (nont_id);
  int nr_crits = count_criticals (criticals);
  int bucket_nr;
  for (bucket_nr = 0; bucket_nr < size; bucket_nr++)
    { int_array bucket = table[bucket_nr];
      if (bucket != NULL)
        dump_fact_call_list (dump, formals, criticals, nr_crits, bucket);
    }
}

void dump_fact_tables (FILE *dump)
{ int ix;
  for (ix = 0; ix < nr_of_facts (); ix++)
    { hash_container table = all_fact_tables[ix];
      int hash_size = all_hash_sizes[ix];
      fprintf (dump, "Fact table with nr %d: ", ix);
      dump_fact (dump, ix);
      fprintf (dump, "\n");
      dump_fact_calls (dump, ix, table, hash_size);
    };
}

/*
   Binary saving of entries and entry lists
*/
/*
   Save one entry list: first the number of entries in the list, then
   for every entry its call id and its frequency.
*/
static void my_bin_save_entries (BinFile bf, int_array el)
{ int pos = 0;
  abs_bin_save_int (bf, el -> size);
  while (pos < el -> size)
    { int entry_nr = el -> array[pos];
      abs_bin_save_int (bf, all_entries[entry_nr].call_id);
      abs_bin_save_int (bf, all_entries[entry_nr].frequency);
      pos++;
    }
}

/*
   Save all entry lists. The size of the list vector is saved first;
   this size includes the reserved list 0, which itself is not saved:
   the lists that follow are those numbered 1 and up.
*/
void bin_save_entries (BinFile bf)
{ int list_nr = 1;
  abs_bin_save_int (bf, all_entry_lists -> size);
  while (list_nr < all_entry_lists -> size)
    { int_array list = (int_array) all_entry_lists -> array[list_nr];
      my_bin_save_entries (bf, list);
      list_nr++;
    }
}

/*
   Binary saving of vocabularies and fact tables
*/
/*
   Save all vocabularies: the number of vocabularies, then for each
   of them its lexeme marker followed by its trie.
*/
void bin_save_vocabularies (BinFile bf)
{ int nr = 0;
  abs_bin_save_int (bf, all_lexeme_markers -> size);
  while (nr < all_lexeme_markers -> size)
    { abs_bin_save_int (bf, all_lexeme_markers -> array[nr]);
      lxcn_bin_save_trie (bf, (Vocabulary) all_vocabularies -> array[nr]);
      nr++;
    }
}

/*
   Save fact table fact_nr: the hash size and the number of critical
   values per fact, then for every bucket the number of facts in it
   (0 for an unused bucket) followed by the raw ints of the bucket.
   A fact occupies nr_crits + 1 ints.
*/
void bin_save_fact_table (BinFile bf, int fact_nr)
{ hash_container table = all_fact_tables[fact_nr];
  int hash_size = all_hash_sizes[fact_nr];
  int nont_id = nont_id_from_fact_nr (fact_nr);
  int nr_crits = count_criticals (criticals_from_nonterminal (nont_id));
  int stride = nr_crits + 1;
  int bucket_nr;

  abs_bin_save_int (bf, hash_size);
  abs_bin_save_int (bf, nr_crits);
  for (bucket_nr = 0; bucket_nr < hash_size; bucket_nr++)
    { int_array bucket = table[bucket_nr];
      if (bucket != int_array_nil)
        { int pos;
          abs_bin_save_int (bf, bucket -> size / stride);
          for (pos = 0; pos < bucket -> size; pos++)
            abs_bin_save_int (bf, bucket -> array[pos]);
        }
      else abs_bin_save_int (bf, 0);
    }
}

/*
   Save the trie of critical texts, the number of fact tables and
   then every fact table in order of its number.
*/
void bin_save_fact_tables (BinFile bf)
{ int nr_facts = nr_of_facts ();
  int fact_nr = 0;

  lxcn_bin_save_trie (bf, all_critical_texts);
  abs_bin_save_int (bf, nr_facts);
  while (fact_nr < nr_facts)
    { bin_save_fact_table (bf, fact_nr);
      fact_nr++;
    }
}

/*
   Initialization stuff
*/
#define DEFAULT_MAX_MARKERS 8
#define DEFAULT_FACT_HASH_SIZE 65536

/*
   Set up the whole administration of this module: the entry vector,
   the entry lists (with the reserved list 0), the lexeme markers and
   vocabularies, the critical texts (with an empty text at index 0)
   and, for every fact, a hash table of DEFAULT_FACT_HASH_SIZE empty
   buckets.
*/
void init_entries ()
{ int nr_facts = nr_of_facts ();
  int fact_nr;

  /* Entries and entry lists */
  all_entries = (struct entry_rec *) abs_calloc (DEFAULT_ENTRIES, sizeof (struct entry_rec),
                                                 "init_entries");
  my_nr_of_entries = 0;
  my_room_of_entries = DEFAULT_ENTRIES;
  all_entry_lists = init_ptr_array (DEFAULT_ENTRIES);
  app_ptr_array (all_entry_lists, (void *) NULL);	/* Entry 0 is reserved */

  /* Vocabularies with their lexeme markers */
  all_lexeme_markers = init_int_array (DEFAULT_MAX_MARKERS);
  all_vocabularies = init_ptr_array (DEFAULT_MAX_MARKERS);

  /* Critical texts */
  all_critical_texts = vocabulary_nil;
  crit_text_vector = init_text_array (DEFAULT_FACT_HASH_SIZE);
  app_text_array (crit_text_vector, abs_new_string ("", "init_entries"));

  /* Hash sizes: every fact table starts with the default size */
  all_hash_sizes = (int *) abs_calloc (nr_facts, sizeof (int), "init_entries");
  fact_nr = 0;
  while (fact_nr < nr_facts)
    { all_hash_sizes[fact_nr] = DEFAULT_FACT_HASH_SIZE;
      fact_nr++;
    }

  /* Fact tables: one container of empty buckets per fact */
  all_fact_tables = (hash_container *) abs_calloc (nr_facts, sizeof (hash_container),
                                                   "init_entries");
  for (fact_nr = 0; fact_nr < nr_of_facts (); fact_nr++)
    { int bucket_nr = 0;
      all_fact_tables[fact_nr] = (hash_container) abs_calloc (all_hash_sizes[fact_nr],
                                                              sizeof (int_array),
                                                              "init_entries");
      while (bucket_nr < all_hash_sizes[fact_nr])
        { all_fact_tables[fact_nr][bucket_nr] = NULL;
          bucket_nr++;
        }
    }
}
