/*
   File: lxcn_lexicon.c
   Maintains a lexicon and reads a lexicon as generated by agfl-lexgen

   Copyright 2007 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: lxcn_lexicon.c,v 1.5 2007/10/31 14:58:06 marcs Exp $"
*/

/* standard includes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* libabase includes */
#include <abase_repr.h>
#include <abase_error.h>
#include <abase_fileutil.h>
#include <abase_memalloc.h>

/* local includes */
#include "lxcn_vocabulary.h"
#include "lxcn_lexicon.h"
#include "lxcn_lexicon_impl.h"

/*------------------------------------------------------------------------------
// Format of lexicon file (as delivered by agfl-lexgen)
//
// 0a) lexicon header
// 0b) int		nr_lexicons
// 0c) char *		lexicon_names[]
//
// The affix table: the nr of lif affixes is saved to check for lexicon recompilation.
//
// 1a) int		nr_lif_affixes
// 1b) int      	nr_set_affixes
// 1c) setaffix		set_affixes[]
// 2a) int		nr_int_affixes
// 2b) int		int_affixes[]
// 3a) int		nr_text_affixes
// 3b) char *		text_affixes[]
//
// The nonterminal and call table:
//
// 4a) int		nr_nonterminals
// 4b) nonterminal	nonterminals[]
// 5a) int		nr_calls
// 5b) call		calls
//
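// The mapping from list nr to calls. Entry 0 is not stored in the file;
// every other entry is saved as an int count followed by that many
// (call id, frequency) int pairs.
// 6a) int		nr_entries
// 6b) entry		all_entries (saved as int *)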
//
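// The vocabularies: after the count, each vocabulary is saved as its
// lexeme marker immediately followed by its trie.
// 7a) int		nr_vocs
// 7b) int		marker		(one per vocabulary)
// 7c) Vocabulary	vocabulary	(one per vocabulary, follows its marker)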
//----------------------------------------------------------------------------*/

/* Read and discard an array of texts: an int count followed by that many
   strings. Used to step over the lexicon names at the start of the file.
   Each string is released again right after it has been loaded. */
static void bin_skip_text_array (BinFile bf)
{
  int remaining;
  char *text;

  abs_bin_load_int (bf, &remaining);
  for (; remaining > 0; remaining--)
    {
      abs_bin_load_string (bf, &text);
      abs_free (text, "bin_skip_text_array");
    }
}

/* Read one set affix record from bf into *affix.
   The fields are read in file order: name, bitset, is_nont, the number of
   lhs entries and finally the lhs entries themselves. The lhs array is
   allocated here and stored in affix -> lhs. */
static void bin_load_set_affix (BinFile bf, SetAffix *affix)
{
  int *lhs;
  int count, i;

  abs_bin_load_string (bf, &affix -> name);
  abs_bin_load_u_int (bf, &affix -> bitset);
  abs_bin_load_int (bf, &affix -> is_nont);
  abs_bin_load_int (bf, &affix -> nr_lhs_sides);

  count = affix -> nr_lhs_sides;
  lhs = (int *) abs_calloc (count, sizeof (int), "bin_load_set_affix");
  affix -> lhs = lhs;
  for (i = 0; i < count; i++)
    abs_bin_load_int (bf, lhs + i);
}

/* Read the affix tables from bf into lex.
   The file holds, in order: the number of lif affixes (read but not kept),
   the set affixes, the int affixes and the text affixes. Each table is
   stored as a count followed by its elements and is allocated here. */
static void bin_load_affix_values (BinFile bf, NewLexicon lex)
{
  int lif_count;	/* only read to stay in step with the file layout */
  int i;

  abs_bin_load_int (bf, &lif_count);

  /* Set affixes */
  abs_bin_load_int (bf, &lex -> nr_set_affixes);
  lex -> set_affixes = (SetAffix *) abs_calloc (lex -> nr_set_affixes, sizeof (SetAffix),
						"bin_load_affix_values");
  i = 0;
  while (i < lex -> nr_set_affixes)
    {
      bin_load_set_affix (bf, lex -> set_affixes + i);
      i++;
    }

  /* Int affixes */
  abs_bin_load_int (bf, &lex -> nr_int_affixes);
  lex -> int_affixes = (int *) abs_calloc (lex -> nr_int_affixes, sizeof (int),
					   "bin_load_affix_values");
  i = 0;
  while (i < lex -> nr_int_affixes)
    {
      abs_bin_load_int (bf, lex -> int_affixes + i);
      i++;
    }

  /* Text affixes */
  abs_bin_load_int (bf, &lex -> nr_text_affixes);
  lex -> text_affixes = (char **) abs_calloc (lex -> nr_text_affixes, sizeof (char *),
					      "bin_load_affix_values");
  i = 0;
  while (i < lex -> nr_text_affixes)
    {
      abs_bin_load_string (bf, lex -> text_affixes + i);
      i++;
    }
}

/* Read one nonterminal record from bf into *nont: its name, its nonterminal
   number, its arity and then one int per formal parameter. The fpars array
   is allocated here with arity elements. */
static void bin_load_nonterminal (BinFile bf, Nonterminal *nont)
{
  int *formals;
  int arity, i;

  abs_bin_load_string (bf, &nont -> name);
  abs_bin_load_int (bf, &nont -> nont_nr);
  abs_bin_load_int (bf, &nont -> arity);

  arity = nont -> arity;
  formals = (int *) abs_calloc (arity, sizeof (int), "bin_load_nonterminal");
  nont -> fpars = formals;
  for (i = 0; i < arity; i++)
    abs_bin_load_int (bf, formals + i);
}

/* Read the nonterminal table from bf into lex: a count followed by that
   many nonterminal records. The table itself is allocated here. */
static void bin_load_nonterminals (BinFile bf, NewLexicon lex)
{
  int i;

  abs_bin_load_int (bf, &lex -> nr_nonterminals);
  lex -> nonterminals = (Nonterminal *) abs_calloc (lex -> nr_nonterminals, sizeof (Nonterminal),
						    "bin_load_nonterminals");
  i = 0;
  while (i < lex -> nr_nonterminals)
    {
      bin_load_nonterminal (bf, lex -> nonterminals + i);
      i++;
    }
}

/* Return the name of the nonterminal stored at index nont_nr of the
   lexicon's nonterminal table. The index is not range checked, and the
   returned pointer refers to the string held by the lexicon (no copy). */
char *lxcn_get_nonterminal_name (NewLexicon lex, int nont_nr)
{
  return (lex -> nonterminals[nont_nr].name);
}

/* Read one call record from bf into *call: the index of the called
   nonterminal followed by one int per actual argument. The number of
   arguments is not stored with the call; it is the arity of the
   nonterminal the call refers to, so the nonterminal table must already
   have been loaded into lex. The nonterminal index is not range checked. */
static void bin_load_call (BinFile bf, Call *call, NewLexicon lex)
{
  int *args;
  int arity, i;

  abs_bin_load_int (bf, &call -> nont_id);
  arity = lex -> nonterminals[call -> nont_id].arity;

  args = (int *) abs_calloc (arity, sizeof (int), "bin_load_call");
  call -> actuals = args;
  for (i = 0; i < arity; i++)
    abs_bin_load_int (bf, &args[i]);
}

/* Read the call table from bf into lex: a count followed by that many
   call records. The table itself is allocated here. */
static void bin_load_calls (BinFile bf, NewLexicon lex)
{
  int i;

  abs_bin_load_int (bf, &lex -> nr_calls);
  lex -> calls = (Call *) abs_calloc (lex -> nr_calls, sizeof (Call), "bin_load_calls");
  i = 0;
  while (i < lex -> nr_calls)
    {
      bin_load_call (bf, lex -> calls + i, lex);
      i++;
    }
}

/* Read the entry table from bf into lex.
   The file holds the number of entries followed by the entries 1 up to
   nr_lex_entries - 1; entry 0 is not stored in the file and its slot is
   set to NULL. Every stored entry is a count nr_in_list followed by that
   many (call_id, frequency) pairs. In memory each entry becomes an int
   array of 2 * nr_in_list + 1 elements:
     list[0]          the number of pairs
     list[2 * i + 1]  call_id of pair i
     list[2 * i + 2]  frequency of pair i */
static void bin_load_entries (BinFile bf, NewLexicon lex)
{ int ix;
  abs_bin_load_int (bf, &lex -> nr_lex_entries);
  lex -> all_entries = (int **) abs_calloc (lex -> nr_lex_entries, sizeof (int *),
					    "bin_load_entries");

  /* Only touch the reserved slot 0 when the table really has one:
     with an empty table the store would land outside the allocation */
  if (lex -> nr_lex_entries > 0)
    lex -> all_entries[0] = NULL;

  for (ix = 1; ix < lex -> nr_lex_entries; ix++)
    { int nr_in_list, iy;
      int *list;
      abs_bin_load_int (bf, &nr_in_list);
      /* NOTE(review): nr_in_list comes straight from the file and is not
	 validated here; a negative value would yield an undersized list */
      list = (int *) abs_calloc (2 * nr_in_list + 1, sizeof (int), "bin_load_entries");
      lex -> all_entries[ix] = list;
      list[0] = nr_in_list;
      for (iy = 0; iy < nr_in_list; iy++)
	{ abs_bin_load_int (bf, &list [2 * iy + 1]);	/* call_id */
	  abs_bin_load_int (bf, &list [2 * iy + 2]);	/* frequency */
	};
    };
}

/* Read the vocabularies from bf into lex: a count followed, for every
   vocabulary, by its lexeme marker and then its trie. The marker array and
   the vocabulary array are both allocated here with one slot per
   vocabulary. */
static void bin_load_vocabularies (BinFile bf, NewLexicon lex)
{
  int i;

  abs_bin_load_int (bf, &lex -> nr_vocabularies);
  lex -> all_lexeme_markers = (int *) abs_calloc (lex -> nr_vocabularies, sizeof (int),
						  "bin_load_vocabularies");
  lex -> all_vocabularies = (Vocabulary *) abs_calloc (lex -> nr_vocabularies, sizeof (Vocabulary),
						       "bin_load_vocabularies");

  i = 0;
  while (i < lex -> nr_vocabularies)
    {
      /* Marker and trie are interleaved in the file */
      abs_bin_load_int (bf, lex -> all_lexeme_markers + i);
      lxcn_bin_load_trie (bf, lex -> all_vocabularies + i);
      i++;
    }
}

NewLexicon lxcn_read_lexicon (char *path)
{ NewLexicon lex = (NewLexicon) abs_malloc (sizeof (struct new_lexicon_rec), "lxcn_read_lexicon");
  BinFile bf = abs_bin_fopen (path, "r");
  abs_bin_verify_version (bf, "binary lexicon");
  bin_skip_text_array (bf);
  bin_load_affix_values (bf, lex);
  bin_load_nonterminals (bf, lex);
  bin_load_calls (bf, lex);
  bin_load_entries (bf, lex);
  bin_load_vocabularies (bf, lex);
  abs_bin_verify_eof (bf);
  abs_bin_fclose (bf);
  return (lex);
}

/* Release a lexicon.
   Currently a deliberate no-op: none of the tables allocated while reading
   the lexicon are freed, so the memory stays in use after this call. */
void lxcn_free_lexicon (NewLexicon lex)
{
  /* For the moment nothing is freed; the argument is intentionally unused */
  (void) lex;
}

/* Look up the entry list with number entry_nr.
   *entries is set to the list as stored in the lexicon (no copy); the
   return value is the number of (call_id, frequency) pairs in that list,
   which is kept in list[0].
   Slot 0 of the entry table is reserved and holds NULL. For such an empty
   slot *entries is set to NULL and 0 is returned instead of dereferencing
   the NULL pointer. entry_nr itself is not range checked. */
int lxcn_get_entries_from_nr (NewLexicon lex, int entry_nr, int **entries)
{ int *list = lex -> all_entries[entry_nr];
  *entries = list;

  /* The reserved slot has no list: report zero entries */
  if (list == NULL)
    return (0);
  return (list[0]);
}

/* Decode pair number idx of an entry list.
   entries is a list as delivered by lxcn_get_entries_from_nr: entries[0]
   holds the number of pairs, followed by (call_id, frequency) pairs.
   An idx outside 0 .. entries[0] - 1 is reported through abs_bug.

   Results:
     *ret_nont_nr  the nonterminal number of the called nonterminal
     *ret_arity    the number of parameters
     *ret_freq     the frequency stored with the pair
     *ret_params   an array of *ret_arity values, allocated here and not
                   freed by this function

   Each actual argument of the call is an index into one of the affix
   tables of the lexicon; the formal parameter type selects the table.
   Plain Values are returned rather than TaggedValues, since the tag is
   nowhere used in the RTS. */
void lxcn_get_entry_params (NewLexicon lex, int *entries, int idx,
                            int *ret_nont_nr, int *ret_arity, int *ret_freq, Value **ret_params)
{
  const Call *call;
  const Nonterminal *nont;
  Value *values;
  int nr_args, frequency, pos;

  /* Reject an index outside the list */
  if (!((0 <= idx) && (idx < entries[0])))
    abs_bug ("lxcn_get_entry_params", "Illegal entry index %d", idx);

  /* Locate the call, its frequency and the nonterminal it refers to */
  call = &lex -> calls[entries[2 * idx + 1]];
  frequency = entries[2 * idx + 2];
  nont = &lex -> nonterminals[call -> nont_id];
  nr_args = nont -> arity;

  values = (Value *) abs_calloc (nr_args, sizeof (Value), "lxcn_get_entry_params");

  /* Translate every actual argument through the matching affix table */
  for (pos = 0; pos < nr_args; pos++)
    {
      int actual = call -> actuals[pos];
      int formal = nont -> fpars[pos];

      if (formal == FormalINT)
	values[pos].int_par = lex -> int_affixes[actual];
      else if (formal == FormalTEXT)
	values[pos].text_par = lex -> text_affixes[actual];
      else
	/* Anything else is a (short) set affix */
	values[pos].set_par = lex -> set_affixes[actual].bitset;
    }

  /* Hand back the whole lot */
  *ret_nont_nr = nont -> nont_nr;
  *ret_arity = nr_args;
  *ret_freq = frequency;
  *ret_params = values;
}

/* Print pair number idx of an entry list in readable form, followed by a
   newline: the name of the called nonterminal and, when it has parameters,
   the parameter values between parentheses separated by ", ".
   Int affixes are printed as numbers, text affixes between double quotes
   and set affixes by their name.
   entries is a list as delivered by lxcn_get_entries_from_nr: entries[0]
   holds the number of pairs, followed by (call_id, frequency) pairs. The
   frequency is not printed. An idx outside 0 .. entries[0] - 1 is reported
   through abs_bug. */
void lxcn_print_entry (NewLexicon lex, int *entries, int idx)
{ int call_id, nont_id, arity, ix;
  int *fpars, *actuals;
  Nonterminal nont;
  Call call;

  /* Check for a valid index; report it under this function's own name */
  if ((idx < 0) || (idx >= *entries))
    abs_bug ("lxcn_print_entry", "Illegal entry index %d", idx);

  /* Pick up the call */
  call_id = entries[2 * idx + 1];
  call = lex -> calls[call_id]; 

  /* Pick up nonterminal, arity and formal args */
  nont_id = call.nont_id;
  actuals = call.actuals;
  nont = lex -> nonterminals[nont_id];
  abs_printf ("%s", nont.name);
  arity = nont.arity;
  fpars = nont.fpars;

  /* Print the parameters, if any */
  if (arity)
    { abs_printf (" (");
      for (ix = 0; ix < arity; ix++)
        { if (ix) abs_printf (", ");
	  /* Each actual is an index into the affix table selected by the formal */
	  switch (fpars[ix])
	    { case FormalINT:  abs_printf ("%d", lex -> int_affixes[actuals[ix]]); break;
	      case FormalTEXT: abs_printf ("\"%s\"", lex -> text_affixes[actuals[ix]]); break;
	      default: /* Short Set affix */
	        abs_printf ("%s", lex -> set_affixes[actuals[ix]].name);
	    };
	};
      abs_printf (")");
    };
  abs_printf ("\n");
}
