/*
   File: ebase_lexicon_impl.h
   Defines the internal structures to store a lexicon

   Copyright 2012 Marc Seutter
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: ebase_lexicon_impl.h,v 1.9 2012/08/01 12:40:46 marcs Exp $"
*/
#ifndef IncEbaseLexiconImpl
#define IncEbaseLexiconImpl

/* Local includes */
#include "ebase_ds.h"
#include "ebase_input.h"
#include "ebase_vocabulary.h"
#include "ebase_fact_table.h"
#include "ebase_lexicon.h"

/*
   The binary lexicon file is a binary encoded file consisting of the following parts
   Note that the first 11 items are identical to those in the lexicon interface file.
   This is deliberate: it allows a comparison to decide whether the binary lexicon needs
   to be regenerated (i.e. if these items have not changed and no dat/fct/trp files are
   newer than during the previous compilation, the binary lexicon need not be recompiled).

   Although the character set table and regexp table are simply passed on, they are
   incorporated into the binary lexicon file to enable the user to test his lexica
   with independent tools before trying to use them in a full blown grammar.

   1) A list of lexicon names
   2) A list of fact table names
   3) A list of triples file names
   4) All affix super domains
   5) The type table
   6) Those pragmat settings that influence the lexicalization process
   7) The number of facts
   8) The lexicon/fact nonterminal table (defining all nonterminals with their typing)
   9) The terminal table (defining all grammar terminals)
   10) The character set table (defining all character sets)
   11) The regexp nfa table (defining all nfas for the regexps).

   Parts, generated by lexgen:

   12) The collected set of (actual) affix values
   13) The critical text vocabulary
   14) The set of all calls
   15) The set of all entry lists
   16) All vocabularies (list of (marker, vocabulary) pairs)
   17) All facts
*/

/*
   Define the internal structure of a lexicon
   Note that we allocate a separate pool for the vocabularies that
   belong to this lexicon. This is the only way that we can do a
   controlled free of a lexicon struct. The vocabularies are freed
   by releasing their pool, while all other members are managed
   (ref counted) in the default pool.
*/
struct lexicon_rec
{ /* Separate pool holding the vocabularies that belong to this lexicon */
  pool vocabulary_pool;

  /*
     Settings that influence the lexicalization process.
     The first three are stored as plain ints (presumably used as booleans;
     confirm against the code that fills them in).
     NOTE(review): the three translation_* lists look like parallel lists
     (entry i of each describing one translation) -- confirm.
  */
  int hybrid_parsing;
  int utf8_processing;
  int empty_white_space;
  int_list white_spaces;
  int_list separators;
  int_list translation_sources;
  int_list translation_targets;
  int_list translation_penalties;

  /*
     Specialized versions of the settings above, kept to shortcut
     lexicalization.
     NOTE(review): the sizes, indexing and ownership of these buffers
     are not visible in this header -- check the code that builds them.
  */
  char *white_space_chars;
  char *separator_chars;
  char *translation_map;
  Penalty *translation_map_penalty;
  string_list utf8_translation_targets;

  /* The runtime type system, constant affix values and critical texts */
  rt_domain_list rt_domains;
  rt_type_list rt_types;
  affix_value_list rt_values;
  Vocabulary rt_criticals;

  /*
     The numbers of the predefined types int, real and text
     (presumably indices into rt_types -- confirm)
  */
  int rt_int;
  int rt_real;
  int rt_text;

  /*
     Precompiled lexical elements: the lexicon/fact nonterminals,
     the grammar terminals, the character sets and the regexp nfas
  */
  lex_nont_list rt_lex_nonts;
  terminal_list rt_terminals;
  cset_list rt_character_sets;
  nfa_list  rt_regexp_nfas;

  /* Precompiled calls and entry lists, each stored as a list of int lists */
  int_list_list rt_lex_calls;
  int_list_list rt_entry_lists;

  /*
     Vocabularies, stored as (marker, vocabulary) pairs split over two arrays.
     NOTE(review): nr_rt_vocs is presumably the number of elements in both
     rt_voc_markers and rt_vocabularies -- confirm.
  */
  int nr_rt_vocs;
  int *rt_voc_markers;
  Vocabulary *rt_vocabularies;

  /*
     Fact tables, including the triple db.
     NOTE(review): nr_facts is presumably the number of elements in
     rt_fact_tables -- confirm.
  */
  int nr_facts;
  FactTable *rt_fact_tables;
};

#endif /* IncEbaseLexiconImpl */

