/*
   File: arts_ds.h
   Defines all data structures relevant for the AGFL runtime system

   Copyright 2009-2010 Radboud University of Nijmegen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"
*/
#ifndef IncArtsDs
#define IncArtsDs


/* libabase includes */
#include <abase_porting.h>
#include <abase_repr.h>

/* liblexicon includes */
#include <lxcn_input.h>
#include <lxcn_lexicon.h>

/* libtrellis includes */
#include <trel_input.h>

/* Old defs; will be changed in due time */
/* ARG is a 64 bit integer; it is also the type of the `arg' member of
   union Cel further down in this header */
typedef int64		ARG;
/* Terminal is the type of TransitionExtension.terminal, which holds the
   bitwise encoding handled by the ENCODE_/DECODE_ macros in this header */
typedef	int		Terminal;	/* should be used by rtsagfl.c */
/* Position is a plain int for now */
typedef int		Position;	/* Define as int32 later */

/*------------------------------------------------------------------------------
// Reserve two special types and top domain values
//----------------------------------------------------------------------------*/
/* Reserved type codes.  The negative values are parenthesized so that each
   macro always expands to a single operand, whatever tokens surround it. */
#define INT_TYPE        (-1)
#define TEXT_TYPE       (-2)
/* Top domain values for the two reserved types.
   NOTE(review): where int is 32 bits wide, 0x80000000 has type unsigned int;
   confirm that this is intended wherever TOP_INT meets signed values. */
#define TOP_INT		0x80000000
#define TOP_TEXT	NULL

/*------------------------------------------------------------------------------
// Penalty management
// Penalties are defined to be 32 bit integers
//----------------------------------------------------------------------------*/
/* Penalty landmarks, listed from the neutral value outwards:
   NO_PENALTY is zero, MAX_PENALTY and MIN_PENALTY are each other's negation,
   and UNK_PENALTY lies one below MIN_PENALTY. */
#define NO_PENALTY	((Penalty) 0)
#define MAX_PENALTY	((Penalty) 0x7fffffff)
#define MIN_PENALTY	((Penalty) (-MAX_PENALTY))
#define UNK_PENALTY	((Penalty) (MIN_PENALTY - 1))

/*------------------------------------------------------------------------------
// Negative memoization
// The idea is that a parse should be blocked if the current penalty
// >= negative memo unless the negative memo is still unknown
//----------------------------------------------------------------------------*/
/* A negative memo is stored as a Penalty; three penalty values double as
   markers */
typedef Penalty		NegMemo;
/* MIN_PENALTY: the lowest regular penalty value */
#define NEGMEMO_BLOCKED MIN_PENALTY
/* UNK_PENALTY (one below MIN_PENALTY): the memo has not been determined yet */
#define NEGMEMO_UNKNOWN	UNK_PENALTY
/* MAX_PENALTY: the highest penalty value */
#define NEGMEMO_SUCCESS MAX_PENALTY

/*------------------------------------------------------------------------------
// Hybrid parsing
// Because of the reference in the posmemo, we need the types here
//----------------------------------------------------------------------------*/
/* Pointer types to records that are only named here, not defined in this
   header.  hyb_anchor is the type of the `hybrid' member of struct
   pos_memo_rec. */
typedef struct hyb_anchor_rec *hyb_anchor;
typedef struct hyb_instr_rec *hyb_code;

/*------------------------------------------------------------------------------
// Positive memoization
// When a parse of a nonterminal has succeeded at a certain point in the input,
// we remember it with its resultant affixes. Positive memoization is also used
// to implement left recursion
//----------------------------------------------------------------------------*/

/* One positive memo entry.  PMPROD names the record itself and PosMemo a
   pointer to it. */
typedef struct pos_memo_rec
{ /* Description of the recognized rule */
  int nont_nr;
  int nr_formals;
  int nr_variables;			/* formals + locals */
  int nr_sons;

  /* Contains formals, locals, posmemo's of sons and typing of sons */
  Value *variables;

  /* Next info */
  State next_state;			/* Next trellis state */
  State this_state;			/* Starting trellis state */
  Penalty penalty;			/* Penalty of this rule */
  /* NOTE(review): the use of pass2 is not visible in this header */
  void *pass2;

  /* Posmemo admin */
  struct pos_memo_rec* next;		/* Next solution pointer */
  struct pos_memo_rec* ambig;		/* Pointer to ambiguous nodes */
  struct pos_memo_rec* prime;		/* Pointer to first ambiguous node */
  int refcount;				/* How many times is this used? */

  /* For hybrid parsing */
  hyb_anchor hybrid;

  /* Flags for left recursion */
  char flags;				/* 2 = acceptable, 1 = young */
} PMPROD, *PosMemo;

/* Define shorthands for unknown and blocked PosMemo */
/* Both are the integer constants 0 and 1 cast to PosMemo, so neither points
   at a pos_memo_rec; presumably they are compared against, never
   dereferenced -- verify in the users of PosMemo. */
#define POSMEMO_UNKNOWN ((PosMemo) 0L)
#define POSMEMO_BLOCKED ((PosMemo) 1L)

/*----------------------------------------------------------------------------
// Bit-fields of TransitionExtension.type (16 bits)
//--------------------------------------------------------------------------*/
/* Each mask is written as a shifted 1 so the bit position is explicit;
   bits 1-4, 11 and 12 are not assigned here. */
#define WhiteBit	(1 << 0)	/* "terminal" is whitespace */
#define FactBit		(1 << 5)	/* terminal is fact */
#define OtherBit	(1 << 6)	/* terminal is other */
#define TermBit		(1 << 7)	/* terminal is grammar lexeme */
#define LexBit		(1 << 8)	/* terminal is lexicon lexeme */
#define MatchBit	(1 << 9)	/* terminal is regexp match */
#define SkipBit		(1 << 10)	/* terminal is regexp skip */
#define EosBit		(1 << 13)	/* EOS */

/*----------------------------------------------------------------------------
// Bit-fields of TransitionExtension.flags (8 bits)
//--------------------------------------------------------------------------*/
#define UsedBit		(1 << 0)	/* Transition was used by parser */

/*----------------------------------------------------------------------------
// A Transition contains the info associated with a terminal starting at a
// certain character position.
//--------------------------------------------------------------------------*/

/* Parser-specific data for one transition.  Note that the typedef name
   TransitionExtension is a POINTER to struct TransitionExtension. */
typedef struct TransitionExtension
{ Terminal		terminal;	/* Bitwise encoding */
  Value *		params;
  uint16_t		type;		/* WhiteBit ... EosBit masks */
  uint8_t		lextype;
  uint8_t		flags;		/* UsedBit mask */
  Penalty		penalty;
} *TransitionExtension;

/* Casts the result of trel_trans_get_ext() to TransitionExtension.
   NOTE(review): the parameter is named `state' but is handed to a trel_trans_
   function, so it presumably should be a Transition -- confirm. */
#define GetTransitionExtension(state)	((TransitionExtension) trel_trans_get_ext(state))

/*--------------------------------------------------------------------------
// A State contains info associated with a certain position in the text.
// The nr of negative memos allocated with this node is given by agfl-coder.
// The nr of positive memos is equal to the number of syntax rules.
//------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
// Bit-fields of StateExtension.flags (8 bits)
//--------------------------------------------------------------------------*/
/* NOTE(review): the `flags' member this mask is documented for is commented
   out in struct StateExtension below -- confirm whether MemoBit is still
   used. */
#define MemoBit		0x0001		/* Director set was used for memos  */

/* Parser-specific data for one state.  Note that the typedef name
   StateExtension is a POINTER to struct StateExtension. */
typedef struct StateExtension
{ NegMemo* neg_memos;		/* nr is given by agfl-coder */
  PosMemo* pos_memos;		/* Positive memo array (per rule) */
  char *nest_markers;		/* markers for each nest + one global one */
  Transition facts;		/* Cached facts at this state (linked list) */
  int penalty_adjustment;
  /* int flags; */
} *StateExtension;

/* Casts the result of trel_state_get_ext() to StateExtension */
#define GetStateExtension(state)	((StateExtension) trel_state_get_ext(state))

/*---------------------------------------------------------------------------
// Note that we assume an int to have at least 32 bits
// in the encoding of TransitionExtension.terminal:
// 
// for nonterminals: 4 bits type, 20 bits number, 8 bits arity
// for terminals, regexp_match, and regexp_skip: 4 bits type, 28 bits number
// typebits 0000 nonterminal
//	    0001 terminal
//	    0010 regexp_match
//	    0011 regexp_skip
//	    0100 other, whitespace, other one-off special tokens
//	    0101 fact
//-------------------------------------------------------------------------*/
/* Type tests and field extraction for an encoded terminal.
   The top 4 bits hold the type.  The low 28 bits hold either a number, or
   (for nonterminals and facts) a 20 bit number followed by an 8 bit arity. */
#define DECODE_TERM_TYPE(term)		(((term) >> 28) & 0xF)
#define TERM_IS_NONT(term)		(((term) & 0xF0000000) == 0x00000000)
#define DECODE_NONT_NUMBER(nont)	(((nont) >> 8) & 0xFFFFF)
#define DECODE_NONT_ARITY(nont)		((nont) & 0xFF)
#define TERM_IS_TERM(term)		(((term) & 0xF0000000) == 0x10000000)
#define DECODE_TERM_NUMBER(term)	((term) & 0x0FFFFFFF)
#define TERM_IS_MATCH(term)		(((term) & 0xF0000000) == 0x20000000)
#define TERM_IS_SKIP(term)		(((term) & 0xF0000000) == 0x30000000)
/* OTHER and WHITE are single fixed codes, so they are compared in full */
#define TERM_IS_OTHER(term)		((term) == 0x40000000)
#define TERM_IS_WHITE(term)		((term) == 0x40000001)
#define TERM_IS_FACT(term)		(((term) & 0xF0000000) == 0x50000000)
#define DECODE_REGEXP_NUMBER(term)	((term) & 0x0FFFFFFF)

/* Constructors.  Every macro argument is parenthesized, so an expression
   argument such as `c ? 1 : 2' is combined as a single operand.
   No masking is applied: nr and arity must already fit their bit fields. */
#define ENCODE_NONT(nr,arity)		(((nr) << 8) | (arity))
#define ENCODE_TERM(nr)			((nr) | 0x10000000)
#define ENCODE_MATCH(nr)		((nr) | 0x20000000)
#define ENCODE_SKIP(nr)			((nr) | 0x30000000)
#define ENCODE_OTHER()			(0x40000000)
#define ENCODE_WHITE()			(0x40000001)
#define ENCODE_FACT(nr,arity)		(((nr) << 8) | (arity) | 0x50000000)

/* Type bits minus one, offset by the number of lexicon nonterminals.
   Reads the global arts_ifd, which must be declared where this is used. */
#define DECODE_TERM_OR_RE_CLASS(term)	((((term) >> 28) & 0xf) - 1 + arts_ifd.nr_lexicon_nonterminals)

/*------------------------------------------------------------------------------
// A machine word of the abstract machine should be large enough to hold
// an address. For architectures with 64 bit addresses, ints loaded from the
// object code are therefore sign extended to 64 bit ints
//
// On architectures with 32 bit addresses, machine words are only 32 bits wide
//----------------------------------------------------------------------------*/
/* CODE and DATA are two names for the same union type.  They are declared
   before the union body so that the body can contain pointers to them. */
typedef union Cel CODE;
typedef union Cel DATA;
/* One cell of the abstract machine; the members are alternative views of the
   same storage.  `cel' is a third name for the union type. */
typedef union Cel
{   void        *action;
    char        *str;
    ARG         arg;
    Value       val;
    CODE        *code;
    DATA        *data;
    State	input_state;
    Transition  input_transition;
    PosMemo     pmprod;
    Penalty     penalty;
    int64	ilval;
    int         ival;
    int		*iptr;
} cel;

/*------------------------------------------------------------------------------
// The central datastructure of the abstract machine
//----------------------------------------------------------------------------*/
/* Collects file names, the loaded segments, option settings, table pointers
   and statistics counters in one record.  This header declares one extern
   instance of it, arts_ifd. */
struct interface_rec
{ /* File names */
  char *	grammar_name;
  char *	input_fname;
  char *	output_fname;
  char *	input_pos_fname;

  /* The size of the code, data and rotext segment */
  size_t	code_size;
  size_t	data_size;
  size_t	rotext_size;

  /* The three segments */
  CODE *	code;
  DATA *	data;
  char *	rotext;

  /* The Lexicon */
  Lexicon	lexicon;

  /* Interface section */
  int		gra_version;
  int		nr_lexicon_nonterminals;	/* read by DECODE_TERM_OR_RE_CLASS */
  int		nr_terminals;
  int		nr_match_regexps;
  int		nr_skip_regexps;
  int		nr_syntax_nonterminals;
  int		nr_lrec_nests;
  int		nr_neg_memos;
  int		nr_choices;
  int		nr_positions;
  int		eos_terminal;

  /* Options set by compiler */
  int		neg_memo_option;
  int		directors_option;

  /* Options set by compiler, (should be) overridable by command line options */
  int		hybrid_parsing_option;
  int		closed_triple_db_option;
  int		no_tdb_option;
  int		counters_option;	/* Should become cumulative */
  int		profile_option;
  int		trace_option;
  int		generate_option;
  int		segment_mode;
  int 		paragraph_mode;
  char *	alphabet_fname;
  int		triple_translate;
  int		hash_production;

  /* Auxiliary fields for lexer */
  char *	white_space_chars;
  char *	separator_chars;
  char *	radix_pragmat;
  char *	translate_src;
  char *	translate_dst;
  int *		translate_penalties;

  /* Options not yet set by compiler, but deemed desirable */
  int		max_parses;
  int		max_posmemo_queue_length;
  int		max_terminal_parses;

  /* Other runtime options, only set by command line */
  int		parsing_stats_option;
  int		research_stats_option;
  int		lexer_stats_option;
  int		total_stats_option;
  int		best_parsings_option;
  int		lcsdoc_sync_option;
  int		transduce_option;
  int		identity_transduction;
  int		label_bracket;
  int		no_output;
  int		graph_option;
  int		max_parsetime;
  int		max_edit_distance;
  int		transition_penalties_option;
  int		show_posmemo_option;
  int		show_triple_lookups_option;
  int		triple_stats_option;
  int		free_mem_option;	/* if set, be leak-check clean */
  int		kees_option;
  int		suppress_identical_transductions;
  int		directors_set_posmemos;
  int		absorb_equivalents;
  struct radixes {
      int	lexicon_frequency;
      int	tripledb_frequency;
      int	fact_frequency;
      int	penalty;
  } radix;
  int		max_penalty;
  int		print_autopos;

  /* Interface to tables */
  DATA *	affix_masks;		/* Bitset64 *  */
  DATA *	affix_weights;		/* Bitset64 ** */
  DATA *	nont_formals_domains;	/* int64 ** */
  DATA * 	term_negmemo_dir;	/* int64 ** called term_memo_dir in .s file */
  DATA * 	match_negmemo_dir;	/*   "             match_memo_dir */
  DATA * 	skip_negmemo_dir;	/*   "             skip_memo_dir  */
  DATA * 	lex_negmemo_dir;	/*   "             lex_memo_dir   */
  DATA *	lrec_nests_table;	/* int64 ** */
  DATA *	lex_nont_nrs_table;
  /* >= 0: lex-nonterminal class */
  /* A rule kind k is stored as the negative value -(k + 1), shown in the
     trailing comments; IS_TERMINAL and IS_LEX therefore test for -3 and -4.
     NOTE(review): presumably these are the entries of lex_nont_nrs_table
     above -- confirm. */
#define RULE_NONE	0		/* -1 */
#define RULE_GRAM	1		/* -2 */
#define RULE_TERMINAL	2		/* -3 */
#define RULE_LEX	3		/* -4 */
#define IS_TERMINAL(i)	((i) == -RULE_TERMINAL-1)
#define IS_LEX(i)	((i) == -RULE_LEX-1)
  DATA *	affix_names;		/* char ** */
  DATA *	term_names;		/* char ** */
  DATA *	nonterm_names;		/* char ** */
  DATA *	match_regexp_names;	/* char ** */
  DATA *	skip_regexp_names;	/* char ** */
  DATA * 	term_posmemo_dir; 	/* int64 ** */
  DATA * 	match_posmemo_dir;	/* int64 ** */
  DATA * 	skip_posmemo_dir;	/* int64 ** */
  DATA * 	lex_posmemo_dir;	/* int64 ** */
  DATA * 	other_posmemo_dir;	/* int64 ** */

  /* Statistics gathering */
  int64 *	nonterminal_profile_table;
  int64 *	alternatives_profile_table;	/* size: nr_positions */
  int64		nr_match_agains;
  int64		nr_lex_match_agains;
  int64		nr_fact_match_agains;
};

/* The interface record instance; only declared here, defined elsewhere */
extern struct interface_rec arts_ifd;

/* Declared with an explicit (void) parameter list so that this is a real
   prototype: before C23 an empty list `()' leaves the parameters unspecified
   and lets calls with arguments compile unchecked.
   NOTE(review): what exactly gets initialized is not visible in this header. */
void arts_init_ds (void);

#endif /* IncArtsDs */
