/*
   File: trel_private.h
   Internal types of the trellis library

   Copyright 2009 Radboud University of Nijmegen
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

*/
#ifndef IncTrelPrivate
#define IncTrelPrivate

#include <lxcn_lexicon.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/*
 * Forward declarations of the three structures defined in this header,
 * so that they can refer to each other by pointer.
 */
struct state;
struct transition;
struct trellis;

/*
 * Top-level record of one trellis: the table of states, the input and
 * position information, the user-supplied callbacks, the lexicon and the
 * regular expressions.
 *
 * Pool, RegExp, Lexicon and the trel_* callback typedefs are not declared
 * in this header itself.
 * NOTE(review): presumably they come from <lxcn_lexicon.h> or from a header
 * that has to be included before this one -- confirm.
 * NOTE(review): member descriptions marked "presumably" are inferred from
 * the member names only; verify them against the implementation.
 */
struct trellis {
#if USE_POOL
    Pool pool;			/* only present when USE_POOL is non-zero */
#endif /* USE_POOL */
    struct state **states;	/* ptr to NUM_STATES*length pointers to a state */
    const char *input;		/* presumably the text being scanned */
    int length;			/* number of positions; sizes 'states' (see above) */
    int nclasses;		/* size of the trans[] array of every state */
    int line;			/* presumably line of the start of the input */
    int col;			/* presumably column of the start of the input */
    /* Callbacks for the user extension data (state_ext / trans_ext). */
    trel_free_trans_callback trans_ext_free;
    trel_free_state_callback state_ext_free;
    trel_new_state_callback state_ext_new;
    Lexicon lexicon;
    const char *white;		/* unlikely to be empty */
    const char *separators;	/* more likely to be empty */
    /*
     * Scanner callbacks.
     * NOTE(review): presumably each is invoked when the scanner produces
     * the corresponding kind of transition -- confirm.
     * scanned_match and scanned_skip share the type trel_scanned_match.
     */
    trel_scanned_lexicon scanned_lexicon;
    trel_scanned_match scanned_match;
    trel_scanned_match scanned_skip;
    trel_scanned_white scanned_white;
    trel_scanned_other scanned_other;
    trel_scanned_eos scanned_eos;
    trel_scanning_completed scanning_completed;
    RegExp *match;		/* presumably the regexps for the MATCH class */
    RegExp *skip;		/* presumably the regexps for the SKIP class */
    struct transition *eos_transition; /* presumably the end-of-input transition */
};

/*
 * One state (node) of the trellis.
 * The lexical states S, W, I and E are explained in the long comment
 * further down in this header.
 */
struct state {
    struct transition **trans;	/* struct transition *trans[nclasses] */
    void *state_ext;		/* extension data for library user */
    int position;		/* relative to starting position */
    int line;			/* original line */
    int col;			/* original column */
    /*
     * NOTE(review): the STATE_S, STATE_W, ... constants are not defined in
     * the visible part of this header; find their definition before use.
     */
    short lex_state;		/* one of STATE_S, STATE_W, ... */
    short flags;		/* STATE_FLAG_xxx bits, defined below */
};

/*
 * Bit values for the 'flags' member of struct state; each one occupies
 * a distinct bit so they can be combined.
 * NOTE(review): the meaning of each bit is only suggested by its name;
 * confirm against the code that sets and tests them.
 */
#define STATE_FLAG_FULL_INPUT_WORDS_PRESENT	0x01
#define STATE_FLAG_PART_INPUT_WORDS_PRESENT	0x02
#define STATE_FLAG_FULLY_SCANNED		0x04

/*
 * One transition (edge) of the trellis, leading to the state 'dest'.
 * Alternative transitions at the same position are chained through 'next'.
 */
struct transition {
    struct transition *next;	/* next poss trans at this position */
    struct state *dest;		/* state this transition leads to */
    /*
     * NOTE(review): "up to 'length' chars" suggests that text need not be
     * NUL-terminated at that point -- confirm before using str* functions.
     */
    char *text;			/* up to 'length' chars are valid */
    void *trans_ext;		/* extension data for library user */
    int length;			/* number of valid chars in 'text' */
};

/*
 * Prototype of a helper with internal linkage. Because it is declared
 * 'static' in a header, every translation unit that includes this file gets
 * its own declaration and has to supply its own definition if it calls the
 * function; units that do not define it may get a "declared 'static' but
 * never defined" warning from the compiler.
 * The State and Trellis types are not declared in this header.
 * NOTE(review): presumably yields the state for input position 'pos' and
 * lexical state 'lex_state' of trellis 't' -- confirm in the definition.
 */
static State trel_get_state(Trellis t, int pos, int lex_state);

/*
 * Extra transition classes. NR_EXTRA_CLASSES is the number of xxx_CLASS_OFF
 * offsets defined here (0 up to and including 4) and must be kept in step
 * with them.
 * NOTE(review): presumably the offsets are added to a base class number to
 * index the trans[] array of a state -- confirm against the users.
 * NOTE(review): GR_xxx_CLASS is not defined in this header; it looks like
 * another name for these classes -- confirm.
 */
#define NR_EXTRA_CLASSES	5
#define TERMINAL_CLASS_OFF	0	/* must be first of the extra classes */
#define MATCH_CLASS_OFF		1
#define SKIP_CLASS_OFF		2
#define OTHER_CLASS_OFF		3
#define WHITE_CLASS_OFF		4	/* must be highest GR_xxx_CLASS */

/*
 * There are 4 states associated with every input position.
 * Matching begins typically before any white space which follows
 * a word, so that suffixes can be properly matched.
 *
 * S	before the white Space.
 *  	Any matching attempt from this state must match space+, or it fails.
 *  	This brings you to state W.
 *  	After the space the really desired terminal is tried.
 *
 * W	at the start of a Word. Depending on what kind of terminals are matched
 * 	(prefix, suffix, etc), the next state will be
 * 	fullword (word) -> E
 * 	prefix   (y-)   -> I
 * 	(other terminals fail)
 *
 * I	Inside a word: a word cannot end here.
 * 	The next state will be:
 * 	prefix/infix    (y-, -y-)  -> I
 * 	fullword/suffix (word, -y) -> E
 *
 * E	End of a word, and implicitly the begin (S) of the next word.
 * 	A suffix can also be matched and returns to the same state.
 * 	suffix		(-y)  -> E
 *
 * The GLUE operator jumps from S to I, without matching any input.
 *
 * Implicit in this scheme is that these states exist for every single
 * input position, since most state changes described above are associated
 * with consuming a certain amount of input, therefore with different
 * input positions.
 *
 * Because state E of one word equals state S of the next, the outgoing
 * transitions for E are tried first, then those of S.
 * (Fortunately they are disjoint.)
 * The input state is never W for long, only within matching instructions:
 * they start in E/S or I, handle the epsilon transition, match some text
 * and then end in I or E.
 *
 *
 *   space        word           empty
 * S ---> W  ---------------> E ----->  S
 *         \                 /^\
 *          \               / | \
 *           \y-       -y- /  \ | -y
 *            \        /  /    \/
 *             \      V  /
 *              \       / ^
 *               \     / /
 *                V   / -y
 *                 \ / word
 *                 ^I
 *                /  \
 *               |   |
 *               \---/
 *              y-  -y-
 */

#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* IncTrelPrivate */
