/*
   File: trel_input.c
   Manage input and store in a trellis.

   Copyright 2009 Radboud University of Nijmegen
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

*/

#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <limits.h>

/*
 * If USE_POOL is set to 1, a memory pool will be used.
 * Since the trellis can currently only be deleted as a whole, this
 * fits nicely.
 * For later, it would be nice to have a free list manager (such as
 * MemInfo) added to the pool library.
 */
#define USE_POOL	1

/* Libabase includes */
#include <abase_porting.h>
#include <abase_memalloc.h>
#include <abase_error.h>
#if USE_POOL
#include <abase_pool_alloc.h>
#endif

/* Liblexicon includes */
#include "lxcn_search.h"

/* Local includes */
#include "trel_input.h"
#include "trel_private.h"

/* Include pattern.h from gen/src */
#include <pattern.h>		/* XXX copy of application, for now XXX */

/*
 * Membership tests against the trellis' whitespace / separator sets.
 * Both expand to strchr(), so the result is a pointer (non-NULL means
 * "is a member"), not a 0/1 value.
 * strchr() also matches the terminating NUL of the set, so passing
 * ch == '\0' yields a non-NULL result.
 * is_separator() must only be used when (t)->separators is not NULL.
 */
#define is_white(t, ch)		strchr((t)->white, (ch))
#define is_separator(t, ch)	strchr((t)->separators, (ch))

/* Forward declarations of helpers that are used before their definition. */
static Transition scan_any_to_whitepace(Trellis t, State s, int loclass, int hiclass);
static int is_state_white_space(Trellis t, State dest);
static int completes_full_input_word(Trellis t, Transition trans);

/*
 * We want to distinguish between "possible transitions have not been
 * looked at yet" and "we looked, but found none".
 * The address of this dummy transition is stored in a state's class slot
 * to mean "not looked yet"; NULL means "looked, but found nothing".
 *
 * Its field values are also copied into every freshly allocated
 * transition as initial values (see trel_alloc_transition()).
 */
static struct transition no_transition_tried = {
  NULL, NULL, "no_transition_tried", NULL, 0
};

/*
 * Debug tracing hook: as defined here it expands to nothing, so every
 * DB_TREL(...) statement in this file is compiled out.
 */
#define DB_TREL(x)

/*
 * Allocate a new transition for trellis t.
 * With USE_POOL the storage comes from the trellis' pool, otherwise from
 * abs_malloc().  The new transition starts out with the same field values
 * as the no_transition_tried sentinel.
 */
Transition trel_alloc_transition(Trellis t)
{
    Transition fresh;

#if USE_POOL
    fresh = abs_pool_malloc(t->pool, sizeof(struct transition), "trel_alloc_transition");
#else
    fresh = abs_malloc(sizeof(struct transition), "trel_alloc_transition");
#endif

    /* give all fields their initial values in one struct copy */
    *fresh = no_transition_tried;
    return fresh;
}

/*
 * Release one transition.
 * The client extension is handed to the trans_ext_free callback when one
 * is installed; otherwise trans_ext is released with abs_free().
 * Without USE_POOL the text (unless it is NULL or still the sentinel's
 * literal) and the transition itself are freed as well; with USE_POOL
 * nothing else is released here.
 */
void trel_free_transition(Trellis t, Transition trans)
{
    if (t->trans_ext_free == NULL) {
	abs_free(trans->trans_ext, "trel_free_transition");
    } else {
	t->trans_ext_free(t, trans);
    }

#if !USE_POOL
    if (trans->text != NULL && trans->text != no_transition_tried.text) {
	abs_free(trans->text, "trel_free_transition");
    }
    abs_free(trans, "trel_free_transition");
#endif
}

/*
 * Release a whole list of transitions linked through their next field.
 * The "not tried yet" sentinel is recognised and left untouched.
 */
static void trel_free_transitions(Trellis t, Transition trans)
{
    Transition cur;
    Transition following;

    if (trans == &no_transition_tried) {
	DB_TREL(printf("trel_free_transitions: trans=no_transition_tried\n");)
	return;
    }

    for (cur = trans; cur != NULL; cur = following) {
	/* fetch the link first: cur may be gone after the free */
	following = cur->next;
	DB_TREL(printf("trel_free_transitions: trans=%p, next=%p\n", cur, following);)
	trel_free_transition(t, cur);
    }
}

/*
 * Release one state: its client extension (through the state_ext_free
 * callback if installed, else abs_free()), then the transition lists of
 * all classes.  Without USE_POOL the class array and the state itself are
 * freed too.  A NULL state is accepted and ignored.
 */
static void free_state(Trellis t, State s)
{
    int class_nr;

    if (s == NULL) {
	DB_TREL(printf("free_state: does not exist\n");)
	return;
    }

    if (t->state_ext_free != NULL) {
	t->state_ext_free(t, s);
    } else {
	abs_free(s->state_ext, "free_state");
    }

    class_nr = 0;
    while (class_nr < t->nclasses) {
	DB_TREL(printf("free_state: free class %d\n", class_nr);)
	trel_free_transitions(t, s->trans[class_nr]);
	class_nr++;
    }

#if USE_POOL
    s->trans = NULL;
#else
    abs_free(s->trans, "free_state");
    s->trans = NULL;
    abs_free(s, "free_state");
#endif
}

/*
 * Release every state located at an input position before that of s,
 * and clear the corresponding slots in the state table.
 */
void trel_free_states_before(Trellis t, State s)
{
    int limit = s->position * NUM_LEX_STATES;
    int slot = 0;

    DB_TREL(printf("free_states_before: before pos %d\n", s->position);)
    while (slot < limit) {
	DB_TREL(printf("free_states_before: free pos %d\n", slot);)
	free_state(t, t->states[slot]);
	t->states[slot] = NULL;
	slot++;
    }
}

/*
 * Release all states of the trellis and forget the state table.
 * Without USE_POOL the table itself is freed as well.
 */
static void free_states(Trellis t)
{
    int limit = t->length * NUM_LEX_STATES;
    int slot;

    for (slot = 0; slot != limit; slot++) {
	DB_TREL(printf("free_states: free pos %d\n", slot);)
	free_state(t, t->states[slot]);
    }

#if !USE_POOL
    abs_free(t->states, "free_states");
#endif
    t->states = NULL;
}

/*
 * Return the state for input position pos and lexical state lex_state,
 * creating it on first use.
 *
 * A new state gets all its class slots set to "not tried yet", its
 * line/column derived from the trellis origin, cleared flags and a NULL
 * client extension.  The state_ext_new callback, when installed, is then
 * called so the client can attach its own extension.
 *
 * pos must be smaller than t->length (asserted).
 */
static State init_state(Trellis t, int pos, int lex_state)
{
    int idx;
    DB_TREL(printf("init_state: pos=%d, t->length=%d\n", pos, t->length);)
    assert (pos < t->length);
    idx = pos * NUM_LEX_STATES + lex_state;
    if (t->states[idx] == NULL) {
	int i;

#if USE_POOL
	State s = abs_pool_malloc(t->pool, sizeof(struct state), "init_state");
	s->trans = abs_pool_calloc(t->pool, t->nclasses, sizeof(struct transition *), "init_state");
#else
	State s = abs_malloc(sizeof(struct state), "init_state");
	s->trans = abs_calloc(t->nclasses, sizeof(struct transition *), "init_state");
#endif
	for (i = 0; i < t->nclasses; i++) {
	    s->trans[i] = &no_transition_tried;
	}
	s->position = pos;
	s->line = t->line;
	s->col = t->col + s->position;
	s->lex_state = lex_state;
	s->flags = 0;
	/*
	 * The extension must have a defined value even when no
	 * state_ext_new callback is installed: free_state() passes it to
	 * abs_free() and trel_state_get_ext() hands it to the client.
	 */
	s->state_ext = NULL;
	t->states[idx] = s;

	if (t->state_ext_new) {
	    t->state_ext_new(t, s);
	}

	return s;
    }

    return t->states[idx];
}

/*
 * Install the callback that init_state() invokes for every newly
 * created state.
 */
void trel_set_new_state_callback(Trellis t, trel_new_state_callback f)
{
    t->state_ext_new = f;
}

/*
 * Lexical state machine: given the current lexical state and the type of
 * the lexeme just matched, return the lexical state after the lexeme, or
 * LEX_STATE_ERROR when that kind of lexeme is not acceptable here.
 *
 *   from W: single/multi token -> E, prefix -> I
 *   from I: single/multi token or suffix -> E, infix or prefix -> I
 *   from E: suffix -> E, infix -> I
 */
static
int determine_next_lex_state(int lex_state, LexemeType lextype)
{
    int is_token = (lextype == SingleToken || lextype == MultiToken);

    DB_TREL(printf("determine_next_lex_state: %d type %d\n", lex_state, lextype);)

    if (lex_state == LEX_STATE_W) {
	if (is_token)
	    return LEX_STATE_E;
	if (lextype == Prefix)
	    return LEX_STATE_I;
	return LEX_STATE_ERROR;
    }

    if (lex_state == LEX_STATE_I) {
	if (is_token || lextype == Suffix)
	    return LEX_STATE_E;
	if (lextype == Infix || lextype == Prefix)
	    return LEX_STATE_I;
	return LEX_STATE_ERROR;
    }

    if (lex_state == LEX_STATE_E) {
	if (lextype == Suffix)
	    return LEX_STATE_E;
	if (lextype == Infix)
	    return LEX_STATE_I;
	return LEX_STATE_ERROR;
    }

    return LEX_STATE_ERROR;
}

/*
 * Class numbering: the first NR_LEXICON_CLASSES(t) classes are the
 * lexicon classes; the NR_EXTRA_CLASSES classes after them are addressed
 * through the *_CLASS_OFF offsets.
 * The argument is parenthesised so that any pointer expression can be
 * passed, not just a plain identifier.
 */
#define NR_LEXICON_CLASSES(t)  ((t)->nclasses - NR_EXTRA_CLASSES)
#define GR_TERM_CLASS(t)  ((t)->nclasses - NR_EXTRA_CLASSES + TERMINAL_CLASS_OFF)
#define GR_MATCH_CLASS(t) ((t)->nclasses - NR_EXTRA_CLASSES + MATCH_CLASS_OFF)
#define GR_SKIP_CLASS(t)  ((t)->nclasses - NR_EXTRA_CLASSES + SKIP_CLASS_OFF)
#define GR_OTHER_CLASS(t) ((t)->nclasses - NR_EXTRA_CLASSES + OTHER_CLASS_OFF)
#define GR_WHITE_CLASS(t) ((t)->nclasses - NR_EXTRA_CLASSES + WHITE_CLASS_OFF)

/*
 * Scan the input for things that may occur in the lexicon.
 * This may find lexicon nonterminals, and terminals, since they
 * are also stored in the lexicon.
 *
 * Must use s->lex_state to determine which lexemes to accept and which
 * new state they lead to.
 *
 * In any position, if there are full word (or multiword) transitions,
 * don't bother with part-words.
 *
 * On entry all lexicon classes and the terminal class of s are switched
 * from "untried" to "nothing found"; every accepted lexicon entry is then
 * prepended to the transition list of the class chosen by the
 * scanned_lexicon callback (class 0 when no callback is installed, the
 * terminal class when the callback returns a negative number).
 * After the last match the callback is called once more with a NULL
 * transition.
 */
static
void scan_lexicon(Trellis t, State s)
{
    const char *from = t->input + s->position;
    const char *to;
    int i;
    int part_words_present = 0;	/* nr of part-word matches kept so far */
    int full_words_present = 0;	/* nr of full-word matches seen so far */
    LexiconIterator lex_iter;

    /*
     * Set all lexicon-related pointers from "untried" to "nothing found"
     */
    for (i = NR_LEXICON_CLASSES(t)- 1; i >= 0; i--) {
	if (s->trans[i] == &no_transition_tried) {
	    s->trans[i] = NULL;
	} else {
	    DB_TREL(printf("class %d not untried??\n", i);)
	}
    }
    i = GR_TERM_CLASS(t);
    if (s->trans[i] == &no_transition_tried) {
	s->trans[i] = NULL;
    } else {
	DB_TREL(printf("terminal class (%d) not untried??\n", i);)
    }

    lex_iter = lxcn_init_lexicon_match ((char *)from, t->lexicon);

    /* Each iteration handles one match; 'to' points just past it in the input */
    while ((to = lxcn_next_lexicon_match (lex_iter)) != NULL) {
	int length = (int)(to - from);	/* length of input, not lex data */
	char *lexeme;
	int nr_entries;
	int entry_nr;
	int penalty;
	int *entries;
	int ix;
	LexemeType lex_type;		/* fullword, prefix, etc... */
	int next_lex_state;
	State dest;
	int is_full_word = 0;
	int is_full_input_word = 0;

	/* Get the entry number for this match */
	lxcn_get_lexicon_match_info (lex_iter, &entry_nr, &lexeme, &lex_type, &penalty);

	next_lex_state = determine_next_lex_state(s->lex_state, lex_type);

	/* This kind of lexeme is not acceptable in the current lexical state */
	if (next_lex_state == LEX_STATE_ERROR)
	    continue;

	dest = init_state(t, s->position + length, next_lex_state);
	/*
	 * Make sure that full words have precedence over part words:
	 * if there are any of the former, we're not interested in the latter.
	 */
	//is_full_input_word = is_state_white_space(t, dest);
	/* A full input word starts in state W and is followed by whitespace or EOS */
	is_full_input_word = s->lex_state == LEX_STATE_W &&
			     is_state_white_space(t, dest);
#define FULL_WORD_INTERPRETATION 3
#if FULL_WORD_INTERPRETATION == 1
	/*
	 * This interpretation of a "full word" is from the input side:
	 * does it cover the input from whitespace to whitespace?
	 * (or separator)
	 */
	is_full_word = is_full_input_word;
#elif FULL_WORD_INTERPRETATION == 2
	/*
	 * This interpretation of a "full word" is from the lexicon side:
	 * does it have "glue-hyphens"?
	 * This is in general less strict, since it may interpret a full
	 * word in interpretation #1 in separate pieces.
	 */
	is_full_word = (lex_type == SingleToken ||
		        lex_type == MultiToken);
#elif FULL_WORD_INTERPRETATION == 3
	/*
	 * This interpretation of a "full word" is both.
	 */
	is_full_word = is_full_input_word && (lex_type == SingleToken ||
		                              lex_type == MultiToken);
#else
	/*
	 * Fallback: ignore the "full word priority" nonsense.
	 * Nothing is a full word.
	 */
	is_full_word = 0;
#endif /* FULL_WORD_INTERPRETATION */

	DB_TREL(printf("from lexicon: '%.*s' is full iw:%d is fw:%d, fw present:%d, prts present:%d\n",
			length, from,
			is_full_input_word,
			is_full_word,
			full_words_present,
			part_words_present);)

	if (is_full_word) {
	    full_words_present++;
	    if (part_words_present) {
		/* erase the part words seen so far (i.e. all transitions) */
		DB_TREL(abs_message("scan_lexicon: erase part words at pos %d", s->position);)
		for (i = GR_TERM_CLASS(t); i >= 0; i--) {
		    DB_TREL(abs_message("scan_lexicon: free class %d", i);)
		    trel_free_transitions(t, s->trans[i]);
		    s->trans[i] = NULL;
		}
		part_words_present = 0;
	    }
	} else {
	    /* Don't want any parts if there are full words */
	    if (full_words_present)
		continue;
	    part_words_present++;
	}

	/* One transition per lexicon entry that belongs to this match */
	nr_entries = lxcn_get_entries_from_nr (t->lexicon, entry_nr, &entries);
	for (ix = 0; ix < nr_entries; ix++) {
	    int nont_class = 0;
	    Transition trans = trel_alloc_transition(t);
	    trans->dest = dest;
#if USE_POOL
	    trans->text = abs_pool_new_string(t->pool, lexeme, "scan_lexicon");
#else
	    trans->text = abs_new_string(lexeme, "scan_lexicon");
#endif	/* USE_POOL */
	    trans->length = length;

	    /* Let the client pick the class; a negative answer means the terminal class */
	    if (t->scanned_lexicon) {
		nont_class = t->scanned_lexicon(t, s, trans, entries, ix, lex_type);
		assert(nont_class < t->nclasses);
		if (nont_class < 0)
		    nont_class = GR_TERM_CLASS(t);
	    }

	    /* Prepend to the list of that class */
	    trans->next = s->trans[nont_class];
	    s->trans[nont_class] = trans;

	    if (is_full_input_word) {
		s->flags |= STATE_FLAG_FULL_INPUT_WORDS_PRESENT;
	    } else {
		s->flags |= STATE_FLAG_PART_INPUT_WORDS_PRESENT;
	    }
	}
    }

    /*
     * Final call for client to clean up or do post-processing.
     */
    if (t->scanned_lexicon) {
	t->scanned_lexicon(t, s, NULL, NULL, 0, 0);
    }
    lxcn_finish_lexicon_match(lex_iter);
}

/*
 * Return a newly allocated, NUL-terminated copy of at most len characters
 * of string.  The copy occupies len + 1 bytes; if string is shorter than
 * len the remainder is filled with NULs.
 * With USE_POOL the storage comes from the trellis' pool, otherwise from
 * abs_malloc().  'where' is passed on to the allocator.
 * len is expected to be non-negative.
 */
char *
trel_strndup(Trellis t, const char *string, int len, const char *where)
{
    char *copy;

#if USE_POOL
    copy = abs_pool_malloc_unaligned(t->pool, len + 1, where);
#else
    copy = abs_malloc(len + 1, where);
#endif

    /* copy (and NUL-pad) the first len bytes, then terminate explicitly */
    strncpy(copy, string, len);
    copy[len] = '\0';
    return copy;
}

/*
 * Scan the longest possible stretch of white space at the position of s
 * and record it as the single transition of class nont_class, leading to
 * state W behind the white space.
 *
 * When there is no white space at all, a zero-length (epsilon) transition
 * is still made if a separator set is defined and the position is directly
 * in front of or directly behind a separator character; otherwise the
 * class is left as "nothing found".
 * (A disabled variant also made an epsilon transition whenever the
 * whitespace set was empty.)
 *
 * s must be in lexical state S (asserted).
 *
 * TODO: what to do about prefixes of this? There may be other tokens
 * that start with white space that may match some of it...
 */
static
void scan_white(Trellis t, State s, int nont_class)
{
    const char *start = t->input + s->position;
    const char *end;
    Transition trans;
    State dest;
    int length;

    assert(s->lex_state == LEX_STATE_S);

    /* from here on the class counts as "tried" */
    s->trans[nont_class] = NULL;

    for (end = start; *end != '\0' && is_white(t, *end); end++)
	;

    if (end == start) {
	/* no white space: only acceptable next to a separator */
	int at_separator = 0;

	if (t->separators) {
	    if (is_separator(t, start[0])) {
		at_separator = 1;
	    } else if (s->position > 0 && is_separator(t, start[-1])) {
		at_separator = 1;
	    }
	}
	if (!at_separator)
	    return;

	/* don't return but make an epsilon transition */
	DB_TREL(printf("scan_white: at separator: epsilon transition pos %d\n", s->position);)
    }

    /* 'end' points now past the white space */
    length = (int)(end - start);

    dest = init_state(t, s->position + length, LEX_STATE_W);

    trans = trel_alloc_transition(t);
    trans->dest = dest;
    trans->text = trel_strndup(t, start, length, "scan_white");
    trans->length = length;

    if (t->scanned_white != NULL) {
	t->scanned_white(t, s, trans);
    }

    trans->next = s->trans[nont_class];
    s->trans[nont_class] = trans;
}

/*
 * Scan regular expressions at the position of s and store the resulting
 * transitions in class nont_class.
 *
 * Matches are delivered from shortest to longest; because every new
 * transition is put at the front of the list, the list ends up with the
 * longest one first.  Zero-length matches are ignored.
 *
 * When a callback cb is given it is called twice per match: first with a
 * NULL transition to learn the lexeme type (which decides the lexical
 * state after the match, or rejects the match), and then with the filled
 * in transition.  Without a callback the destination is in state S.
 */
static
void scan_regexp(Trellis t, State s, int nont_class, RegExp *regexp, trel_scanned_match cb)
{
#define NALTS 1000
    const char *from = t->input + s->position;
    int alts[NALTS];
    void *matcher;
    int endpos;

    s->trans[nont_class] = NULL;

    if (regexp == NULL)
	return;

    matcher = prematch_regexp_all_alternatives(from, regexp, NALTS, alts);

    /* Visit every end position at which some alternative matches */
    for (endpos = match_regexp_next_alternative(matcher);
	 endpos >= 0;
	 endpos = match_regexp_next_alternative(matcher)) {
	int k;

	/* the match length MUST be > 0 */
	if (endpos == 0)
	    continue;

	/* Visit every alternative that ends its match here (0 ends the list) */
	for (k = 0; k < NALTS && alts[k] != 0; k++) {
	    int alt = alts[k] - 1;	/* number of the $MATCH or $SKIP */
	    int next_lex_state = LEX_STATE_S;
	    Transition trans;
	    State dest;

	    if (cb != NULL) {
		/*
		 * Ask which type of *fix regex it was by calling the
		 * callback with a NULL transition (it is not filled in
		 * yet anyway).
		 */
		int lex_type = cb(t, s, NULL, alt);

		next_lex_state = determine_next_lex_state(s->lex_state, lex_type);
		if (next_lex_state == LEX_STATE_ERROR)
		    continue;
	    }

	    dest = init_state(t, s->position + endpos, next_lex_state);
	    trans = trel_alloc_transition(t);
	    trans->dest = dest;
	    trans->text = trel_strndup(t, from, endpos, "scan_regexp");
	    trans->length = endpos;

	    if (cb != NULL) {
		(void)cb(t, s, trans, alt);
	    }

	    trans->next = s->trans[nont_class];
	    s->trans[nont_class] = trans;
	}
    }

    postmatch_regexp_all_alternatives(matcher);
}

/*
 * Scan the longest possible stretch of non-whitespace at the position of
 * s and record it as the single transition of class nont_class, leading
 * to state S behind it.  If there is no such stretch the class is left as
 * "nothing found".
 *
 * If the whitespace set is defined empty, this will match the whole
 * rest of the input.
 */
static
void scan_other(Trellis t, State s, int nont_class)
{
    const char *start = t->input + s->position;
    const char *end;
    Transition trans;
    State dest;
    int length;

    s->trans[nont_class] = NULL;

    for (end = start; *end != '\0' && !is_white(t, *end); end++)
	;

    /* There must be at least one non-space */
    if (end == start)
	return;

    /* 'end' points now past the word */
    length = (int)(end - start);

    dest = init_state(t, s->position + length, LEX_STATE_S);
    trans = trel_alloc_transition(t);
    trans->dest = dest;
    trans->text = trel_strndup(t, start, length, "scan_other");
    trans->length = length;

    if (t->scanned_other != NULL) {
	t->scanned_other(t, s, trans);
    }

    trans->next = s->trans[nont_class];
    s->trans[nont_class] = trans;
}

/*
 * Public functions.
 */
/*
 * Create an empty trellis for a grammar with nr_lex_nonts lexicon
 * classes (NR_EXTRA_CLASSES further classes are added to that).
 *
 * The new trellis has no input, no lexicon, no regular expressions and
 * no callbacks; the whitespace set defaults to " \t\r\n\v" and there is
 * no separator set.
 * With USE_POOL a memory pool is created and the trellis itself is
 * allocated from it.
 *
 * Returns the trellis, or whatever the allocator returned if that was
 * NULL.
 */
Trellis trel_init(int nr_lex_nonts)
{
#if USE_POOL
    Pool pool = abs_pool_init(128 * 1024, 1024 * 1024);
    Trellis t = abs_pool_malloc(pool, sizeof(struct trellis), "trel_init");
#else
    Trellis t = abs_malloc(sizeof(struct trellis), "trel_init");
#endif
    if (t) {
#if USE_POOL
	t->pool = pool;
#endif
	t->length = 0;
	t->states = NULL;
	t->nclasses = nr_lex_nonts + NR_EXTRA_CLASSES;
	t->input = NULL;
	t->line = 0;
	t->col = 0;
	/*
	 * state_ext_new must be cleared as well: init_state() tests it
	 * and calls through it when it is not NULL.
	 */
	t->state_ext_new = NULL;
	t->trans_ext_free = NULL;
	t->state_ext_free = NULL;
	t->lexicon = NULL;
	t->white = " \t\r\n\v";
	t->separators = NULL;
	t->scanned_lexicon = NULL;
	t->scanned_match = NULL;
	t->scanned_skip = NULL;
	t->scanned_white = NULL;
	t->scanned_other = NULL;
	t->scanned_eos = NULL;
	t->scanning_completed = NULL;
	t->match = NULL;
	t->skip = NULL;
	t->eos_transition = NULL;
	DB_TREL(printf("trel_init: %d classes\n", t->nclasses);)
    }
    return t;
}

/*
 * Bring the trellis back to the "no input" situation: release all
 * states and the end-of-string transition, and set the length to 0.
 */
static void trel_reset(Trellis t)
{
    free_states(t);

    if (t->eos_transition != NULL) {
	trel_free_transition(t, t->eos_transition);
	t->eos_transition = NULL;
    }

    t->states = NULL;
    t->length = 0;
}

/*
 * Destroy the trellis: reset it, then release its memory pool
 * (USE_POOL) or the trellis structure itself.
 */
void trel_delete(Trellis t)
{
    trel_reset(t);

#if !USE_POOL
    abs_free(t, "trel_delete");
#else
    abs_pool_free(t->pool, "trel_delete");
#endif
}

/*
 * Install the callback that free_state() uses to release a state's
 * client extension.
 */
void
trel_set_free_state_callback(Trellis t, trel_free_state_callback f)
{
    t->state_ext_free = f;
}

/*
 * Install the callback that trel_free_transition() uses to release a
 * transition's client extension.
 */
void
trel_set_free_trans_callback(Trellis t, trel_free_trans_callback f)
{
    t->trans_ext_free = f;
}

/*
 * Return the state at input position 0: lexical state S when the first
 * input character is in the whitespace set, W otherwise.
 * Since is_white() is a strchr() test, an empty input (first character
 * NUL) selects state S as well.
 */
State trel_get_initial_state(Trellis t)
{
    int lex_state = is_white(t, t->input[0]) ? LEX_STATE_S : LEX_STATE_W;

    return init_state(t, 0, lex_state);
}

/*
 * Return the state at (pos, lex_state), creating it when needed, or
 * NULL when pos lies outside the input.
 */
static
State trel_get_state(Trellis t, int pos, int lex_state)
{
    if (pos < 0 || pos >= t->length)
	return NULL;

    return init_state(t, pos, lex_state);
}

/*
 * Look up the state at (pos, lex_state) without creating it.
 * Returns NULL when the state does not exist yet or when either index is
 * out of range.
 */
State trel_may_get_state(Trellis t, int pos, int lex_state)
{
    if (pos < 0 || pos >= t->length)
	return NULL;
    if (lex_state < 0 || lex_state >= NUM_LEX_STATES)
	return NULL;

    return t->states[pos * NUM_LEX_STATES + lex_state];
}

/*
 * Return the first existing state that follows s in the state table,
 * or NULL when there is none.  NULL is also returned when s is not the
 * state stored at its own table slot.
 */
State trel_state_next(Trellis t, State s)
{
    int slot = s->position * NUM_LEX_STATES + s->lex_state;
    int limit = t->length * NUM_LEX_STATES;

    /* Consistency check */
    if (t->states[slot] != s)
	return NULL;

    for (slot++; slot < limit; slot++) {
	State candidate = t->states[slot];

	if (candidate != NULL)
	    return candidate;
    }

    return NULL;
}

/*
 * Starting skip positions behind s, find the first position at which a
 * whitespace transition can be made and return its S state (created on
 * the way, as are the S states of the positions tried before it).
 * Returns NULL when the end of the input is reached first.
 */
State trel_state_next_white(Trellis t, State s, int skip)
{
    int pos;

    for (pos = s->position + skip; pos < t->length; pos++) {
	State candidate = init_state(t, pos, LEX_STATE_S);

	if (trel_state_scan_whitespace(t, candidate) != NULL)
	    return candidate;
    }

    return NULL;	/* reached EOS */
}

/*
 * Return the trellis length as set by trel_set_input(): the number of
 * input characters plus one.
 */
int
trel_get_length(Trellis t)
{
    return t->length;
}

/*
 * Attach an input string to the trellis.
 * The string is not copied.  line and col are remembered as the origin
 * for the line/column of the states.  The trellis length becomes
 * strlen(input) + 1 and an all-NULL state table with NUM_LEX_STATES
 * slots per position is allocated.
 */
void trel_set_input(Trellis t, const char *input, int line, int col)
{
    int nstates;
    int slot;

    t->input = input;
    t->line = line;
    t->col = col;
    t->length = strlen(input) + 1;
    DB_TREL(printf("trel_set_input: length=%d, '%s'\n", t->length, input);)

    nstates = t->length * NUM_LEX_STATES;
#if USE_POOL
    t->states = abs_pool_calloc(t->pool, nstates, sizeof(struct state *), "trel_set_input");
#else
    t->states = abs_calloc(nstates, sizeof(struct state *), "trel_set_input");
#endif

    /* abs_calloc doesn't clear!!! */
    slot = 0;
    while (slot < nstates) {
	t->states[slot] = NULL;
	slot++;
    }
}

/* Set the lexicon that scan_lexicon() searches. */
void
trel_set_lexicon(Trellis t, Lexicon l)
{
    t->lexicon = l;
}

/*
 * Set the whitespace character set.  The string is not copied.
 * NULL or an empty string selects the empty set "".
 */
void trel_set_white(Trellis t, const char *white)
{
    t->white = (white != NULL && white[0] != '\0') ? white : "";
}

/*
 * Set the separator character set.  The string is not copied.
 * NULL or an empty string means "no separators", stored as NULL.
 */
void trel_set_separators(Trellis t, const char *separators)
{
    t->separators = (separators != NULL && separators[0] != '\0') ? separators : NULL;
}

/* Set the regular expression that is scanned for the $MATCH class. */
void
trel_set_match_regex(Trellis t, RegExp *match)
{
    t->match = match;
}

/* Set the regular expression that is scanned for the $SKIP class. */
void
trel_set_skip_regex(Trellis t, RegExp *skip)
{
    t->skip = skip;
}

/*
 * Return the transitions of class nr at state s, scanning the lexicon
 * first if that class has not been tried yet.  The result is cached in
 * the state; the lexical state machine is not involved.
 */
static
Transition trel_state_mayscan_lexterminal(Trellis t, State s, int nr)
{
    if (s->trans[nr] != &no_transition_tried)
	return s->trans[nr];

    /* search in the input, maybe we'll find something. */
    scan_lexicon(t, s);
    return s->trans[nr];
}

/*
 * Return the transitions of class nr at state s without scanning.
 * A class that has not been tried is turned into "nothing found" (NULL),
 * which is also stored in the state.
 */
static
Transition trel_state_getcached_lexterminal(Trellis t, State s, int nr)
{
    if (s->trans[nr] == &no_transition_tried)
	s->trans[nr] = NULL;

    return s->trans[nr];
}

/*
 * Return the $MATCH transitions at state s, scanning the match regular
 * expression first if the class has not been tried yet.
 *
 * The first transition in the list happens to be (one of) the longest,
 * since matches are found in length order and each one is stored at the
 * front of the list.
 */
static
Transition trel_state_mayscan_match(Trellis t, State s)
{
    int nr = GR_MATCH_CLASS(t);

    if (s->trans[nr] != &no_transition_tried)
	return s->trans[nr];

    /* search in the input, maybe we'll find something. */
    scan_regexp(t, s, nr, t->match, t->scanned_match);
    return s->trans[nr];
}

/*
 * Return the $MATCH transitions at state s without scanning; an untried
 * class is turned into "nothing found" (NULL) and stored as such.
 */
static
Transition trel_state_getcached_match(Trellis t, State s)
{
    int nr = GR_MATCH_CLASS(t);

    if (s->trans[nr] == &no_transition_tried)
	s->trans[nr] = NULL;

    return s->trans[nr];
}

/*
 * Return the $SKIP transitions at state s, scanning the skip regular
 * expression first if the class has not been tried yet.
 * Whether a $MATCH makes the $SKIP scan unnecessary is not decided here;
 * that is handled in trel_state_scan_phases().
 */
static
Transition trel_state_mayscan_skip(Trellis t, State s)
{
    int nr = GR_SKIP_CLASS(t);

    if (s->trans[nr] != &no_transition_tried)
	return s->trans[nr];

    /* search in the input, maybe we'll find something. */
    scan_regexp(t, s, nr, t->skip, t->scanned_skip);
    return s->trans[nr];
}

/*
 * Return the $SKIP transitions at state s without scanning; an untried
 * class is turned into "nothing found" (NULL) and stored as such.
 */
static
Transition trel_state_getcached_skip(Trellis t, State s)
{
    int nr = GR_SKIP_CLASS(t);

    if (s->trans[nr] == &no_transition_tried)
	s->trans[nr] = NULL;

    return s->trans[nr];
}

/*
 * Return the $OTHER transition at state s, scanning for it first if the
 * class has not been tried yet.
 *
 * Nothing is returned (and nothing is scanned) unless s is in lexical
 * state W.  On the first attempt the class is marked as tried, and the
 * $OTHER scan is skipped when some transition in a class below the
 * whitespace class already leads to whitespace.
 */
static
Transition trel_state_mayscan_other(Trellis t, State s)
{
    int nr;

    if (s->lex_state != LEX_STATE_W) {
	DB_TREL(abs_message("trel_state_mayscan_other: not in state W, pos %d", s->position);)
	return NULL;
    }

    nr = GR_OTHER_CLASS(t);
    if (s->trans[nr] != &no_transition_tried)
	return s->trans[nr];

    s->trans[nr] = NULL;

    /* Check if there is a transition to whitespace already */
    if (scan_any_to_whitepace(t, s, 0, GR_WHITE_CLASS(t)-1) != NULL) {
	DB_TREL(abs_message("trel_state_mayscan_other: already some transition present");)
	return NULL;
    }

    /* Search in the input, maybe we'll find something. */
    DB_TREL(abs_message("trel_state_mayscan_other: ok, scan some input @%d", s->position);)
    scan_other(t, s, nr);
    return s->trans[nr];
}

/*
 * Return the $OTHER transition at state s without scanning; an untried
 * class is turned into "nothing found" (NULL) and stored as such.
 */
Transition trel_state_getcached_other(Trellis t, State s)
{
    int nr = GR_OTHER_CLASS(t);

    if (s->trans[nr] == &no_transition_tried)
	s->trans[nr] = NULL;

    return s->trans[nr];
}

/*
 * Flag state s as fully scanned and tell the client about it through
 * the scanning_completed callback, if one is installed.
 */
static
void trel_state_completed_scanning(Trellis t, State s)
{
    s->flags |= STATE_FLAG_FULLY_SCANNED;

    if (t->scanning_completed != NULL) {
	t->scanning_completed(t, s);
    }
}

/*
 * There are 3 phases:
 *
 *   I: fullwords (from lexicon)
 *
 *  II: part words (from lexicon)
 *      $WORD (because it is really a lexicon match)
 *      $MATCH
 *
 * III: $SKIP
 *      $OTHER
 *
 * If a lower phase puts anything in the trellis, a higher phase
 * will just give up.
 *
 * Scanning runs up to and including max_phase.  As implemented here,
 * the lexicon results of phase I never end the scan early (that check
 * is disabled); a $MATCH whose first transition completes a full input
 * word does.  A state that is flagged as fully scanned is not scanned
 * again.
 */
static
void trel_state_scan_phases(Trellis t, State s, int max_phase)
{
    Transition match;

    if (s->flags & STATE_FLAG_FULLY_SCANNED)
	return;

    /* I: lexicon words */
    (void)trel_state_mayscan_lexterminal(t, s, 0);

    if (max_phase < 2)
	return;

    /*
     * II: $MATCH
     * $WORD: Explicit scanning for $WORD is unneeded:
     * all the work is done when scanning for lexicon entries (above).
     */
    match = trel_state_mayscan_match(t, s);
    if (match != NULL && completes_full_input_word(t, match)) {
	trel_state_completed_scanning(t, s);
	return;
    }

    if (max_phase < 3)
	return;

    /* III: $SKIP, $OTHER */
    (void)trel_state_mayscan_skip(t, s);
    (void)trel_state_mayscan_other(t, s);

    trel_state_completed_scanning(t, s);
}

/*
 * Lexical scan state machine:
 * handle epsilon transition from E to S,
 * and from S to W (matching whitespace).
 *
 * Starting at s, follow whitespace transitions for as long as the
 * current state is in lexical state S and one can be made; return the
 * state that is reached.  States W and I are returned unchanged.
 *
 * An unknown lexical state is a programming error: it trips the assert.
 * When asserts are compiled out (NDEBUG) the state is returned as it is,
 * so the loop cannot spin forever on it.
 */
static
State trel_state_handle_epsilon(Trellis t, State s)
{
    Transition trans;

    for (;;) {
	switch (s->lex_state) {
	case LEX_STATE_S:	/* == LEX_STATE_E */
	    /* S: try to skip whitespace and go to state W */
	    trans = trel_state_scan_whitespace(t, s);
	    if (trans != NULL) {
		s = trans->dest;
		assert(s->lex_state == LEX_STATE_W);
		continue;
	    }
	    /* No good, try transitions out of state E then */
	    return s;
	case LEX_STATE_W:
	case LEX_STATE_I:
	    return s;
	default:
	    assert(0 && "trel_state_handle_epsilon: bad lex_state");
	    /* without the assert: leave the loop instead of hanging */
	    return s;
	}
    }

    return NULL;
}

/*
 * Scan for lexicon class nr at state s, with the lexical state machine:
 * epsilon transitions are followed first, then phase I scanning is done
 * on the state reached, and the (cached) transitions of class nr of that
 * state are returned.
 */
Transition trel_state_scan_lexterminal(Trellis t, State s, int nr)
{
    State start = trel_state_handle_epsilon(t, s);

    trel_state_scan_phases(t, start, 1);
    return trel_state_getcached_lexterminal(t, start, nr);
}

/*
 * Scan for grammar terminals at state s: the same as scanning for a
 * lexicon class, using the terminal class.
 */
Transition trel_state_scan_terminal(Trellis t, State s)
{
    return trel_state_scan_lexterminal(t, s, GR_TERM_CLASS(t));
}

/*
 * Return the whitespace transition at state s, scanning for it first if
 * the whitespace class has not been tried yet.  No epsilon handling is
 * done here; scan_white() requires s to be in lexical state S.
 */
Transition trel_state_scan_whitespace(Trellis t, State s)
{
    int nr = GR_WHITE_CLASS(t);

    if (s->trans[nr] != &no_transition_tried)
	return s->trans[nr];

    /* search in the input, maybe we'll find something. */
    scan_white(t, s, nr);
    return s->trans[nr];
}

/*
 * Scan for $MATCH at state s: follow epsilon transitions, scan up to and
 * including phase II on the state reached, and return its (cached)
 * $MATCH transitions.
 */
Transition trel_state_scan_match(Trellis t, State s)
{
    State start = trel_state_handle_epsilon(t, s);

    trel_state_scan_phases(t, start, 2);
    return trel_state_getcached_match(t, start);
}

/*
 * Scan for $SKIP at state s: follow epsilon transitions, scan all three
 * phases on the state reached, and return its (cached) $SKIP
 * transitions.
 */
Transition trel_state_scan_skip(Trellis t, State s)
{
    State start = trel_state_handle_epsilon(t, s);

    trel_state_scan_phases(t, start, 3);
    return trel_state_getcached_skip(t, start);
}

/*
 * For scanning $MATCH and $SKIP, we need to take control over
 * the regex parser; pattern.c and dfa.c.
 * XXX For now, assume the main program supplies it.
 */

/*
 * Tell whether dest is an existing state in lexical state S from which
 * either a whitespace transition or the end-of-string transition can be
 * made.  Returns 1 if so, 0 otherwise.
 */
static int is_state_white_space(Trellis t, State dest)
{
    if (dest == NULL)
	return 0;
    if (dest->lex_state != LEX_STATE_S)
	return 0;
    if (trel_state_scan_whitespace(t, dest) != NULL)
	return 1;

    return trel_state_scan_eos(t, dest) != NULL;
}

/*
 * Tell whether transition trans ends at whitespace or end-of-string,
 * i.e. whether its destination satisfies is_state_white_space().
 */
static int
completes_full_input_word(Trellis t, Transition trans)
{
    State dest = trans->dest;

    return is_state_white_space(t, dest);
}

/* Public wrapper around is_state_white_space(). */
int
trel_state_is_at_whitespace(Trellis t, State s)
{
    return is_state_white_space(t, s);
}

/*
 * Find the shortest transition from a range of classes (inclusive).
 * Assume there are no untried classes among them.
 *
 * Require whitespace following the transition (like $OTHER),
 * otherwise it may not be possible to make further progress
 * (not even with $OTHER, since it is not allowed to be a pre/in/postfix).
 *
 * Zero-length transitions are never selected.  Of several candidates
 * with the same length the one encountered first wins.  Returns NULL
 * when there is no candidate.
 */

Transition scan_shortest_to_whitepace(Trellis t, State s, int loclass, int hiclass)
{
    Transition best = NULL;
    int best_length = INT_MAX;
    int class;

    for (class = loclass; class <= hiclass; class++) {
	Transition cand;

	for (cand = s->trans[class]; cand != NULL; cand = cand->next) {
	    if (cand->length <= 0)
		continue;
	    if (cand->length >= best_length)
		continue;
	    if (!completes_full_input_word(t, cand))
		continue;

	    best = cand;
	    best_length = cand->length;
	}
    }

    return best;
}

/*
 * Return the first transition with a length greater than 0 in the
 * classes loclass..hiclass (inclusive) of state s that is followed by
 * whitespace or end-of-string, or NULL when there is none.
 */
Transition scan_any_to_whitepace(Trellis t, State s, int loclass, int hiclass)
{
    int class = loclass;

    while (class <= hiclass) {
	Transition cand = s->trans[class];

	while (cand != NULL) {
	    if (cand->length > 0 && completes_full_input_word(t, cand))
		return cand;
	    cand = cand->next;
	}
	class++;
    }

    return NULL;
}

/*
 * Scan for $OTHER, which can be surprisingly expensive.
 * This tries all regular scanners (all three phases), and returns the
 * cached $OTHER transition of the state reached after epsilon handling.
 *
 * Must not match white space.
 *
 * $OTHER cannot function as a prefix, infix or suffix, therefore
 * it can only start from state W.
 */
Transition trel_state_scan_other(Trellis t, State s)
{
    State start = trel_state_handle_epsilon(t, s);

    DB_TREL(abs_message("trel_state_scan_other: call trel_state_scan_phases");)
    trel_state_scan_phases(t, start, 3);

    return trel_state_getcached_other(t, start);
}

/*
 * Scan for $ANY, which can be surprisingly expensive.
 * All three scan phases are run first (this includes the scan for
 * $OTHER), then the shortest transition to whitespace among all classes
 * below the whitespace class is returned.
 *
 * Must not match white space.
 *
 * XXX doesn't select for lowest penalty, since it doesn't know it.
 * Maybe add a callback argument?
 */
Transition trel_state_scan_any(Trellis t, State s)
{
    State start = trel_state_handle_epsilon(t, s);

    DB_TREL(abs_message("trel_state_scan_any: call trel_state_scan_phases @%d", start->position);)
    trel_state_scan_phases(t, start, 3);

    DB_TREL(abs_message("trel_state_scan_any: call scan_shortest_to_whitepace @%d", start->position);)
    return scan_shortest_to_whitepace(t, start, 0, GR_WHITE_CLASS(t)-1);
}

/*
 * Scan for $WORD: a lexicon word (or grammar terminal).
 *
 * Phases I and II are scanned on the state reached after epsilon
 * handling.  All lexicon classes, including the grammar terminals, are
 * then searched for the shortest transition to whitespace, but only if
 * the state is flagged as having full input words; otherwise NULL is
 * returned.
 *
 * XXX doesn't select for lowest penalty, since it doesn't know it.
 * Maybe add a callback argument?
 */
Transition trel_state_scan_word(Trellis t, State s)
{
    s = trel_state_handle_epsilon(t, s);

    trel_state_scan_phases(t, s, 2);

    /* Only try if anything potentially useful is present. */
    if ((s->flags & STATE_FLAG_FULL_INPUT_WORDS_PRESENT) == 0)
	return NULL;

    return scan_shortest_to_whitepace(t, s, 0, GR_TERM_CLASS(t));
}

/*
 * An End Of String can only be reached from states E or W;
 * state I requires more word to follow.
 *
 * After epsilon handling, return the end-of-string transition if the
 * state reached is not in state I and lies at (or beyond) the last
 * position of the trellis; NULL otherwise.
 * The transition is created on first use, with the state reached at that
 * moment as its destination, and is shared by all later calls.
 */
Transition trel_state_scan_eos(Trellis t, State s)
{
    s = trel_state_handle_epsilon(t, s);

    if (s->lex_state == LEX_STATE_I)
	return NULL;
    if (s->position < t->length - 1)
	return NULL;

    if (t->eos_transition == NULL) {
	Transition eos = trel_alloc_transition(t);

	eos->dest = s;
#if USE_POOL
	eos->text = abs_pool_new_string(t->pool, "<EOS>", "trel_state_scan_eos");
#else
	eos->text = abs_new_string("<EOS>", "trel_state_scan_eos");
#endif
	eos->length = 0;

	if (t->scanned_eos != NULL) {
	    t->scanned_eos(t, s, eos);
	}
	t->eos_transition = eos;
    }

    return t->eos_transition;
}

/*
 * Return the state in lexical state E at the last position of the
 * trellis, creating it when needed.
 */
State trel_get_eos_state(Trellis t)
{
    return init_state(t, t->length - 1, LEX_STATE_E);
}

/*
 * Return the end-of-string transition as scanned from the E state at
 * the last position of the trellis.
 */
Transition trel_get_eos_transition(Trellis t)
{
    State last = init_state(t, t->length - 1, LEX_STATE_E);

    return trel_state_scan_eos(t, last);
}

/*
 * For glueing two words together.
 *
 * If s is in lexical state S, return the state in lexical state I at the
 * same position (created when needed); any other state is returned
 * unchanged.
 */
State trel_state_glue(Trellis t, State s)
{
    if (s->lex_state == LEX_STATE_S) {
	s = init_state(t, s->position, LEX_STATE_I);
	DB_TREL(printf("trel_state_glue: %d S -> I\n", s->position);)
    } else {
	/* the printf was missing here, which made this trace a no-op */
	DB_TREL(printf("trel_state_glue: stay in %d %c\n", s->position, "SWIE"[s->lex_state]);)
    }
    return s;
}

/*
 * Undo the glue.
 *
 * If s is in lexical state I, return the state in lexical state S at the
 * same position (created when needed); any other state is returned
 * unchanged.
 */
State trel_state_dissolve(Trellis t, State s)
{
    if (s->lex_state == LEX_STATE_I) {
	s = init_state(t, s->position, LEX_STATE_S);
	DB_TREL(printf("trel_state_dissolve: %d I -> S\n", s->position);)
    } else {
	/* the printf was missing here, which made this trace a no-op */
	DB_TREL(printf("trel_state_dissolve: stay in %d %c\n", s->position, "SWIE"[s->lex_state]);)
    }
    return s;
}

/*
 * Setting of callbacks
 */

/*
 * Install the callback that scan_lexicon() calls for every lexicon
 * transition it creates, and once more (with a NULL transition) when it
 * is done.
 */
void
trel_set_scanned_lexicon(Trellis t, trel_scanned_lexicon cb)
{
    t->scanned_lexicon = cb;
}

/* Install the callback used when scanning the $MATCH regular expression. */
void
trel_set_scanned_match(Trellis t, trel_scanned_match cb)
{
    t->scanned_match = cb;
}

/* Install the callback used when scanning the $SKIP regular expression. */
void
trel_set_scanned_skip(Trellis t, trel_scanned_match cb)
{
    t->scanned_skip = cb;
}

/* Install the callback that scan_white() calls for a new whitespace transition. */
void
trel_set_scanned_white(Trellis t, trel_scanned_white cb)
{
    t->scanned_white = cb;
}

/* Install the callback that scan_other() calls for a new $OTHER transition. */
void
trel_set_scanned_other(Trellis t, trel_scanned_other cb)
{
    t->scanned_other = cb;
}

/*
 * Install the callback that trel_state_scan_eos() calls when it creates
 * the end-of-string transition.
 */
void
trel_set_scanned_eos(Trellis t, trel_scanned_eos cb)
{
    t->scanned_eos = cb;
}

/* Install the callback that is called when a state is flagged as fully scanned. */
void
trel_set_scanning_completed(Trellis t, trel_scanning_completed cb)
{
    t->scanning_completed = cb;
}


/*
 * Transition functions
 */
/* Return the client extension pointer of transition t. */
void *
trel_trans_get_ext(Transition t)
{
    return t->trans_ext;
}

/* Set the client extension pointer of transition t. */
void
trel_trans_set_ext(Transition t, void *ext)
{
    t->trans_ext = ext;
}

/* Return the text stored in transition t. */
char *
trel_trans_get_text(Transition t)
{
    return t->text;
}

/* Return the transition that follows trans in its list (NULL at the end). */
Transition
trel_trans_get_next(Transition trans)
{
    return trans->next;
}

/* Make next the transition that follows trans in its list. */
void
trel_trans_set_next(Transition trans, Transition next)
{
    trans->next = next;
}

/* Return the address of the next link of trans, so the caller can relink in place. */
Transition *
trel_trans_get_nextptr(Transition trans)
{
    return &trans->next;
}

/* Return the destination state of trans. */
State
trel_trans_get_dest(Transition trans)
{
    return trans->dest;
}

/* Set the destination state of trans. */
void
trel_trans_set_dest(Transition trans, State dest)
{
    trans->dest = dest;
}

/* Return the number of input characters covered by trans. */
int
trel_trans_get_length(Transition trans)
{
    return trans->length;
}

/*
 * Transition iteration.
 *
 * Iterator state used by trel_trans_iter(), trel_trans_iter_next() and
 * find_next_class() to walk all transitions of one state, class by class.
 */
struct transiter {
    Trellis trellis;	/* trellis the state belongs to; supplies nclasses */
    State state;	/* state whose transition lists are walked */
    Transition next;	/* what the next trel_trans_iter_next() returns, or NULL */
    int class;		/* class whose list is being walked; -1 before the first */
};

/*
 * Advance the iterator to the next class that has at least one real
 * transition (neither NULL nor the "not tried" sentinel) and return the
 * head of that class' list.  Returns NULL when no such class is left.
 */
Transition find_next_class(TransIter iter)
{
    Trellis t = iter->trellis;
    State state = iter->state;

    for (iter->class++; iter->class < t->nclasses; iter->class++) {
	Transition head = state->trans[iter->class];

	if (head != NULL && head != &no_transition_tried)
	    return head;
    }

    return NULL;
}

/*
 * Create an iterator over all transitions of state s, positioned at the
 * first transition.  The iterator is allocated with abs_malloc() and
 * must be released with trel_trans_iter_end().
 */
TransIter trel_trans_iter(Trellis t, State s)
{
    TransIter it = abs_malloc(sizeof(struct transiter), "trel_state_iter");

    it->trellis = t;
    it->state = s;
    /* start before the first class, then look up the first transition */
    it->class = -1;
    it->next = find_next_class(it);

    return it;
}

/* Release an iterator made by trel_trans_iter(). */
void
trel_trans_iter_end(TransIter iter)
{
    abs_free(iter, "trel_trans_iter_end");
}

/*
 * *iter++ : return the transition the iterator is positioned at and move
 * on to the next one; when the list of the current class is exhausted
 * the iterator continues with the next class that has transitions.
 * Returns NULL when all transitions have been delivered.
 */
Transition trel_trans_iter_next(TransIter iter)
{
    Transition curr = iter->next;

    if (curr == NULL)
	return NULL;

    iter->next = (curr->next != NULL) ? curr->next : find_next_class(iter);
    return curr;
}

/*
 * State functions
 */
/* Return the client extension pointer of state s. */
void *
trel_state_get_ext(State s)
{
    return s->state_ext;
}

/* Set the client extension pointer of state s. */
void
trel_state_set_ext(State s, void *ext)
{
    s->state_ext = ext;
}

/* Return the input position of state s, or -1 when s is NULL. */
int trel_state_get_pos(State s)
{
    return s ? s->position : -1;
}

/*
 * Return the lexical state of s as a letter, taken from "SWIE" at index
 * s->lex_state.
 */
char trel_state_get_lex_state(State s)
{
    const char *letters = "SWIE";

    return letters[s->lex_state];
}

/* Return the line number recorded in state s. */
int
trel_state_get_line(State s)
{
    return s->line;
}

/* Return the column number recorded in state s. */
int
trel_state_get_col(State s)
{
    return s->col;
}

/*
 * Return the raw slot of class nr in state s: a transition list, NULL,
 * or the "not tried" sentinel.  nr is not range-checked.
 */
Transition
trel_state_get_trans(State s, int nr)
{
    return s->trans[nr];
}

/* Store trans in the slot of class nr in state s.  nr is not range-checked. */
void
trel_state_set_trans(State s, int nr, Transition trans)
{
    s->trans[nr] = trans;
}
