/*
   File: rtslex.c
   Implementation of lexical analysis module.

   Copyright 2005 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: rtslex.c,v 1.116 2006/09/27 16:01:15 marcs Exp $"
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

/* standard includes */
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>

/* libabase includes */
#include <abase_error.h>
#include <abase_memalloc.h>

/* liblexicon includes */
#include <lexicon.h>
#include <lexicon_input.h>
#include <lexicon_search.h>

/* local includes */
#include "pattern.h"
#include "rtsio.h"
#include "rtsesc.h"
#include "rtscode.h"
#include "rtslex.h"

#ifdef PMRTS
#include "posmemo.h"
#endif /* PMRTS */

/*
   For regular expressions, all $MATCH regexps are combined into one
   big regexp; the same holds for all $SKIP regexps. Instead of trying
   to match them all individually, the DFA executes them all in parallel.
   At any point where a match may end, this is recorded.
*/

#ifdef DEBUG_RTS
#define DB_RTS(x) x
#else
#define DB_RTS(x)
#endif

/*
   When debugging with -G:
   Output ; between trellis parts
   output state ptrs as well
*/
#ifdef DEBUG_RTS
#define PRINT_PARTS_SEMICOLON
#define PRINT_STATE_PTRS
#endif

/* may also compile with -DSTANDALONE_LEXER for test environment */
#ifdef COUNT_TRACE
/* Statistics counters, only compiled in with -DCOUNT_TRACE.
   NOTE(review): nothing in this part of the file updates them; judging by
   their names they count trellis builds and parts-token returns -- confirm. */
long n_trel_builds = 0;
long n_parts_tok_returns = 0;
#endif

/*------------------------------------------------------------------------------
// Global data
//----------------------------------------------------------------------------*/
/* Lexer description handed to init_lexer(); read by all get_* accessors. */
static LexInfo*       lex_info;

/* Results of compile_regexps() for the $MATCH and $SKIP regexps:
   per-regexp tables, the combined regexps and the per-regexp lexeme types.
   Set by init_regexps(), released by end_regexps(). */
static RegExp**       match_table_p;
static RegExp *	      match_combined;
static RegExp**       skip_table_p;
static RegExp *	      skip_combined;
static LexemeType     *skip_regexp_lex_types;
static LexemeType     *match_regexp_lex_types;

/* These were exported for rtstrelinp */
/* Class numbers of the four non-lexicon transition classes; set in init_lexer(). */
static int gr_term_class;
static int re_match_class;
static int re_skip_class;
static int other_class;

/* no longer exported */
static int NR_classes; /* = nr_lexicon_nonterminals + 4 */

/*
   Structures for memory management
   Each kind of object is managed by its own MemInfo
*/
typedef struct
{ size_t	obj_size;	/* size in bytes of one managed object (passed to abs_malloc) */
  void*	 free_list;		/* released objects, chained through their first word */
  unsigned      requested;	/* number of alloc_mem() calls */
  unsigned      allocated;	/* number of objects actually obtained from abs_malloc */
  unsigned      freed;		/* number of objects given back by end_mem_info() */
} MemInfo;

/* One pool per object kind; sizes are set in init_memory_manager(). */
static MemInfo	trellis_mem;
static MemInfo	statenode_mem;
static MemInfo	translist_mem;
static MemInfo	transition_mem;
static MemInfo	neg_memo_mem;
static MemInfo	pos_memo_mem;	/* only initialised when compiled with PMRTS */

#ifdef SHOW_LEXINFO_NRS
/*
   Debug aid: print the counters kept in lex_info on one line,
   prefixed by the caller supplied location string `where'.
*/
static void show_lexinfo_nrs (char *where)
{
  LexInfo *li = lex_info;

  abs_printf ("lex_info nrs at %s:", where);
  abs_printf ("nr_nont_classes=%d ", li -> nr_nont_classes);
  abs_printf ("nr_terminals=%d ", li -> nr_terminals);
  abs_printf ("nr_matches=%d ", li -> nr_matches);
  abs_printf ("nr_skips=%d ", li -> nr_skips);
  abs_printf ("nr_neg_memos=%d ", li -> nr_neg_memos);
  abs_message ("nr_syntax_nonterminals=%d", li -> nr_syntax_nonterminals);
}
#endif

/*------------------------------------------------------------------------------
// Module interface implementation
//----------------------------------------------------------------------------*/
/*
   Report an unknown token (str, len) found at input position pos through
   the log_unknown callback of lex_info; does nothing if none is installed.
*/
static void log_unknown_token (unsigned pos, unsigned char* str, unsigned len)
{
  if (lex_info -> log_unknown == NULL)
    return;
  (*lex_info -> log_unknown) (pos, (char*) str, len);
}

/* Number of negative memos per position, as recorded in lex_info. */
unsigned get_nr_neg_memos (void)
{
  return lex_info -> nr_neg_memos;
}

/* The number of positive memos equals the number of syntax nonterminals. */
#define get_nr_pos_memos get_nr_syntax_nonterminals

/* Number of syntax nonterminals, as recorded in lex_info. */
unsigned long get_nr_syntax_nonterminals (void)
{
  return lex_info -> nr_syntax_nonterminals;
}

/* Number of grammar terminals, as recorded in lex_info. */
static unsigned get_nr_terminals (void)
{
  return lex_info -> nr_terminals;
}

/* Number of $MATCH regular expressions, as recorded in lex_info. */
static unsigned get_nr_matches (void)
{
  return lex_info -> nr_matches;
}

/* Number of $SKIP regular expressions, as recorded in lex_info. */
static unsigned get_nr_skips (void)
{
  return lex_info -> nr_skips;
}

/* Text of grammar terminal number i, taken from lex_info's terminals table. */
static unsigned char* get_terminal (unsigned i)
{
  unsigned char *text = (unsigned char*) lex_info -> terminals[i];
  return text;
}

/* The lexicon stored in lex_info.
   Declared with (void) so the compiler checks calls, like the sibling getters. */
static Lexicon get_lexicon (void)
{ return (lex_info -> lexicon);
}

/* The trie stored in lex_info.
   Declared with (void) so the compiler checks calls, like the sibling getters. */
static Trie get_trie (void)
{ return (lex_info -> trie);
}

/* Table of $MATCH regexp source texts stored in lex_info
   (get_nr_matches() entries are read by init_regexps()).
   Declared with (void) so the compiler checks calls, like the sibling getters. */
static char** get_matches (void)
{ return (lex_info -> matches);
}

/* Table of $SKIP regexp source texts stored in lex_info
   (get_nr_skips() entries are read by init_regexps()).
   Declared with (void) so the compiler checks calls, like the sibling getters. */
static char** get_skips (void)
{ return (lex_info -> skips);
}

/* The end-of-sentence terminal stored in lex_info. */
Terminal get_eos_terminal (void)
{
  return lex_info -> eos_terminal;
}

/* The `blanks' character set of lex_info (passed to lxcn_init_char_tables). */
static unsigned char* get_blanks (void)
{
  return lex_info -> blanks;
}

/* The `terminators' character set of lex_info (passed to lxcn_init_char_tables). */
static unsigned char* get_terminators (void)
{
  return lex_info -> terminators;
}

/* The `invisibles' character set of lex_info (passed to lxcn_init_char_tables). */
static unsigned char* get_invisibles (void)
{
  return lex_info -> invisibles;
}

/* The translation source table of lex_info (passed to lxcn_init_char_tables). */
static unsigned char* get_translate_src (void)
{
  return lex_info -> translate_src;
}

/* The translation destination table of lex_info (passed to lxcn_init_char_tables). */
static unsigned char* get_translate_dst (void)
{
  return lex_info -> translate_dst;
}

/*------------------------------------------------------------------------------
// Memory manager
//----------------------------------------------------------------------------*/
/*
   Prepare a pool for objects of obj_size bytes:
   empty free list and all statistics counters reset to zero.
*/
static void init_mem_info (MemInfo* mem_info, size_t obj_size)
{
  mem_info -> requested = 0;
  mem_info -> allocated = 0;
  mem_info -> freed = 0;
  mem_info -> free_list = NULL;
  mem_info -> obj_size = obj_size;
}

/*
   Release every object on the pool's free list with abs_free,
   counting each one in `freed', and leave the free list empty.
   Objects that were never handed back with free_mem are not touched.
*/
static void end_mem_info (MemInfo* mem_info)
{
  void *node;
  void *following;

  for (node = mem_info -> free_list; node != NULL; node = following)
    {
      /* the link to the next free object lives in the object's first word */
      following = *(void**) node;
      abs_free (node, "end_mem_info");
      mem_info -> freed++;
    }
  mem_info -> free_list = NULL;
}

/*
   Hand out one object from the pool: reuse the head of the free list
   when there is one, otherwise obtain obj_size fresh bytes from abs_malloc.
   The contents of the returned object are not initialised.
*/
static void *alloc_mem (MemInfo* mem_info)
{
  void *obj = mem_info -> free_list;

  if (obj != NULL)
    {
      /* unlink the recycled object */
      mem_info -> free_list = *(void**) obj;
    }
  else
    {
      obj = abs_malloc (mem_info -> obj_size, "alloc_mem");
      mem_info -> allocated++;
    }
  mem_info -> requested++;
  return (obj);
}

/*
   Give an object back to its pool by pushing it on the free list.
   The first word of the object is overwritten with the list link.
*/
static void free_mem (MemInfo* mem_info, void* mem)
{
  void **link = (void**) mem;

  *link = mem_info -> free_list;
  mem_info -> free_list = mem;
}

#ifdef MEM_STATS
/*
   Print the statistics counters of one pool, labelled with str.
   The counters are of type unsigned, hence the %u conversions.
*/
static void show_mem_info (MemInfo* mem_info, const char* str)
{ abs_message ("Memory stats for %s: requested %u, allocated %u, freed %u",
	       str, mem_info -> requested, mem_info -> allocated, mem_info -> freed);
}
#endif /* MEM_STATS */

static void init_memory_manager (int nr_nont_classes)
{ unsigned nr_neg_memos = get_nr_neg_memos ();
#ifdef PMRTS
  unsigned nr_pos_memos = get_nr_pos_memos ();
#endif /* PMRTS */

  init_mem_info (&trellis_mem, sizeof (Trellis));
  init_mem_info (&neg_memo_mem, sizeof (void*) + nr_neg_memos * sizeof (NegMemo));
#ifdef PMRTS
  init_mem_info (&pos_memo_mem, nr_pos_memos * sizeof (PosMemo));
#endif /* PMRTS */
  init_mem_info (&statenode_mem, sizeof (StateNode));
  init_mem_info (&translist_mem, nr_nont_classes * sizeof (Transition *));
  init_mem_info (&transition_mem, sizeof (Transition));
}

static void end_memory_manager (void)
{ end_mem_info (&neg_memo_mem);
  end_mem_info (&pos_memo_mem);
  end_mem_info (&statenode_mem);
  end_mem_info (&translist_mem);
  end_mem_info (&transition_mem);
  end_mem_info (&trellis_mem);
#ifdef MEM_STATS
  show_mem_info (&neg_memo_mem,  "neg_memo tables");
#ifdef PMRTS
  show_mem_info (&pos_memo_mem,  "pos_memo tables");
#endif /* PMRTS */
  show_mem_info (&statenode_mem, "StateNodes");
  show_mem_info (&translist_mem, "TransitionLists");
  show_mem_info (&transition_mem, "Transitions");
  show_mem_info (&trellis_mem, "trelles");	/* FN: plural? */
#endif /* MEM_STATS */
}

static NegMemo* alloc_neg_memos (void)
{ NegMemo* neg_memos = (NegMemo*) alloc_mem (&neg_memo_mem);
  return (neg_memos);
}

/* Return a neg_memo table to its pool for later reuse. */
static void free_neg_memos (NegMemo* neg_memos)
{
  free_mem (&neg_memo_mem, neg_memos);
}

#ifdef PMRTS
static PosMemo *alloc_pos_memos (void)
{ PosMemo *pos_memos = (PosMemo *) abs_calloc (get_nr_syntax_nonterminals (), sizeof (PosMemo),
					      "alloc_pos_memos");
  return (pos_memos);
}

/* defined further on in this file */
static void reset_pos_memos (PosMemo *pos_memos);

/* Reset the memos of a pos_memo table, then release the table itself. */
static void free_pos_memos (PosMemo* pos_memos)
{
  reset_pos_memos (pos_memos);
  abs_free (pos_memos, "free_pos_memos");
}

/* Allocate a zero-filled marker array with one char per syntax nonterminal. */
static char *alloc_lrec_markers (void)
{
  unsigned long nr_entries = get_nr_syntax_nonterminals ();
  return ((char *) abs_calloc (nr_entries, sizeof (char), "alloc_lrec_markers"));
}

/* Release a marker array obtained from alloc_lrec_markers. */
static void free_lrec_markers (char *markers)
{
  abs_free (markers, "free_lrec_markers");
}

#endif /* PMRTS */

static Transition* alloc_transition (void)
{ Transition *transition = (Transition*) alloc_mem (&transition_mem);
  return (transition);
}

/* Return a Transition to its pool; its text and params are not released here. */
static void free_transition (Transition* transition)
{
  free_mem (&transition_mem, transition);
}

/* Take one (uninitialised) table of per-class transition lists from its pool. */
static Transition** alloc_translist (void)
{
  return ((Transition**) alloc_mem (&translist_mem));
}

/* Return a table of per-class transition lists to its pool. */
static void free_translist (Transition** translist)
{
  free_mem (&translist_mem, translist);
}

/*
   If there is a StateNode at a certain pos, at least one of the
   classes must have a transition.
*/
static StateNode *alloc_statenode (void)
{ StateNode *statenode = (StateNode*) alloc_mem (&statenode_mem);
  statenode -> trans_lists = alloc_translist ();
  return (statenode);
}

/* Return a StateNode and its transition list table to their pools. */
static void free_statenode (StateNode* statenode)
{
  Transition **lists = statenode -> trans_lists;

  free_translist (lists);
  free_mem (&statenode_mem, statenode);
}

/*
   Initialise a freshly allocated StateNode for input position pos:
   all NR_classes transition lists empty and no memo tables attached.
*/
static void init_statenode (StateNode* statenode, Position pos)
{
  int ix = 0;

  while (ix < NR_classes)
    {
      statenode -> trans_lists[ix] = NULL;
      ix++;
    }
  statenode -> pos = pos;
  statenode -> neg_memos = NULL;
#ifdef PMRTS
  statenode -> pos_memos = NULL;
  statenode -> lrec_markers = NULL;
#endif /* PMRTS */
}

#ifdef PARTS_TRACE
/*
   Trace aid: for every class with a non-empty transition list, report the
   list head pointer, from the highest class number down to class 0.
*/
static void dump_trans_lists_ptrs (char *procname, Position cur_pos, Transition** trans_lists)
{
  int class = NR_classes;

  while (--class >= 0)
    {
      Transition *head = trans_lists[class];
      if (head == NULL)
	continue;
      abs_message ("%s: pos %d class %d list:%p", procname, cur_pos, class, head);
    }
}
#endif /* PARTS_TRACE */

/* Allocate a zero-filled row of len StateNode pointers. */
static StateNode** alloc_state_row (unsigned len)
{
  StateNode **row = (StateNode**) abs_calloc (len, sizeof (StateNode*), "alloc_state_row");
  return (row);
}

/* Release a row obtained from alloc_state_row; the StateNodes it points to are not freed. */
static void free_state_row (StateNode** row)
{
  abs_free (row, "free_state_row");
}

/*
   Allocate an array of len AggregatePartStates with every element set to 0.
   The explicit clearing loop comes on top of the zero fill done by abs_calloc.
*/
static AggregatePartStates* alloc_init_aggregate_array (unsigned len)
{
  AggregatePartStates *result;
  unsigned ix = 0;

  result = (AggregatePartStates*) abs_calloc (len, sizeof(AggregatePartStates),
					      "alloc_init_aggregate_array");
  while (ix < len)
    {
      result[ix] = 0;
      ix++;
    }
  return (result);
}

/* Release an array obtained from alloc_init_aggregate_array. */
static void free_aggregate_array (AggregatePartStates* array)
{
  abs_free (array, "free_aggregate_array");
}

static Trellis* alloc_trellis (unsigned len)
{ Trellis* trellis = (Trellis*) alloc_mem (&trellis_mem);
  trellis -> states_row = alloc_state_row (len);
  trellis -> pstates_row = alloc_state_row (len);
  trellis -> last_part_end_from = (Position*) abs_calloc (len, sizeof(Position), "alloc_trellis");
  return (trellis);
}

/*
   Release the arrays owned by a trellis and return the Trellis to its pool.
   The StateNodes referenced from the rows are not released here.
*/
static void free_trellis (Trellis* trellis)
{
  Position *part_ends = trellis -> last_part_end_from;

  abs_free (part_ends, "free_trellis");
  free_state_row (trellis -> states_row);
  free_state_row (trellis -> pstates_row);
  free_mem (&trellis_mem, trellis);
}

/*
   Classify the text [*p_txtbeg, *p_txtend) by its unescaped leading and
   trailing hyphens and strip those hyphens by adjusting the two pointers:

	text-	Prefix		(*p_txtend moved back by one)
	-text-	Infix		(both pointers moved)
	-text	Suffix		(*p_txtbeg moved forward by one)
	text	SingleToken	(pointers untouched)

   A trailing hyphen preceded by an odd number of backslashes is escaped and
   does not count. Texts shorter than 2 characters are always SingleToken.
   The text itself is never modified.
*/
static LexemeType derive_lex_type_and_strip_hyphens (char **p_txtbeg, char **p_txtend)
/* assumes real hyphens are escaped with '\' */
{   char *t_end;
    int has_endhyph = 0;

    if (*p_txtend - *p_txtbeg < 2) {
	return (SingleToken);	/* avoid indexing before string */
    }

    /* Only form the pointer to the last character once we know it exists:
       for an empty text, *p_txtend - 1 would point before the string. */
    t_end = *p_txtend - 1;

    if (*t_end == '-') {
	/* count the backslashes directly in front of the hyphen:
	   if odd, the hyphen is escaped, so we don't have a prefix
	 */
	int nr_backslashes = 0;
	char *scan = t_end;
	while ((scan > *p_txtbeg) && (scan[-1] == '\\')) {
	    scan--;
	    nr_backslashes++;
	}
	has_endhyph = ((nr_backslashes % 2) == 0);
    }

    if (has_endhyph) {
	*p_txtend = t_end;
	if (**p_txtbeg == '-') {
	    (*p_txtbeg)++;
	    return (Infix);
	} else {
	    return (Prefix);
	}
    } else if (**p_txtbeg == '-') {
	(*p_txtbeg)++;
	return (Suffix);
    } else {
	return (SingleToken);
    }
} /* derive_lex_type_and_strip_hyphens */

/*------------------------------------------------------------------------------
// Regular expressions
//----------------------------------------------------------------------------*/
/*
   Abort on behalf of func_name because of a regexp problem:
   a negative error means regexps are not supported at all,
   any other value means the given regexp text is invalid.
*/
static void regexp_abort (char *func_name, int error, char* regexp)
{
  if (error < 0)
    {
      abs_abort (func_name, "regular expressions not supported");
      return;
    }
  abs_abort (func_name, "invalid regular expression: '%s'", regexp);
}

/* TODO: delete compiled regexp table from generated code */
/* Allocate a zero-filled array of sz lexeme types, one per regexp. */
static LexemeType *alloc_re_lex_types (unsigned sz)
{
  LexemeType *types = (LexemeType*) abs_calloc (sz, sizeof(LexemeType), "alloc_re_lex_types");
  return types;
}

/* Release an array obtained from alloc_re_lex_types. */
static void free_re_lex_types (LexemeType *re_lex_types)
{
  abs_free (re_lex_types, "free_re_lex_types");
}

/*
   Prepare the sz regular expressions in `regexps'.

   For each regexp the lexeme type is derived from its leading/trailing
   hyphens (stored in a new array returned through re_lex_types_h) and the
   stripped text is added as an alternative to one combined regexp
   (returned through `combined', if that is not NULL). Because a DFA is used
   to match it, the combined regexp is no slower than any of the separate
   ones, and it saves a lot of time when none of them can match.

   Returns a table of sz entries which are all NULL: no individual regexps
   are compiled here. When sz is 0, NULL is returned and neither
   *re_lex_types_h nor *combined is written.
*/
static RegExp **compile_regexps (unsigned sz, char** regexps, LexemeType **re_lex_types_h,
				 RegExp **combined)
{
  RegExp **table;
  LexemeType *lex_types;
  unsigned ix;

  if (sz == 0)
    return (NULL);

  lex_types = alloc_re_lex_types(sz);
  *re_lex_types_h = lex_types;
  table = (RegExp**) abs_calloc (sz, sizeof(RegExp*), "compile_regexps");

  if (combined != NULL)
    *combined = empty_regexp();

  for (ix = 0; ix < sz; ix++)
    {
      int error = 0;	/* never assigned below, so the abort is not reached */
      char *first = regexps[ix];
      char *past = first + strlen(first);

      lex_types[ix] = derive_lex_type_and_strip_hyphens(&first, &past);
      if (*past != '\0')
	{
	  /* a trailing hyphen was stripped: work on a terminated local copy */
	  int stripped_len = past - first;
	  char *stripped = (char *) abs_malloc (stripped_len + 1, "compile_regexps:dup");
	  strncpy(stripped, first, stripped_len);
	  stripped[stripped_len] = '\0';
	  if (combined != NULL)
	    add_alternative_to_regexp(*combined, stripped);
	  abs_free (stripped, "compile_regexps:free");
	}
      else if (combined != NULL)
	add_alternative_to_regexp(*combined, first);

      table[ix] = NULL;
      if (error != 0)
	regexp_abort("compile_regexps", error, regexps[ix]);
    }

  if (combined != NULL)
    finalize_regexp(*combined);
  return (table);
}

/*
   Undo compile_regexps: delete the sz table entries and the table itself,
   the lexeme type array (only when sz > 0) and the combined regexp.
*/
static void delete_regexps (unsigned sz, RegExp** table,
			    LexemeType *re_lex_types, RegExp *combined)
{
  if (table != NULL)
    {
      unsigned ix = 0;
      while (ix < sz)
	{
	  delete_regexp (table[ix]);
	  ix++;
	}
      abs_free (table, "delete_regexps");
    }

  if (sz > 0)
    free_re_lex_types(re_lex_types);

  if (combined != NULL)
    delete_regexp(combined);
}

#ifdef SHOW_RE_LISTS
/* Number of entries in match_table_p / skip_table_p; set by init_regexps. */
int debug_nrm;
int debug_nrs;

/*
   Debug aid: print the entries of both regexp tables, labelled with `where'.
   The entries are pointers, so they are printed with %p (as void *)
   rather than with an integer conversion.
*/
static void show_re_lists (char *where)
{ int rnr;
  abs_message ("%s match_table_p:", where);
  for (rnr = 0; rnr < debug_nrm; rnr++) abs_printf (" %p", (void *) match_table_p[rnr]);
  abs_printf ("\n");
  abs_message ("%s skip_table_p:\n", where);
  for (rnr = 0; rnr < debug_nrs; rnr++) abs_printf (" %p", (void *) skip_table_p[rnr]);
  abs_printf ("\n");
}
#endif /* SHOW_RE_LISTS */

/*
   Prepare the $MATCH and $SKIP regexps described by lex_info and store
   the resulting tables, lexeme types and combined regexps in the
   module globals.
*/
static void init_regexps (void)
{
  unsigned nr_match = get_nr_matches ();
  char **match_texts = get_matches ();
  unsigned nr_skip;
  char **skip_texts;

  match_table_p = compile_regexps (nr_match, match_texts,
				   &match_regexp_lex_types, &match_combined);

  nr_skip = get_nr_skips ();
  skip_texts = get_skips ();
  skip_table_p = compile_regexps (nr_skip, skip_texts,
				  &skip_regexp_lex_types, &skip_combined);
#ifdef SHOW_RE_LISTS
  debug_nrm = get_nr_matches ();
  debug_nrs = get_nr_skips ();
  show_re_lists ("init_regexps");
#endif /* SHOW_RE_LISTS */
}

/* Release everything set up by init_regexps, $MATCH regexps first. */
static void end_regexps (void)
{
  unsigned nr_match = get_nr_matches();
  unsigned nr_skip;

  delete_regexps (nr_match, match_table_p, match_regexp_lex_types, match_combined);
  nr_skip = get_nr_skips();
  delete_regexps (nr_skip, skip_table_p, skip_regexp_lex_types, skip_combined);
}

/* The two kinds of regular expressions handled by the lexer. */
typedef enum { RegMatch, RegSkip } RegType;

/* Number of regexps of the given kind; aborts on an unknown kind. */
static unsigned get_nr_regexps (RegType reg_type)
{
  if (reg_type == RegMatch)
    return (get_nr_matches ());
  if (reg_type == RegSkip)
    return (get_nr_skips ());
  abs_abort ("get_nr_regexps", "bad RegType %d", (int) reg_type);
  return (0);
}

/* The combined regexp of the given kind; aborts on an unknown kind. */
static RegExp* get_combined_regexp(RegType reg_type)
{
#ifdef SHOW_RE_LISTS
  abs_message ("get_combined_regexp (RegType=%d)", reg_type);
#endif /* SHOW_RE_LISTS */
  if (reg_type == RegMatch)
    return (match_combined);
  if (reg_type == RegSkip)
    return (skip_combined);
  abs_abort ("get_combined_regexp", "bad RegType %d", (int) reg_type);
  return (0);
}

/*
   Lexeme type (as derived from its hyphens) of regexp number id
   of the given kind; aborts on an unknown kind.
*/
static LexemeType regexp_lex_type(unsigned id, RegType reg_type)
{
  if (reg_type == RegSkip)
    return (skip_regexp_lex_types[id]);
  if (reg_type == RegMatch)
    return (match_regexp_lex_types[id]);
  abs_abort ("regexp_lex_type", "bad RegType %d", (int) reg_type);
  return (0);
}

/*------------------------------------------------------------------------------
// Initialization and finalization of module
//----------------------------------------------------------------------------*/
/*
   Initialise the lexer module with the given description (must not be NULL):
   remember it, compute the class numbers, and set up the memory pools,
   the lexicon character tables and the regular expressions.
   NR_classes must be known before the memory manager is initialised,
   since it fixes the size of the transition list tables.
*/
void init_lexer (LexInfo* info)
{
  assert((info != NULL) && "init_lexer: info already available.");
  lex_info = info;

  /* one class per lexicon nonterminal plus the four classes below */
  NR_classes = nr_lexicon_nonterminals + 4;
  gr_term_class = DECODE_TERM_OR_RE_CLASS(ENCODE_TERM(0));
  re_match_class = DECODE_TERM_OR_RE_CLASS(ENCODE_MATCH(0));
  re_skip_class = DECODE_TERM_OR_RE_CLASS(ENCODE_SKIP(0));
  other_class = DECODE_TERM_OR_RE_CLASS(ENCODE_OTHER(0));
#ifdef DEBUG_NONT_CLASSES
  abs_message ("init_lexer: directors_option=%d neg_memo_option=%d",
	       directors_option, neg_memo_option);
  abs_message ("init_lexer: pos_memo_option=%d", pos_memo_option);
  abs_message ("init_lexer: gr_term_class=%d re_match_class=%d re_skip_class=%d",
	       gr_term_class, re_match_class, re_skip_class);
#endif

  init_memory_manager(NR_classes);
  {
    unsigned char *blanks = get_blanks ();
    unsigned char *terminators = get_terminators ();
    unsigned char *invisibles = get_invisibles ();
    unsigned char *tr_src = get_translate_src ();
    unsigned char *tr_dst = get_translate_dst ();
    lxcn_init_char_tables (blanks, terminators, invisibles, tr_src, tr_dst);
  }
  init_regexps ();
#ifdef SHOW_LEXINFO_NRS
  show_lexinfo_nrs ("end of init_lexer");
#endif
}

/* Shut the lexer module down: release the regexps, then the memory pools. */
void end_lexer (void)
{
  end_regexps ();
  end_memory_manager ();
}

/*------------------------------------------------------------------------------
// Initialization of states
//----------------------------------------------------------------------------*/
/* Penalty given to every transition by init_transition(); lexicon
   penalties are added to it in add_transition_entry_params(). */
const Penalty penalty_unknown = 0;
const Penalty penalty_transition = 1;	/* Initial penalty for any transition */

/*------------------------------------------------------------------------------
// Encoding functions for Transition.terminal field.
//
// See rtslex.h for actual bit encodings.
// code_nonterminal	used in Transition expansion (by TaggedValues) after match process
// code_terminal	used in the match process
// code_regexp		used in the match process
//----------------------------------------------------------------------------*/
/* Encode lexicon nonterminal id with the given arity (see ENCODE_NONT). */
static Terminal code_nonterminal (unsigned id, int arity)
{
  Terminal coded = ENCODE_NONT (id, arity);
  return (coded);
}

/* Encode grammar terminal id (see ENCODE_TERM). */
static Terminal code_terminal (unsigned id)
{
  Terminal coded = ENCODE_TERM (id);
  return (coded);
}

/*
   Encode regexp number id of the given kind (see ENCODE_SKIP and
   ENCODE_MATCH); aborts on an unknown kind.
*/
static Terminal code_regexp (unsigned id, RegType reg_type)
{
  if (reg_type == RegSkip)
    return (ENCODE_SKIP (id));
  if (reg_type == RegMatch)
    return (ENCODE_MATCH (id));
  abs_abort ("code_regexp", "bad reg_type %d", (int) reg_type);
  return (0);
}

/*
   Map a lexeme type to the corresponding TransitionType bit;
   an unknown lexeme type yields 0.
*/
static TransitionType code_transition_type (LexemeType lex_type)
{
  TransitionType bit;

  switch (lex_type)
    {
    case Prefix:
      bit = PrefixBit;
      break;
    case Infix:
      bit = InfixBit;
      break;
    case Suffix:
      bit = SuffixBit;
      break;
    case SingleToken:
      bit = SingleTokenBit;
      break;
    case MultiToken:
      bit = MultiTokenBit;
      break;
    default:
      bit = 0;
      break;
    }
  return (bit);
}

/*
   Inverse of code_transition_type: recover the lexeme type from the
   lexeme bits of a transition. Aborts unless exactly one of the five
   lexeme bits is set.
*/
LexemeType get_transition_lex_type (Transition* transition)
{
  switch (transition -> type & (PrefixBit | InfixBit | SuffixBit | SingleTokenBit | MultiTokenBit))
    {
    case PrefixBit:
      return (Prefix);
    case InfixBit:
      return (Infix);
    case SuffixBit:
      return (Suffix);
    case SingleTokenBit:
      return (SingleToken);
    case MultiTokenBit:
      return (MultiToken);
    default:
      abs_abort ("get_transition_lex_type", "transition -> type = %d", transition -> type);
    }
  return (0);
}

/* Text shown for the end-of-sentence transition; a constant, not to be freed. */
static char *get_eos_text (void)
{
  char *eos_text = "<EOS>";
  return (eos_text);
}

/*
  Grammar terminals are preceded by their coded lexeme type when necessary.
  Hence, the actual text may start at the next position: for every lexeme
  type other than SingleToken the first character is skipped.
*/
static char* get_terminal_text (unsigned id, LexemeType lex_type)
{
  char *text = (char*) get_terminal(id);

  if (lex_type != SingleToken)
    text++;	/* skip control-character indicating lexeme type */
  return (text);
}

/*
   Return a freshly allocated copy of the first len characters of src,
   terminated with EosMark. The caller owns the result.
*/
static char* copy_string (const char* src, unsigned len)
{
  char *copy = (char*) abs_malloc (len + 1, "copy_string");

  strncpy(copy, src, len);
  copy[len] = EosMark;
  return (copy);
}

/* Type of a predicate on transitions; nonzero means the test holds. */
typedef TransitionType TransTest (Transition* transition);

/* Predicate that accepts every transition. */
static TransitionType trans_true (Transition* transition)
{
  return 1;
}

/* Nonzero iff EosBit is set in the transition's type. */
static TransitionType is_eos_transition (Transition* transition)
{
  return transition -> type & EosBit;
}

/* Nonzero iff TermBit is set in the transition's type. */
static TransitionType is_terminal_transition (Transition* transition)
{
  return transition -> type & TermBit;
}

/* Nonzero iff LexBit is set in the transition's type. */
static TransitionType is_lexicon_transition (Transition* transition)
{
  return transition -> type & LexBit;
}

/* Nonzero iff SkipBit is set in the transition's type. */
static TransitionType is_skip_regexp_transition (Transition* transition)
{
  return transition -> type & SkipBit;
}

/* Nonzero iff MatchBit is set in the transition's type. */
static TransitionType is_match_regexp_transition (Transition* transition)
{
  return transition -> type & MatchBit;
}

/* Nonzero iff OtherBit is set in the transition's type. */
static TransitionType is_other_transition (Transition* transition)
{
  return transition -> type & OtherBit;
}

/*
   TransitionType bit marking a transition as coming from a regexp of the
   given kind (SkipBit or MatchBit); aborts on an unknown kind.
   The enum is cast to int for the %d conversion, as in the sibling functions.
*/
static TransitionType regexp_state_bit (RegType reg_type)
{ switch (reg_type)
    { case RegSkip:	return (SkipBit);
      case RegMatch:	return (MatchBit);
      default:		abs_abort ("regexp_state_bit", "bad reg_type %d", (int) reg_type);
    };
  return (0);
}

/*
   TODO: Maybe we should clear params, memos, next, and trans in alloc_state.

   Common part of all init_*_transition functions: no parameters, unknown
   penalty, no destination state and no successor in the list.
   The terminal, text, type and length fields are left to the caller;
   pos is currently not used.
*/
static void init_transition (Transition* transition, unsigned pos)
{
  transition -> next = NULL;
  transition -> trans_dest_state = NULL;
  transition -> params = NULL;
  transition -> penalty = penalty_unknown;
}

/*
   Fill in a transition for the end of sentence: the coded eos terminal,
   the constant eos text and type EosBit combined with the SingleToken bit.
*/
static void init_eos_transition (Transition* transition, unsigned pos)
{
  Terminal eos = get_eos_terminal();

  init_transition (transition, pos);
  transition -> terminal = code_terminal (eos);
  transition -> text = get_eos_text ();
  transition -> type = EosBit | code_transition_type (SingleToken);
}

/*
   Fill in a transition for grammar terminal id with the given lexeme type.
   The text points into the terminal table (it is not a private copy,
   TxtFreeBit is not set).
*/
static void init_terminal_transition (Transition* transition, unsigned pos, unsigned id,
				      LexemeType lex_type)
{
  TransitionType lex_bit = code_transition_type (lex_type);

  init_transition (transition, pos);
  transition -> terminal = code_terminal (id);
  transition -> text = get_terminal_text (id, lex_type);
  transition -> type = TermBit | lex_bit;
}

/*
   Fill in a transition of class `other' covering the len characters at txt.
   The text is a private copy, which is why TxtFreeBit is set.
*/
static void init_other_transition (Transition* transition, unsigned pos, 
				   unsigned char *txt, unsigned len)
{
  TransitionType lex_bit = code_transition_type (SingleToken);

  init_transition (transition, pos);
  transition -> terminal = ENCODE_OTHER (0);
  transition -> text = copy_string ((char *) txt, len);
  transition -> type = OtherBit | TxtFreeBit | lex_bit;
}

/*------------------------------------------------------------------------
// Currently, states may be duplicated (for each parameter list)
// by add_transition_entry_params()
// called from expand_and_insert_transition().
// TODO: store list of parameter lists in lexicon state
// instead of duplicating states and change MATCH_LEX accordingly.
//
// Fill in a transition for a lexicon match covering the input [from, to).
// `info' is stored provisionally in the terminal field; the text is a
// private copy, which is why TxtFreeBit is set.
//----------------------------------------------------------------------*/
static void init_lexicon_transition (Transition* transition, unsigned pos, long info,
				     unsigned char* from, unsigned char* to,
				     LexemeType lex_type)
{
  unsigned nr_chars = to - from;
  TransitionType lex_bit;

  init_transition(transition, pos);
  /* params and penalty filled later by add_transition_entry_params() */
  transition -> terminal = info; /* changed by add_transition_entry_params() */
  transition -> text = copy_string ((char*) from, nr_chars);
  lex_bit = code_transition_type (lex_type);
  transition -> type = LexBit | TxtFreeBit | lex_bit;
}

/*
   Fill in a transition for regexp number id of kind reg_type, matching
   the input [from, to). The lexeme bit is the one derived from the
   regexp's hyphens; the text is a private copy (TxtFreeBit set).
*/
static void init_regexp_transition (Transition* transition, unsigned pos, unsigned id,
	 			    unsigned char* from, unsigned char* to,
				    RegType reg_type)
{
  unsigned nr_chars = to - from;
  TransitionType kind_bit;
  TransitionType lex_bit;

  init_transition (transition, pos);
  transition -> terminal = code_regexp (id, reg_type);
  transition -> text = copy_string ((char*)from, nr_chars);
  kind_bit = regexp_state_bit (reg_type);
  lex_bit = code_transition_type (regexp_lex_type (id, reg_type));
  transition -> type = kind_bit | TxtFreeBit | lex_bit;
}

/* Sentinel stored in a state row instead of a real StateNode pointer. */
static StateNode* const FAILURE = (StateNode*)1;

/* Record the FAILURE sentinel at position pos of the state row. */
static void mark_failure (StateNode** state_row, unsigned pos)
{
  StateNode **slot = &state_row[pos];
  *slot = FAILURE;
}

/* 1 iff the given state row entry is the FAILURE sentinel, 0 otherwise. */
static int has_failure (StateNode* state)
{
  return FAILURE == state;
}

/* Set TokenStartBit in the transition's type. */
static void mark_token_start (Transition *trans)
{
  trans -> type = trans -> type | TokenStartBit;
}

/* Nonzero iff TokenStartBit is set in the transition's type. */
static TransitionType is_token_start (Transition *trans)
{
  return trans -> type & TokenStartBit;
}

/* 1 iff TokenStartBit is not set in the transition's type, 0 otherwise. */
static TransitionType not_token_start (Transition *trans)
{
  return is_token_start (trans) == 0;
}

/* Set both TokenPartBit and FinalPartBit in the transition's type. */
static void mark_live_final_part (Transition *trans)
{
  trans -> type |= TokenPartBit;
  trans -> type |= FinalPartBit;
}

/* Set both TokenPartBit and NonfinalPartBit in the transition's type. */
static void mark_live_nonfinal_part (Transition *trans)
{
  trans -> type |= TokenPartBit;
  trans -> type |= NonfinalPartBit;
}

/* Nonzero iff TokenPartBit is set in the transition's type. */
static TransitionType is_live_token_part (Transition *trans)
{
  return trans -> type & TokenPartBit;
}

/* 1 iff TokenPartBit is not set in the transition's type, 0 otherwise. */
static TransitionType not_live_token_part (Transition *trans)
{
  return is_live_token_part (trans) == 0;
}

/*---------------------------------------------
// Final: part has transition to next token;
// Nonfinal: part has transition to next part.
// (A part may have both flags on.)
//-------------------------------------------*/
/* Nonzero iff FinalPartBit is set in the transition's type. */
static TransitionType is_final_part (Transition *trans)
{
  return trans -> type & FinalPartBit;
}

/* Nonzero iff NonfinalPartBit is set in the transition's type. */
static TransitionType is_nonfinal_part (Transition *trans)
{
  return trans -> type & NonfinalPartBit;
}

/* transition kind, see also IS_LASTPART / HAS_PARTS_TRANSITION in rtslex.h */
/* Nonzero iff the transition has a destination of either kind
   (TransPartsBit or TransTokenBit set). */
static TransitionType has_transition (Transition *trans)
{
  TransitionType either_kind = TransPartsBit | TransTokenBit;
  return trans -> type & either_kind;
}

/* Nonzero iff TransTokenBit is set in the transition's type. */
static TransitionType has_token_transition (Transition *trans)
{
  return trans -> type & TransTokenBit;
}

/*
   Give `state' its destination tdest and flag it with TransTokenBit.
   Precondition (asserted): the transition has no destination of either
   kind yet.
*/
static void add_transition (Transition* state, StateIndicator tdest)
{
  assert (!has_transition(state) && "ADD_transition: has no transition state.");
#ifdef SHOW_ZERO_DEST
  if (!tdest)
     abs_message ("ADD_transition: tdest=0x%p for trans 0x%p", tdest, state);
#endif
  state -> trans_dest_state = tdest;
  state -> type = state -> type | TransTokenBit;
}

/*
   Give `state' its destination tdest and flag it with TransPartsBit.
   Precondition (asserted): the transition has no destination of either
   kind yet.
*/
static void add_parts_transition (Transition* state, StateIndicator tdest)
{
  assert (!has_transition(state) && "Add_parts_transition: has no transition state.");
#ifdef SHOW_ZERO_DEST
  if (!tdest)
     abs_message ("ADD_parts_transition: tdest=0x%p for trans 0x%p", tdest, state);
#endif
  state -> trans_dest_state = tdest;
  state -> type = state -> type | TransPartsBit;
}

/* The destination stored in the transition (see add_transition). */
static StateIndicator get_transition_dest (Transition *trans)
{
  StateIndicator dest = trans -> trans_dest_state;
  return (dest);
}

/*
   Record the length of the input covered by the transition.
   Only allowed (asserted) while the transition has no destination yet.
*/
static void store_length (Transition *trans, unsigned len)
{
  assert (!has_transition (trans) && "store_length: has no transition state.");
  trans -> len = len;
}

/*
   Length of the input covered by the transition, as recorded by
   store_length. Only allowed (asserted) while the transition has no
   destination yet.
*/
static unsigned get_length (Transition *trans)
{ assert (!has_transition (trans) && "get_length: has no transition state.");
  return (trans -> len);
}

/*------------------------------------------------------------------------------
// Function:
//	static int add_transition_entry_params (Transition* transition, int entry_idx,
//	                                        Lexicon the_lex, int *nontnr)
// Description:
//	Fetch nonterminal number, arity, penalty and parameters of lexicon
//	entry entry_idx and store them in the transition: params, penalty
//	(penalty_unknown plus the lexicon penalty) and the coded nonterminal
//	as terminal. The nonterminal number is returned through nontnr.
//	Always returns 1.
//----------------------------------------------------------------------------*/
static int add_transition_entry_params (Transition* transition, int entry_idx,
	                                Lexicon the_lex, int *nontnr)
{
  Penalty entry_penalty;
  int entry_arity;

  lxcn_get_params_from_entry_in_list (the_lex, entry_idx, nontnr, &entry_arity,
				      &entry_penalty, &(transition -> params));
  transition -> penalty = penalty_unknown + entry_penalty;

#ifdef NONTNR_TRACE
  abs_message ("add_transition_entry_params: ->term %d nont %d arity %d",
	       transition->terminal, *nontnr, entry_arity);
#endif

  transition -> terminal = code_nonterminal (*nontnr, entry_arity);
  return (1);
}

/*------------------------------------------------------------------------------
// Function:
//	static void insert_transition (StateNode** states_row, unsigned pos, int nont_class,
//	                               Transition* transition, unsigned len)
// Description:
//	Insert transition in list for class nont_class, at position pos
//	in state_row, covering input at position pos with length len.
//	A StateNode is created at pos when there is none yet.
//	A zero length is only accepted (asserted) for an eos transition.
//
// CRUCIAL:
//	The new transition should be inserted in front of the list,
//	in order to keep the (possibly shared) tail intact.
//----------------------------------------------------------------------------*/
static void insert_transition (StateNode** states_row, unsigned pos, int nont_class,
			       Transition* transition, unsigned len)
{
  StateNode *node = states_row[pos];
  Transition **list_head;

  /* Allocate a sourcing statenode for this position if no entry yet */
  if (node == NULL)
    {
      /* Note alloc_statenode also allocates its trans_list */
      node = alloc_statenode ();
      states_row[pos] = node;
      init_statenode (node, pos);
    }
  list_head = &(node -> trans_lists[nont_class]);

#ifdef SHOW_ZERO_DEST
  if (!len) abs_message ("insert_transition storing len %d into 0x%p \"%s\"",
			 len, transition, transition -> text);
#endif
  assert((len || is_eos_transition(transition)) && "insert_transition: has no length or is not an eos transition.");
  store_length (transition, len);

  /* push the new transition in front of the existing list */
  transition -> next = *list_head;
  *list_head = transition;
#ifdef STATE_TRACE
  { char *hyph_beg = transition -> type & (InfixBit|SuffixBit) ? "-" : "";
    char *hyph_end = transition -> type & (InfixBit|PrefixBit) ? "-" : "";
    abs_message ("insert_transition: pos %d st %p class %d transit %p'%s%s%s' x%lx, next %p",
	         pos, states_row[pos], nont_class, transition, hyph_beg,
		 transition -> text, hyph_end, transition -> terminal, transition -> next);
  }
#endif
}

/*
   Insert one transition per lexicon entry in the entry list starting at
   entry_idx, each in the list of its own nonterminal class at pos.

   The given transition is filled in for the current entry; as long as
   there is a next entry, a copy is inserted for the current one and the
   original is reused for the next. The original itself is inserted for
   the last entry. The copies share the text of the original, so their
   TxtFreeBit is cleared: only the original owns the text.
*/
static void expand_and_insert_transition (StateNode** states_row, unsigned pos, int entry_idx,
					  Transition* transition, unsigned len)
{
  Lexicon the_lex = get_lexicon ();
  int nontnr;

  for (;;)
    {
      Transition *clone;

      if (!add_transition_entry_params (transition, entry_idx, the_lex, &nontnr))
	break;
      if (!lxcn_try_advance_to_next_entry_in_list (the_lex, &entry_idx))
	break;

      clone = alloc_transition();
      *clone = *transition;	/* copy whole struct */
      clone -> type = transition -> type & ~TxtFreeBit;
      insert_transition (states_row, pos, nontnr, clone, len);
    }
  insert_transition (states_row, pos, nontnr, transition, len);
}

/*
   Hook `state' (possibly a whole list) behind the last element of the
   list whose head pointer is *dest_ptr; an empty list simply becomes `state'.
*/
static void append_to_transition_list (Transition** dest_ptr, Transition* state)
{
  Transition **link;

  for (link = dest_ptr; *link != NULL; link = &((*link) -> next))
    ;
  *link = state;
}

/*------------------------------------------------------------------------------
// Function:
//	static State*
//	add_transitions(State** state_row, unsigned char* input, unsigned pos)
//
// Description:
//	Add transition states to each state at position pos in state_row,
//	by lexicalizing the input beyond the token in each state, unless
//	the state already has a transition to the next part in a parts-token.
//	Also, mark the state with TokenStart, so that add_parts_transitions
//	can see the difference and set TransTokenBit i.s.o. TransPartsBit.
//
// To do: TODO
//	Add transition to next token for parts ending before terminator,
//	even if they already have a parts transition.
//----------------------------------------------------------------------------*/
static Position lexicalize (Trellis* trellis, unsigned char* input, unsigned pos);

/*
   Walk all transition lists of the state at position pos.
   Every transition that is neither a live token part nor an eos
   transition gets a destination: the input behind it is lexicalized
   and the state found there is attached to the transition.
   Every transition that is not a live token part is marked as token start.
*/
static void add_transitions (Trellis* trellis, unsigned char* input, unsigned pos)
{ StateNode** states_row = trellis->states_row;
  StateNode* st_node = states_row[pos];
  int class;

  assert((st_node != NULL) && "add_transitions: st_node is NULL");
  for (class = NR_classes - 1; class >= 0; class--)
    { Transition* transition;

      for (transition = st_node->trans_lists[class];
	   transition != NULL;
	   transition = transition -> next)
	{ /*
	     Parts tokens are handled elsewhere: backward_mark_partstate_sets()
	     calls lexicalize () after the last part and marks the first part
	     as token_start; fix_parts_transitions() fills in their
	     transitions once the whole sentence has been lexed.
	  */
	  if (!is_live_token_part(transition) && !is_eos_transition(transition))
	    { unsigned span = get_length (transition);
	      Position dest_pos = lexicalize (trellis, input, pos + span);
#ifdef SHOW_ZERO_DEST
	      if (!states_row[dest_pos])
		abs_message ("add_transitions: add_tr(0x%p, 0, %d)", transition, dest_pos);
#endif
	      add_transition (transition, states_row[dest_pos]);
	    }

	  /* token start, as opposed to interpart start */
	  if (!is_live_token_part(transition))
	    { mark_token_start(transition);
	    }
	}
    }
}

/*------------------------------------------------------------------------------
// To be removed:
//
// Function:
//	static int skip_unknown_token (StateNode** state_row,
//	                               unsigned char* input, unsigned* pos_p)
//
// Description:
//	Skip input characters starting at the position pointed to by pos_p
//	until the next word terminator. However, skip at least one character.
//	For each skipped character, the corresponding position in state_row
//	is set to failure, and the position pointed to by pos_p is advanced.
//	Function log_unknown_token() is called for reporting the skipped string.
//----------------------------------------------------------------------------*/
static void skip_unknown_token (StateNode** state_row, unsigned char* input, unsigned* pos_p)
{ const unsigned token_pos = *pos_p;
  unsigned pos = token_pos;

  assert (!lxcn_is_eos(input[pos]) && "skip_unknown_token: eos position.");

  /* Consume at least one character, then continue up to the next terminator;
     every consumed position is marked as failure. */
  do
    { mark_failure (state_row, pos++);
    }
  while (!lxcn_is_terminator (input[pos]));

  /* The skipped text runs from token_pos up to (not including) pos */
  log_unknown_token (token_pos, input + token_pos, pos - token_pos);
  *pos_p = pos;
}

/*------------------------------------------------------------------------------
// Function:
//	static void do_skip_invisible_char (StateNode** state_row,
//	                    unsigned char* input, unsigned* pos_p)
//
// Description:
//	Skip one invisible character in input starting at the position
//	pointed to by pos_p. The corresponding position in state_row is
//	set to failure, and the position pointed to by pos_p is advanced.
//----------------------------------------------------------------------------*/
static void do_skip_invisible_char (StateNode** state_row, unsigned char* input, unsigned* pos_p)
{ unsigned pos = *pos_p;
  /* The caller must only invoke this on an invisible character */
  assert(lxcn_is_invisible(input[pos]) && "do_skip_invisible_char: not invisible position.");
  /* Mark the current position as failure and step past it */
  mark_failure (state_row, pos++);
  /* Hand the advanced position back to the caller */
  *pos_p = pos;
}

/*
   Advance *pos_p over all consecutive positions in state_row that are
   marked as failure. *pos_p is left unchanged if the position it
   refers to has no failure mark.
   NOTE(review): no upper bound is checked here; this relies on a
   non-failure entry existing at or after *pos_p.
*/
static void may_skip_failures (StateNode **state_row, unsigned *pos_p)
{ unsigned idx;

  for (idx = *pos_p; has_failure (state_row[idx]); idx++)
    ;
  *pos_p = idx;
}

/*------------------------------------------------------------------------------
// Function:
//	static unsigned char*
//	approx_match_lexeme(unsigned char* input, unsigned char* lexeme,
//	                    LexemeType lex_type, SeparatorType sep_type)
//
// Description:
//	Find approximate match of lexeme of type lex_type with any
//	prefix of the input. If sep_type is SepRequired, then the matched text
//	must be followed by a terminator (in multi-tokens and single-tokens).
//	A space in the terminal matches one or more blanks in the input.
//	If a character cannot be matched literally, its translation is tried.
//
// Return value:
//	A pointer to the first unmatched character, if a non-empty prefix
//	could be matched, or NULL else.
//
// To do:
//	Branch and bound for minimal edit distance.
//----------------------------------------------------------------------------*/

static unsigned char* approx_match_lexeme (unsigned char* input, unsigned char* lexeme,
					   LexemeType lex_type, SeparatorType sep_type)
{ unsigned char lex_mark = lxcn_get_lex_mark (lex_type);
  unsigned char lex_ch;

  /* A lexeme type with a mark requires the lexeme to start with that mark */
  if ((lex_mark != EmptyMark) && (*lexeme++ != lex_mark))
    return (NULL);

  /* Consume the lexeme text character by character */
  for (lex_ch = *lexeme++; !lxcn_is_eos (lex_ch); lex_ch = *lexeme++)
    { unsigned char in_ch = *input;

      if (lxcn_is_space (lex_ch))
	{ /* A space in the lexeme stands for a non-empty run of blanks */
	  if (!lxcn_is_blank (in_ch))
	    return (NULL);
	  do
	    { input++;
	    }
	  while (lxcn_is_blank (*input));
	  continue;
	}

      /* Literal match first; the translated input character is the fallback */
      if ((lex_ch != in_ch) && (lex_ch != lxcn_translate (in_ch)))
	return (NULL);
      input++;
    }

  /* The character after the match must be acceptable for sep_type */
  if (!lxcn_check_terminator (sep_type, *input))
    return (NULL);
  return (input);
}

/*------------------------------------------------------------------------------
// Function:
//	static void init_regexp_limit (TokenLimit* token, unsigned char* str)
//	static void delimit_token (TokenLimit* limit)
//	static void restore_token (TokenLimit* limit)
//
// Description:
//	init_regexp_limit () finds boundary of str for regexp match.
//	delimit_token () delimits token with end-of-string.
//	restore_token () restores the boundary of the token.
//----------------------------------------------------------------------------*/
/* Boundary of the input region that may be matched, plus the character
   that is temporarily overwritten there. */
typedef struct
{ unsigned char *pos;	/* address of the boundary character in the input */
  unsigned char	save;	/* original character at pos; written by delimit_token(),
			   put back by restore_token() */
} TokenLimit;

/* Set the limit at the first end-of-string character found from str on.
   Only the pos field is set; save is left untouched. */
static void init_regexp_limit (TokenLimit* limit, unsigned char* str)
{ unsigned char* end = str;

  while (!lxcn_is_eos (*end))
    end++;
  limit -> pos = end;
}
/* Variant that takes the boundary from the caller; str is not used */
#define init_regexp_limit_to(limit, str, end) \
	((limit)->pos = (unsigned char*)(end))

/* Remember the character at the limit and overwrite it with EosMark */
static void delimit_token (TokenLimit* limit)
{ limit -> save = *(limit -> pos);
  *(limit -> pos) = EosMark;
}

/* Put the character remembered in limit -> save back at the limit position */
static void restore_token (TokenLimit* limit)
{ *(limit -> pos) = limit -> save;
}

/*------------------------------------------------------------------------------
// Function:
//	static unsigned char*
//	match_all_regexpalternatives (unsigned char *input,
//		TokenLimit *limit, RegExp *regexp, int nalts, int *alts)
//
// Description:
//	Match all regexp alternatives with non-empty prefix of str. The prefix
//      should be followed by a character acceptable to sep_type. TokenLimit limit
//	is used for delimiting and restoring the token that can be matched.
//
// Return value:
//	Pointer to first unmatched char of non-empty prefix of str
//	matching regexp, if any, or NULL else.
//----------------------------------------------------------------------------*/
static unsigned char *match_all_regexpalternatives (unsigned char *input,
			TokenLimit *limit, RegExp *regexp, int nalts, int *alts)
{ unsigned char *match_end;

  /* Terminate the input at the limit only for the duration of the match */
  delimit_token (limit);
  match_end = (unsigned char *) match_regexp_all_alternatives ((char *)input, regexp, nalts, alts);
  restore_token (limit);

  /* An empty match does not count as a match */
  return ((match_end == input) ? NULL : match_end);
}

/*
   Allocate a transition for regexp id matching the input text [from, to)
   and insert it in list class at position pos of state_row.
*/
static void insert_new_regexp_transition (StateNode** state_row, unsigned pos, int class,
					  unsigned id, unsigned char* from,
					  unsigned char* to, RegType reg_type)
{ Transition* fresh = alloc_transition ();

  init_regexp_transition (fresh, pos, id, from, to, reg_type);
  /* The transition length is the number of matched input characters */
  insert_transition (state_row, pos, class, fresh, (unsigned) (to - from));
}

/*------------------------------------------------------------------------------
// Function:
//      static unsigned
//	match_regexps(RegType reg_type, State** state_row,
//	              unsigned char* input, unsigned pos)
//
// Description:
//	Match all regexps at position pos in input. For each match,
//	a new state is created and inserted at position pos in state_row.
//	The boundary of the token to be matched is stored in TokenLimit.
//
// Return value:
//	The number of matched regexps.
//----------------------------------------------------------------------------*/
/*
   Run the combined regexp of reg_type on the input at position pos and
   insert a transition of class nont_class in state_row for every
   alternative that is accepted. Returns the number of inserted transitions.

   The matcher fills alts with groups: an end offset (relative to the
   match start), followed by the 1-based numbers of the alternatives that
   end there, closed by 0. The list of groups is closed by -1.
*/
static unsigned match_regexps (RegType reg_type, StateNode** state_row, int nont_class,
			       unsigned char* input, unsigned pos,
		               SeparatorType sep_type, int do_all_lex_types)
{
#define NALTS	1000		/* TODO: find some solution for this size limit */
  unsigned nr_matches = 0;
  unsigned nr_regexps = get_nr_regexps(reg_type);
  unsigned char* from = input + pos;
  TokenLimit limit;
  RegExp *combined;
  int alts[NALTS];
  int i;

  init_regexp_limit(&limit, from);
  combined = get_combined_regexp(reg_type);
  assert(combined || nr_regexps == 0);

  /* Without a combined regexp there is nothing to match */
  if (!combined)
    return (nr_matches);

  /* No regexp matched here at all: finished */
  if (match_all_regexpalternatives(from, &limit, combined, NALTS, &alts[0]) == NULL)
    return 0;

  /* Visit every group; the step of the outer loop skips the closing 0 */
  for (i = 0; i < NALTS && alts[i] != -1; i++)
    { int endpos = alts[i++];
      unsigned char *match_end = from + endpos;

      /* Visit the alternatives that end at this offset */
      for (; i < NALTS && alts[i] != 0; i++)
	{ int alt = alts[i] - 1;

	  /* Accept only non-empty matches of a wanted lexeme type
	     that are followed by an acceptable character */
	  if (endpos > 0 &&
	      (do_all_lex_types ||
		  regexp_lex_type(alt, reg_type) == SingleToken) &&
	      lxcn_check_terminator(sep_type, *match_end))
	    { insert_new_regexp_transition(state_row, pos,
		      nont_class, alt, from, match_end, reg_type);
	      nr_matches++;
	    }
	}
    }
  return nr_matches;
}

/* Note always match all lex types */
/* Match the $MATCH regexps at pos for all lex types; true if any transition was inserted */
static int match_regexp_matches (StateNode** state_row, unsigned char* input, unsigned pos)
{ unsigned nr_found = match_regexps (RegMatch, state_row, re_match_class, input, pos,
				     SepDontCare, 1);
  return (nr_found > 0);
}

/* Match the $SKIP regexps at pos (SingleToken alternatives only, separator
   required); true if any transition was inserted */
static int match_regexp_skips (StateNode** state_row, unsigned char* input, unsigned pos)
{ unsigned nr_found = match_regexps (RegSkip, state_row, re_skip_class, input, pos,
				     SepRequired, 0);
  return (nr_found > 0);
}

/*------------------------------------------------------------------------------
// Function:
//	int match_lexicon_terminals (StateNode** state_row, unsigned char* input,
//				     unsigned pos, LexemeType lex_type)
//
// Description:
//	Match all lexicon terminals at position pos in input with lex_type.
//	For each match, a new state is created and inserted
//	at position pos in state_row.
//	Lexicon terminals are identified by lxcn_approx_match_trie(),
//	saving each info, the matched lexeme and the next search position
//	in lxcn_TrieFrame frame.
//
// Return value:
//	The number of matched lexicon terminals.
//----------------------------------------------------------------------------*/
static int match_lexicon_terminals (StateNode** state_row, unsigned char* input,
				    unsigned pos, LexemeType lex_type)
{ Trie trie = get_trie ();
  unsigned nr_matches = 0;
  unsigned char *from;
  unsigned char *to;
  TrieData trie_data;

  /* Without a trie there are no lexicon terminals to match */
  if (trie == NULL)
    return (nr_matches);

  from = input + pos;
  trie_data = lxcn_init_approx_match_trie (from, lex_type, SepDontCare);

  /* Each successful call delivers the end of one more matched lexeme */
  for (to = lxcn_approx_match_trie (trie, trie_data);
       to != NULL;
       to = lxcn_approx_match_trie (trie, trie_data))
    { unsigned char* lex_beg = lxcn_get_lex_begin (trie_data);
      unsigned char* lex_end = lxcn_get_lex_end (trie_data);
      int entry_idx = lxcn_get_info (trie_data);
      Transition *transition = alloc_transition ();
      unsigned length = to - from; /* measured on the input, not on the lex data */

      if (lex_type != SingleToken)
	{ /* The lexeme starts with a control character for its type: step over it.
	     The input length is not affected by this. */
	  if (*lex_beg != lxcn_get_lex_mark (lex_type))
	    fprintf (stderr, "*lex_beg=%c, lexmark=%c\n",
		     *lex_beg, lxcn_get_lex_mark (lex_type));
	  assert ((*lex_beg == lxcn_get_lex_mark (lex_type)) &&
		  "match_lexicon_terminals: lex_beg is not lex_mark.");
	  lex_beg++;
	}

      init_lexicon_transition (transition, pos, entry_idx, lex_beg, lex_end, lex_type);
      expand_and_insert_transition (state_row, pos, entry_idx, transition, length);
      nr_matches++;
    }

  lxcn_exit_approx_match_trie (trie_data);
  return (nr_matches);
}

/*------------------------------------------------------------------------------
// Function:
//      static int
//	match_grammar_terminals(StateNode** state_row, unsigned char* input,
//	                        unsigned pos,
//	                        LexemeType lex_type, SeparatorType sep_type)
//
// Description:
//	Match all grammar terminals at position pos in input with lex_type
//	and sep_type. For each match, a new state is created and inserted
//	at position pos in state_row.
//
// Return value:
//	The number of matched grammar terminals.
//----------------------------------------------------------------------------*/
static int match_grammar_terminals (StateNode** state_row, unsigned char* input,
	                	    unsigned pos, LexemeType lex_type)
{ unsigned nr_matches = 0;
  unsigned nr_terminals = get_nr_terminals ();
  unsigned char* from = input + pos;
  unsigned ix;

  /* Try every grammar terminal against the input at this position */
  for (ix = 0; ix < nr_terminals; ix++)
    { unsigned char *to = approx_match_lexeme (from, get_terminal (ix), lex_type, SepDontCare);
      Transition* transition;

      if (to == NULL)
	continue;		/* this terminal does not match here */

      transition = alloc_transition ();
      init_terminal_transition (transition, pos, ix, lex_type);
      insert_transition (state_row, pos, gr_term_class, transition, (unsigned) (to - from));
      nr_matches++;
    }
  return (nr_matches);
}

/*
   Match grammar terminals first and lexicon terminals second at position
   pos for lexeme type lex_t; returns the combined number of matches.
*/
static unsigned match_terminals (StateNode** state_row, unsigned char* input, unsigned pos,
				 LexemeType lex_t)
{ unsigned from_grammar;
  unsigned from_lexicon;

  /* Two separate statements keep the order of the calls fixed */
  from_grammar = match_grammar_terminals (state_row, input, pos, lex_t);
  from_lexicon = match_lexicon_terminals (state_row, input, pos, lex_t);
  return (from_grammar + from_lexicon);
}

/*------------------------------------------------------------------------------
//----------------------------------------------------------------------------*/
/*
   Insert a transition of class other_class at this_pos that covers the
   input from this_pos up to the next terminator (at least one character).
*/
static void match_other (StateNode** state_row, unsigned char* input, unsigned this_pos)
{ unsigned pos = this_pos;
  Transition *transition = alloc_transition ();

  assert (!lxcn_is_eos(input[pos]) && "match_other: eos position.");

  /* Always take one character, then run on to the terminator */
  do
    { pos++;
    }
  while (!lxcn_is_terminator (input[pos]));

  init_other_transition (transition, this_pos, input + this_pos, pos - this_pos);
  insert_transition (state_row, this_pos, other_class, transition, pos - this_pos);
}

/*------------------------------------------------------------------------------
// Type:
//	PartState
//
// Description:
//	PartState represents the state of the finite state machine for
//	recognizing parts-tokens. The state contains bits for indicating
//	the token types that have been recognized. A new state is obtained
//	by or-ing the previous state with a TransitionType.
//	A PartState is final if its associated token contains a suffix,
//	a single token or a multi token (see is_final_part_state()).
//	Otherwise, the token should be extended with other parts.
//----------------------------------------------------------------------------*/
typedef TransitionType	PartState;

/* The part state after taking a transition consists of the part bits of its type */
static PartState get_next_part_state (Transition* state)
{ PartState part_bits = state -> type & PartBitsMask;
  return (part_bits);
}

/* Non-zero if part_state has the suffix, single token or multi token bit set */
static int is_final_part_state (PartState part_state)
{ const PartState final_bits = SuffixBit | SingleTokenBit | MultiTokenBit;
  return (part_state & final_bits);
}

/*
   Tell whether the part described by state may follow when the token
   recognized so far is in cur_part_state:
   - an infix or suffix needs a non-initial part state;
   - a prefix, single token or multi token is refused once a suffix,
     single token or multi token has been seen.
*/
static int can_take_part_state_trans (PartState cur_part_state, Transition* state)
{ int allowed = 0;

  switch (state -> type & PartBitsMask)
    { case InfixBit:
      case SuffixBit:
	allowed = (cur_part_state != 0);
	break;

      case PrefixBit:
      case SingleTokenBit:
      case MultiTokenBit:
	allowed = !(cur_part_state & (SuffixBit | SingleTokenBit | MultiTokenBit));
	break;

      default:
	assert(0 && "can_take_part_state_trans: ...but it is unreachable code!");
	break;
    }
  return (allowed);
}

/*------------------------------------------------------------------------------
// The translation of a part_state to the corresponding bit in
// AggregatePartStates is done here. The rest of this file may use
// the knowledge that AggregatePartStates is a bit_set (so other routines
// can use & and | on it and initialize an aggregate to 0).
//----------------------------------------------------------------------------*/
/*
   Return the set with exactly one member: the bit that stands for
   part_state (only the bits in PartBitsMask are considered).
   The shift is carried out on a value of type AggregatePartStates: a
   plain int constant 1 would give undefined behaviour as soon as the
   shift count reaches the sign bit of int, and would not produce the
   intended single bit when AggregatePartStates is wider than int.
*/
static AggregatePartStates part_state2set_bit (PartState part_state)
{ return ((AggregatePartStates) 1 << (part_state & PartBitsMask));
}

/* Keep only the initial part state (part state 0) of cur_set, if it is a member */
static AggregatePartStates initial_part_states_subset (AggregatePartStates cur_set)
{ AggregatePartStates initial_bit = part_state2set_bit (0);
  return (cur_set & initial_bit);
}

/* Return the members of cur_set that are final part states */
static AggregatePartStates final_part_states_subset (AggregatePartStates cur_set)
/* TODO: optimize by creating a final_set mask at program initialization */
{ AggregatePartStates finals = 0;
  PartState candidate;

  for (candidate = 0; candidate <= MaxPartState; candidate++)
    { AggregatePartStates bit = part_state2set_bit (candidate);

      if ((cur_set & bit) && is_final_part_state (candidate))
	finals |= bit;
    }
  return (finals);
}

/*
   Set bit of the part state that results from taking the transition state.
   The current part_state has no influence on the result.
*/
static AggregatePartStates next_partstate_bit (Transition* state, PartState part_state)
{ PartState successor = get_next_part_state (state);
  return (part_state2set_bit (successor));
}

/*------------------------------------------------------------------------------
// Function:
//	static AggregatePartStates
//	add_trans_dest_part_states(AggregatePartStates cur_aggr, State* state,
//	                                AggregatePartStates* next_aggr_p)
// Description:
//	For each of the possible part_states at the current position,
//	add the part_state resulting from this transition (if it's a
//	valid one) to the part_state_set at the destination position.
// Return value:
//	None. The part_states that result from taking this transition
//	are ORed into *next_aggr_p.
//----------------------------------------------------------------------------*/
static void add_trans_dest_part_states (AggregatePartStates cur_aggr, Transition* transition,
					AggregatePartStates* next_aggr_p)
{ PartState candidate;

  for (candidate = 0; candidate <= MaxPartState; candidate++)
    { /* Only part states that are present at the current position count */
      if (!(cur_aggr & part_state2set_bit(candidate)))
	continue;

      /* A valid transition adds its resulting part state to the destination set */
      if (can_take_part_state_trans(candidate, transition))
	*next_aggr_p |= next_partstate_bit(transition, candidate);
    }
}

/*------------------------------------------------------------------------------
// Function:
// Description:
//	for all part_states in the current set, check if this transition
//	would result in a part_state in the 'next' set.
// Return value:
//	The subset of cur_aggr for which the above test succeeds.
//----------------------------------------------------------------------------*/
static AggregatePartStates find_parts_trans_live_subset (AggregatePartStates cur_aggr,
	                	 Transition* transition, AggregatePartStates next_aggr)
{ AggregatePartStates live = 0;
  PartState candidate;

  for (candidate = 0; candidate <= MaxPartState; candidate++)
    { AggregatePartStates bit = part_state2set_bit (candidate);

      /* Not a member of the current set */
      if (!(cur_aggr & bit))
	continue;

      /* The transition is not allowed from this part state */
      if (!can_take_part_state_trans (candidate, transition))
	continue;

      /* Keep the part state if the transition ends in a member of next_aggr */
      if (next_aggr & next_partstate_bit (transition, candidate))
	live |= bit;
    }
  return (live);
}

/*------------------------------------------------------------------------------
// Function:
//	static void add_parts_transitions (Trellis* trellis, unsigned char* input,
//					   Position pos, Position *last_part_end);
//
// Description:
//	For each state at position pos in state_row, try to extend a
//	parts-token by matching new parts in the next PartState.
//	At this point, we match all parts, regardless of the validness
//	of the resulting sequence (validity is checked after matching
//	a complete parts_token, the components of a valid parts_token
//	get the TokenPartBit on; dead parts are removed after lexing
//	the whole sentence).
//
//	If a parts-token cannot be extended, and it ends at a terminator,
//	the rest of the input is lexicalized, resulting in a normal
//	transition to the next token.
//	If a parts-token cannot be extended and there is no terminator,
//	the state is deleted. This means that no part at all could be
//	matched after the current one.
//
//	The order of the parts in the list is not yet important;
//	the list is not yet sorted and its tail is not yet shared.
//	The length of the part in each state should have been coded in the
//	trans-field of each state.
//
//	Uses last_part_end to keep track of the last position at which
//	a parts token has been found to end
//----------------------------------------------------------------------------*/

/*------------------------------------------------------------------------------
// Necessary forward declarations
//----------------------------------------------------------------------------*/
static void delete_transition (Transition* transition);
static StateNode* match_parts (Trellis* trellis, unsigned char* input, Position pos,
			       Position *last_part_end);

/*
   Does not really add transitions, only finds successor parts (within this word!)
   Deletes transition if there are no following parts
   and the next character is not a terminator.
*/
static void add_parts_transitions (Trellis* trellis, unsigned char* input, Position pos,
				   Position *last_part_end)
{ StateNode** parts_row = trellis -> pstates_row;
  StateNode* pst_node = trellis -> pstates_row[pos];
  int n_found_classes = 0;	/* nr of classes with live parts */
  int class;

#ifdef PARTS_TRACE
  abs_message ("add_parts_transitions begin for pos%d lpe%d", pos, *last_part_end);
#endif
  assert((pst_node != NULL) && "add_parts_transitions: pst_node is NULL.");
  for (class = NR_classes - 1; class >= 0; class--)
    { Transition* transition = pst_node -> trans_lists[class];
      /* List of the transitions of this class that are kept; it is built
	 by pushing at the head, so it ends up in reverse order. */
      Transition* done = NULL;
      while (transition != NULL)
        { /* Remember the next transition; we might delete this one */
	  Transition* next = transition -> next;
          Position next_pos;
          StateNode* trans_dest = NULL;
          assert(!has_transition(transition) && "add_parts_trans: has no transition.");
	  /* Position directly behind the text covered by this part */
          next_pos = pos + get_length (transition);

          {
#ifdef PARTS_TRACE
	    char *hyph_beg = (transition -> type & (InfixBit|SuffixBit))? "-" : "";
	    char *hyph_end = (transition -> type & (InfixBit|PrefixBit))? "-" : "";
	    abs_message ("add_parts_transitions: trying '%s%s%s'->...",
	                 hyph_beg, transition -> text, hyph_end);
#endif /* PARTS_TRACE */

	    /*
	       Match the remainder of the input for this transition
	       Returns parts_row[next_pos]
	    */
	    trans_dest = match_parts (trellis, input, next_pos, last_part_end);
#ifdef PARTS_TRACE
	    if (trans_dest != NULL)
	      abs_message ("add_parts_transitions: added '%s%s%s'->%p", hyph_beg,
		           transition -> text, hyph_end, trans_dest);
	    else
	      abs_message ("add_parts_transitions: no match '%s%s%s'->", hyph_beg,
		           transition -> text, hyph_end);
#endif /* PARTS_TRACE */
          }
#ifdef PARTS_TRACE
          abs_message ("add_parts_transitions pos%d trans_dest%p next_pos%d\n",
		       pos, trans_dest, next_pos);
#endif
          if ((trans_dest != NULL) || lxcn_is_terminator(input[next_pos]))
            { /* Either there is a connecting part or at end of word */
	      /* No usable part follows: this part ends at next_pos, so
		 *last_part_end is raised to it when it lies further right. */
	      if (trans_dest == NULL || has_failure(trans_dest))
	        { if (*last_part_end < next_pos)
	            { *last_part_end = next_pos;
#ifdef PARTS_TRACE
	              abs_message ("add_parts_transitions pos%d set lpe%d=%d\n",
				   pos, *last_part_end, next_pos);
#endif
	            }
	        }
	      /* Keep this transition: push it onto the done list */
	      transition -> next = done;
	      done = transition;
            }
          else
	    { /* No connecting part and not at valid end of word */
	      delete_transition (transition);
	    };

	  /* Step to next transition */
          transition = next;
        };

#ifdef PARTS_TRACE
      if (pst_node -> trans_lists[class] != NULL)
        abs_message ("add_parts_transitions: pos %d class %d now %p", pos, class, done);
#endif

      /* Just remember those transitions that led somewhere */
      pst_node -> trans_lists[class] = done;
      if (done != NULL)
        n_found_classes++;

    }; /* for class */

  /* We are done iterating over all classes */
  if (n_found_classes == 0)
    { /*
	 We have removed all parts at this pos, remove the parts-StateNode
	 too, to keep the assumption intact that a StateNode is never empty
	 MS: This may cause some reparsing
      */
      free_statenode (parts_row[pos]);
      parts_row[pos] = NULL;
    };
#ifdef PARTS_TRACE
  abs_message ("add_parts_transitions end for pos%d lpe%d", pos, *last_part_end);
#endif
}

/*
   Deliver the parts StateNode for position pos, i.e. trellis -> pstates_row[pos].
   On the first visit of a position all terminals and $MATCH regexps are
   matched there; if something matched, the connecting parts are searched
   with add_parts_transitions() and the resulting *last_part_end is
   recorded in trellis -> last_part_end_from[pos]. If nothing matched,
   the position is marked as failure.
   On a later visit of a position with matches, *last_part_end is raised
   to the recorded value if that lies further to the right.
*/
static StateNode* match_parts (Trellis* trellis, unsigned char* input,
			       Position pos, Position *last_part_end)
{ StateNode** parts_row = trellis -> pstates_row;

  if (parts_row[pos] != NULL)
    { /* This position has been visited before; a failure needs no action */
      if (!has_failure (parts_row[pos]))
	{ /* Something matched here earlier: reuse the recorded end position */
	  if (*last_part_end < trellis -> last_part_end_from[pos])
	    *last_part_end = trellis -> last_part_end_from[pos];
#ifdef DEBUG_RTS
	  if (*last_part_end == 0)
	    {
#ifdef COUNT_TRACE
	      abs_message ("match_parts (tok%ld): %ld,+%d'%c%c%c:last__end=%d from array",
			   n_parts_tok_returns + 1, n_trel_builds, pos, input[pos],
			   input[pos+1], input[pos+2], *last_part_end);
#else
	      abs_message ("match_parts: %p+%d:last_part_end=%d from array",
			   input, pos, *last_part_end);
#endif /* COUNT_TRACE */
	    }
#endif /* DEBUG_RTS */
	}
      return (parts_row[pos]);
    }

  /* No knowledge yet about this position: try everything that can match */
  match_terminals (parts_row, input, pos, Prefix);
  match_terminals (parts_row, input, pos, Infix);
  match_terminals (parts_row, input, pos, Suffix);
  match_terminals (parts_row, input, pos, SingleToken);
  match_terminals (parts_row, input, pos, MultiToken);
  match_regexp_matches (parts_row, input, pos);

  if (parts_row[pos] == NULL)
    { /*
	 Nothing matched at this position. Record the failure,
	 so that the work is not redone on a later visit.
      */
      mark_failure (parts_row, pos);
      return (parts_row[pos]);
    }

  /* There are matching parts */
#if (defined(PARTS_TRACE) && 0)
  dump_trans_lists_ptrs ("match_parts < add", pos, parts_row[pos] -> trans_lists);
#endif

  /* Look for connecting parts to which we can transit */
  add_parts_transitions (trellis, input, pos, last_part_end);
#ifdef DEBUG_RTS
  if ((parts_row[pos] != NULL) && (*last_part_end == 0))
    {
#ifdef COUNT_TRACE
      abs_message ("match_parts (tok%ld): %ld,+%d'%c%c%c:last__end=%d after add_p__tr",
		   n_parts_tok_returns + 1, n_trel_builds, pos, input[pos],
		   input[pos+1], input[pos+2], *last_part_end);
#else
      abs_message ("match_parts: %p+%d:last_part_end=%d after add_parts_transitions",
		   input, pos, *last_part_end);
#endif /* COUNT_TRACE */
    }
#endif /* DEBUG_RTS */

  trellis -> last_part_end_from[pos] = *last_part_end;
  return (parts_row[pos]);
}

/*------------------------------------------------------------------------------
// Description:
//	First, forward_mark_partstate_sets() marks each part_start
//	position within the token with bits for
//	all the part_states with which that position can be reached:
//	positions are scanned from left to right; for every
//	transition from a position, the mark at its destination pos is ORed
//	with bits for every part_state that can result from that transition.
//	The mark is called a 'set' of part_states or 'aggregate'.
//
//	Second, backward_mark_partstate_sets() scans the positions from
//	right to left, checking each
//	transition node if, from any of the part_states at the current
//	position, a part_state can be reached that has been found alive
//	at the destination position. If so, the node gets the TokenPartBit,
//	to mark it alive.
//	The part_states at the current position, for which no transition
//	would result in a live destination part_state, are removed
//	from the set (i.e., the old set is overwritten by
//	its subset with live part_states only).
//	If no live parts_transitions are left at a position, and it does have
//	final_part_states in its set, a lexicalize() is tried from there.
//	If that succeeds, the final_part_states are written over the old set.
//	If not, the empty set is written.
//----------------------------------------------------------------------------*/
static void forward_mark_partstate_sets (Trellis* trellis, AggregatePartStates *set_array,
	                        	 unsigned first_pos, unsigned last_part_end)
{ Position cur_pos;

  for (cur_pos = first_pos; cur_pos < last_part_end; cur_pos++)
    { /* here: set of part_states possible at cur_pos;
	 set_array[0] belongs to first_pos */
      AggregatePartStates *here = set_array + (cur_pos - first_pos);
      StateNode *node = trellis->pstates_row[cur_pos];
      int class;

      /* Positions without parts have nothing to propagate */
      if (node == NULL || has_failure(node))
	continue;

      for (class = NR_classes - 1; class >= 0; class--)
	{ Transition* transition = node->trans_lists[class];
#ifdef PARTS_TRACE
	  if (transition != NULL)
	    { abs_message ("forward_mark_partstate_sets: pos %d class %d list:%p",
			   cur_pos, class, transition);
	    }
#endif
	  /*
	     OR the set at the destination of each transition with the
	     part_states that are reached when the transition is taken
	     (the ORing is done in add_trans_dest_part_states).
	     Impossible transitions can't be removed from the list here,
	     because the part may be valid as a component of another token.
	  */
	  for (; transition != NULL; transition = transition -> next)
	    { unsigned pos_dist = get_length (transition);
	      add_trans_dest_part_states(*here, transition, &(here[pos_dist]));
	    }
	}
    }
}

/*
   Walk the transition list starting at transition and mark every
   transition that is live, i.e. that leads from a part_state in *cur_set
   to a part_state in the set at its destination, cur_set[length].
   A live transition is marked as token start if it can be taken from the
   initial part_state, and as final or nonfinal live part depending on
   NextTokenStartsHere in the destination set.
   Returns the OR of the live subsets of *cur_set over all transitions.
*/
static AggregatePartStates find_mark_partstate_live_set (Position cur_pos, Transition* transition,
							 AggregatePartStates* cur_set)
{ AggregatePartStates return_set = 0;

  /* WAS crashes here due to SIGBUS: transition = FAILURE */
  for (; transition != NULL; transition = transition->next)
    { unsigned pos_dist = get_length (transition);
      AggregatePartStates trans_live_set;

#ifdef PARTS_TRACE
      char *hyph_beg = (transition -> type & (InfixBit|SuffixBit))? "-" : "";
      char *hyph_end = (transition -> type & (InfixBit|PrefixBit))? "-" : "";

      abs_message ("find_mark_partstate_live_sets: pos %d state %p'%s%s%s', "
		   "set x%lx->x%lx, next %p",
	           cur_pos, transition, hyph_beg, transition->text, hyph_end,
	           *cur_set, cur_set[pos_dist], transition->next);
#endif
      /* npx bug if: assert(!is_token_start(transition)); */

      /* trans_live_set: the part_states of *cur_set for which this
	 transition results in a live transition path */
      trans_live_set = find_parts_trans_live_subset(*cur_set, transition, cur_set[pos_dist]);
#ifdef PARTS_TRACE
      abs_message ("find_mark_partstate_live_set: pos %d state %p'%s%s%s', "
		   "live x%lx, set x%lx->x%lx, next %p",
	           cur_pos, transition, hyph_beg, transition->text, hyph_end,
		   trans_live_set, *cur_set, cur_set[pos_dist], transition->next);
#endif
      /* A dead transition gets no marks at all */
      if (trans_live_set == 0)
	continue;

      if (initial_part_states_subset(trans_live_set))
	{ /* this part can be start of token */
	  assert(!is_token_start(transition) && "find_mark_parkstate_lifeset: is not a start token."); /* after trying to fix npx bug */
	  mark_token_start(transition);
	}
      return_set |= trans_live_set;

      if (cur_set[pos_dist] & NextTokenStartsHere)
	{
#ifdef PARTS_TRACE
	  abs_message ("find_mark_partstate_live_set: pos %d state %p'%s%s%s' final",
	               cur_pos, transition, hyph_beg, transition->text, hyph_end);
#endif
	  mark_live_final_part(transition);
	}
      else
	{
#ifdef PARTS_TRACE
	  abs_message ("find_mark_partstate_live_set: pos %d state %p'%s%s%s' nonfinal",
	               cur_pos, transition, hyph_beg, transition->text, hyph_end);
#endif
	  mark_live_nonfinal_part(transition);
	}
    }
  return (return_set);
}

/*
   Backward pass over the positions first_pos .. last_part_end.

   set_array holds one set per position, indexed relative to first_pos.
   Walking from last_part_end down to first_pos, each entry is overwritten
   with the live set for that position:
   - the OR of find_mark_partstate_live_set() over all classes of the parts
     state at that position (if there is one and it is not a failure);
   - if that yields nothing and input[cur_pos] is a terminator, the
     final_part_states_subset() of the old entry; when this is non-empty,
     lexicalize() is called at cur_pos and NextTokenStartsHere is added.
*/
static void backward_mark_partstate_sets (Trellis* trellis, AggregatePartStates *set_array,
			unsigned char* input, Position first_pos, unsigned last_part_end)
{ StateNode** parts_row = trellis -> pstates_row;
  int cur_pos; /* must be signed due to >= */
  /* cur_set follows cur_pos: it always points at the entry for cur_pos */
  AggregatePartStates* cur_set = set_array + last_part_end - first_pos;
#ifdef PARTS_TRACE
  abs_printf ("backward_mark_partstate_sets: pos %d..%d sets:", first_pos, last_part_end);
  for (cur_pos = first_pos; cur_pos <= last_part_end; cur_pos++)
  { abs_printf (" x%lx", set_array[cur_pos - first_pos]);
  }
  abs_printf ("\n");
#endif
  for (cur_pos = last_part_end; cur_pos >= (int) first_pos; cur_pos--)
  { /* live_set: OR of all trans_live_sets at this position */
    AggregatePartStates live_set = 0;

    /* no FAILURES are used (yet) in parts_row,
    // but we leave the test here in case things get redesigned :-)
    */
    if (parts_row[cur_pos] != NULL && !has_failure(parts_row[cur_pos]))
    {
      int class;
      for (class = NR_classes - 1; class >= 0; class--)
      {
	Transition* transition = parts_row[cur_pos]->trans_lists[class];
#  ifdef PARTS_TRACE
	if (transition != NULL)
	  abs_message ("backward_mark_partstate_sets: pos %d class %d list:%p",
	               cur_pos, class, transition);
#  endif
	live_set |= find_mark_partstate_live_set(cur_pos, transition, cur_set);
      } /* for class */
    } /* if !=NULL && !has_failure */
    /* no live part leaves this position: it can only be the end of the parts token */
    if ((live_set == 0) && lxcn_is_terminator(input[cur_pos]))
    {
	live_set = final_part_states_subset(*cur_set);
#ifdef PARTS_TRACE
	abs_message ("backward_mark_partstate_sets: is_sep pos %d live x%lx, set x%lx",
	             cur_pos, live_set, *cur_set);
#endif
	if (live_set != 0)
	{
#ifdef PARTS_TRACE
	  abs_message ("backward_mark_partstate_sets: pos %d:lexing", cur_pos);
#endif
	  /* continue lexing the rest of the input from this token end */
	  lexicalize (trellis, input, cur_pos); /* always succeeds */
#ifdef PARTS_TRACE
	  abs_message ("backward_mark_partstate_sets: pos %d NextTokenStartsHere", cur_pos);
#endif
	  live_set |= NextTokenStartsHere;
	}
    }
    /* replace the forward set by the live set; positions to the left read it */
    *cur_set = live_set;
    cur_set--;
  } /* for cur_pos */
}

/*
   Run the forward and the backward marking pass over the positions
   first_pos .. last_part_end, using a temporary array with one
   aggregate set per position.
*/
static void mark_partstate_sets (Trellis* trellis, unsigned char* input,
	                	 unsigned first_pos, unsigned last_part_end)
{ unsigned nr_positions = last_part_end + 1 - first_pos;
  AggregatePartStates *sets = alloc_init_aggregate_array (nr_positions);

  /* the entry for first_pos starts out with the bit for part state 0 */
  *sets = part_state2set_bit (0);

  forward_mark_partstate_sets (trellis, sets, first_pos, last_part_end);
  backward_mark_partstate_sets (trellis, sets, input, first_pos, last_part_end);

  free_aggregate_array (sets);
}

/*
   forward_mark_partstate_sets(), backward_mark_partstate_sets(),
   and mark_partstate_sets() are called after each parts_token; 
   remove_nonlive_token_parts() and fix_parts_transitions() are
   called after lexing sentence.
*/
/*
   Stable partition of the list at *tlist_handle: all transitions that are
   not token starters come first, followed by all token starters; the
   relative order inside each group is kept.

   Return value: the first starter (i.e. the shared tail of the list),
   or NULL if the list holds no starters.
*/
static Transition* reorder_parts_list_return_starters(Transition** tlist_handle)
{ Transition* starters_head = NULL;
  /* each tail handle is the place where the next member of its group is linked */
  Transition** starters_tail = &starters_head;
  Transition** others_tail = tlist_handle;
  Transition* cur;
  Transition* following;

  for (cur = *tlist_handle; cur != NULL; cur = following)
  { following = cur->next;
    if (!is_token_start(cur))
    { *others_tail = cur;
      others_tail = &(cur->next);
    }
    else
    { *starters_tail = cur;
      starters_tail = &(cur->next);
    }
  }

  /* terminate the starters, then hang them behind the other transitions */
  *starters_tail = NULL;
  *others_tail = starters_head;
  return starters_head;
}

/*------------------------------------------------------------------------------
// Function:
//	static State* prune_parts_tokens(State** state_row,
//	                        unsigned first_pos, unsigned last_part_end)
//	In a right-to-left loop, the transition pointers are set
//	correctly (replacing the length).
//	If a state is not marked 'live' (TokenPartBit), it is removed.
//	The remaining states get the parts_transition flag, unless the
//	TokenStart flag is set on the destination state (in which case they
//	get the regular (token_)trans flag). TokenStart is set on all
//	token beginnings (including the first part of a parts_token)
//	by add_transitions() which is called from lexicalize().
//TODO: can we end up with unreachable nodes after remove_ ?!?
//	
// Return value:
//	NULL if no parts tokens were left after pruning;
//	a State* otherwise.
//----------------------------------------------------------------------------*/

/*
// The starter parts (all alive) are already at end of list,
// and should be kept in the same order (because they are already
// linked from the end of the state_row list).
// The algorithm below keeps a destination-pointer, to where the
// next live state-ptr should be put.
*/

/*
   Filter the list at *tlist_handle in place: transitions that are live
   token parts are kept (in their original order), all others are deleted.
*/
static void remove_nonlive_token_parts (Transition** tlist_handle)
{ Transition** keep_tail = tlist_handle;	/* where the next survivor is linked */
  Transition* cur;
  Transition* following;

  for (cur = *tlist_handle; cur != NULL; cur = following)
  { following = cur->next;	/* fetch before cur may be deleted */
    if (!is_live_token_part(cur))
    { /* if all transitions in all classes are removed,
      // caller (fix_parts_admin) will delete state_node as well */
      delete_transition(cur);
      continue;
    }
    *keep_tail = cur;
    keep_tail = &(cur->next);
  }
  *keep_tail = NULL;	/* close the list of survivors */
}

#ifdef SHOW_ZERO_DEST
/*
   Filter the list at *tlist_handle in place, reporting and deleting every
   transition that
   - is a final part but has no state in state_row at its destination, or
   - is a nonfinal part but has no state in parts_row at its destination, or
   - is neither a final nor a nonfinal part.
   The remaining transitions keep their order.
*/
static void remove_erroneous_token_parts (Transition** tlist_handle, StateNode** state_row,
					  StateNode** parts_row, Position at_pos)
{   Transition** keep_tail = tlist_handle;
    Transition* cur = *tlist_handle;

    while (cur != NULL) {
	Transition* following = cur->next;
	Position dest_pos = at_pos + get_length (cur);
	int erroneous;

	if (is_final_part(cur) && !state_row[dest_pos])
	    erroneous = 1;
	else if (is_nonfinal_part(cur) && !parts_row[dest_pos])
	    erroneous = 1;
	else
	    erroneous = (!is_final_part(cur) && !is_nonfinal_part(cur));

	if (!erroneous) {
	    *keep_tail = cur;
	    keep_tail = &(cur->next);
	} else {
	    abs_message ("remove_erroneous_token_parts: 0x%p: '%s' term=0x%x type=0x%x",
			 cur, cur->text, cur->terminal, cur->type);
	    abs_message ("      pos=%d dst=%d sr[dst]=0x%p pr[dst]=0x%p",
			 at_pos, dest_pos, state_row[dest_pos], parts_row[dest_pos]);

	    /* if all transitions in all classes are removed,
	    // caller (fix_parts_admin) will delete state_node as well */
	    delete_transition(cur);
	}
	cur = following;
    }
    *keep_tail = NULL;	/* close the list of survivors */
}
#endif /* SHOW_ZERO_DEST */

/*
   Give every transition in the list 'state' (which starts at at_pos) its
   destination, found at position at_pos + get_length (transition):
   - a final part gets a regular transition to state_row[dest], after
     may_skip_failures() has adjusted the destination position;
   - a nonfinal part gets a parts transition to parts_row[dest];
   - a part that is both is duplicated: the original gets the parts
     transition, the copy (inserted right behind it) the regular one.
*/
static void fix_parts_transitions (StateNode** state_row, StateNode** parts_row,
	                           Transition* state, Position at_pos)
{ while (state != NULL)
  {
#ifdef PARTS_TRACE
    char * hyph_beg = state->type & (InfixBit|SuffixBit) ? "-" : "";
    char * hyph_end = state->type & (InfixBit|PrefixBit) ? "-" : "";
#endif
    Position trans_pos = at_pos + get_length (state);
    if (is_final_part(state))
    { if (is_nonfinal_part(state))
      { /* We seem to have both a parts_trans and a token_trans here.
	// Duplicate the state.
	*/
	Transition *orig_next = state->next;
	state->next = alloc_transition();
	*(state->next) = *state;        /* copy whole struct */
	/* the copy must not free the text: delete_transition() frees it
	 * only for transitions with TxtFreeBit set, i.e. for the original */
	state->next->type = state->type & ~TxtFreeBit;
	/* NOTE(review): the struct copy also shares 'params'; delete_transition()
	 * frees params whenever it is non-NULL - confirm this cannot free twice */
	state->next->next = orig_next;
#ifdef SHOW_ZERO_DEST
	if (!parts_row[trans_pos])
	  abs_message ("fix_parts_transitions: add_d_tr(0x%p, 0, %d)", state, trans_pos);
	
#endif
	add_parts_transition(state, parts_row[trans_pos]);
	/* continue with the copy, which becomes the regular transition below */
	state = state->next;
      }
      may_skip_failures(state_row, &trans_pos);
#ifdef PARTS_TRACE
      abs_message ("fix_parts_transitions: pos %d state %p'%s%s%s' final=> %d:%p\n",
       at_pos, state, hyph_beg, state->text, hyph_end,
       trans_pos, state_row[trans_pos]);
#endif
#ifdef SHOW_ZERO_DEST
      if (!state_row[trans_pos])
	abs_message ("fix_parts_transitions: add_tr(0x%p, 0, %d)", state, trans_pos);
#endif
      add_transition(state, state_row[trans_pos]);
    }
    else
    {
#ifdef PARTS_TRACE
      abs_message ("fix_parts_transitions: pos %d state %p'%s%s%s' notfinal-> %d:%p",
       at_pos, state, hyph_beg, state->text, hyph_end,
       trans_pos, parts_row[trans_pos]);
#endif
#ifdef SHOW_ZERO_DEST
      if (!parts_row[trans_pos]) {
	 abs_message ("fix_parts_transitions: add_p_tr(0x%p, 0, %d)", state, trans_pos);
      }
#endif
      add_parts_transition(state, parts_row[trans_pos]);
    }
    state = state->next;
  }
}

/*
   Clean up the parts administration of the whole trellis, from the last
   position down to the first. At every position that has a (non-failure)
   parts state, each class list is stripped of its non-live parts and the
   surviving transitions get their destinations. A parts state that ends
   up without any transition is freed and its row entry is cleared.
*/
static void fix_parts_admin (Trellis* trellis)
{ StateNode** parts_row = trellis -> pstates_row;
  StateNode** state_row = trellis -> states_row;
  int pos;	/* signed: the loop counts down to and including 0 */

  /* order does matter here; removing dead parts may change the
   * first state (pointed to from parts_row) */
  for (pos = trellis -> length - 1; pos >= 0; pos--)
  { Transition** class_lists;
    int nr_live_classes = 0;	/* classes that still have transitions */
    int class;

    if (parts_row[pos] == NULL) continue;
    if (has_failure (parts_row[pos])) continue;

    class_lists = parts_row[pos] -> trans_lists;
    for (class = NR_classes - 1; class >= 0; class--)
    { Transition** list_handle = &(class_lists[class]);
      remove_nonlive_token_parts (list_handle);

#ifdef SHOW_ZERO_DEST
TODO_make_unnecessary_the_following_KLUDGE:
      remove_erroneous_token_parts(list_handle,
					state_row, parts_row, pos);
#endif
      fix_parts_transitions (state_row, parts_row, *list_handle, pos);
      if (*list_handle != NULL) nr_live_classes++;
    }
#  ifdef PARTS_TRACE
    abs_message ("fix_parts_admin: pos %d n_found_classes %d", pos, nr_live_classes);
#  endif
    if (nr_live_classes == 0)
    { /* nothing is left at this position: drop the parts-StateNode too,
      // to keep the assumption intact that a StateNode is never empty
      */
      free_statenode(parts_row[pos]);
      parts_row[pos] = NULL;
    }
  }
}

/*----------------------------------------------------------------------
// The parts that can be start of token are moved to the end of
// their parts_transition_lists. The tails of the lists
// (containing these parts) are appended (not copied)
// to the corresponding tail of the token_transition_lists.
// The effect is that a starter_part appears
// both in the token_transition_list (in case a transition path
// leads to a regular token transition at this pos)
// and in the parts_transition_list (in case another transition path
// leads to a parts transition at this pos).
//
// Caller assures that pstates_row[pos] != NULL
//---------------------------------------------------------------------*/
/*
   For every class at 'pos': reorder the parts list so that the token
   starters form its tail, and append that tail to the token list of the
   same class in states_row[pos] (creating the StateNode when needed).

   Return value: states_row[pos] after the operation; still NULL when
   there was no state at pos and no class had starters.
*/
static StateNode* share_starter_parts_transitions (Trellis *trellis, Position pos)
{ StateNode** state_handle = &(trellis->states_row[pos]);
  Transition** ptrans_lists = trellis->pstates_row[pos]->trans_lists;
  int class;

  for (class = NR_classes - 1; class >= 0; class--)
  { Transition** plist_handle = &(ptrans_lists[class]);
    Transition* tlist_null = NULL;
    Transition** tlist_handle = &tlist_null;	/* so PARTS_TRACE prints 0 */

    if (*plist_handle != NULL)
    { /*
      // the tail of the list, containing the starter_parts, is
      // shared among the parts_ list and the state_ list
      */
      Transition* starters = reorder_parts_list_return_starters(plist_handle);
      if (starters != NULL)
      {
	if (*state_handle == NULL)
	{ /* no regular transitions have been found at this pos yet,
	  // so we need to create a transition vector here
	  */
	  *state_handle = alloc_statenode();
	  init_statenode(*state_handle, pos);
	}
	tlist_handle = &((*state_handle)->trans_lists[class]);
	append_to_transition_list(tlist_handle, starters);
      }
#ifdef PARTS_TRACE
      /* this branch exists only to let the trace below show the token list */
      else if (*state_handle != NULL)
      { tlist_handle = &((*state_handle)->trans_lists[class]);
      }
      abs_message ("share_starter_parts_transitions: pos %d class %d tokens "
		   "%p parts %p starters %p",
	           pos, class, *tlist_handle, *plist_handle, starters);
#endif
    } /* if plist_handle */
  } /* for class */
  return (*state_handle);
}

/*------------------------------------------------------------------------------
// Function:
//      static int match_parts_tokens (Trellis* trellis,
//	                               unsigned char* input, unsigned pos)
//
// Description:
//	Match parts-tokens at position pos in input, and insert corresponding
//	states in state_row. A parts-token has the following syntax:
//	        prefix* (infix|word)+ suffix*
//	with at least one prefix, infix or suffix, and at most one word,
//	and not beginning or ending with an infix.
//
// Return value:
//	True, if at least one valid parts-token could be matched, or false else.
//
// Note:
//	Because we link all states at the same position with the next-pointer,
//	invalid part-tokens may still be possible (over-generation)!
//----------------------------------------------------------------------------*/
static int match_parts_tokens (Trellis* trellis, unsigned char* input, Position pos)
{ Position last_part_end = 0;
  StateNode* parts_node = match_parts (trellis, input, pos, &last_part_end);
  int matched = 0;
#ifdef PARTS_TRACE
  abs_message ("match_parts_tokens: pos %d state %p", pos, parts_node);
#endif
  /* a failure result counts as "nothing found" */
  if ((parts_node != NULL) && !has_failure (parts_node))
    { /* a matching sequence of part states was found */
#if (defined(PARTS_TRACE) && 0)
      dump_trans_lists_ptrs("match_parts_tokens", pos, parts_node -> trans_lists);
#endif
      assert((last_part_end > 0) && "match_parts_tokens: last_part_end too small.");

      /*
         Marking pass: flag the parts that occur in a valid sequence.
         Dead parts are not removed now but after the whole sentence has
         been lexed. While marking, lexicalize() is started from every
         possible ending of this token.
      */
      mark_partstate_sets (trellis, input, pos, last_part_end);

      /*
         Parts that can also start a token must show up in the token
         transition list as well as in the parts transition list.
         The result is NULL when no part is a starter and no other
         (non-parts) token had been found at this position before.
      */
      parts_node = share_starter_parts_transitions (trellis, pos);
      matched = (parts_node != NULL);
    }
#ifdef COUNT_TRACE
  n_parts_tok_returns++;
#endif
  return (matched);
}

/*------------------------------------------------------------------------------
// Function:
//	static int match_eos(StateNode** state_row, unsigned char* input, unsigned pos)
//
// Description:
//	Try to match end-of-sentence at position pos in input. If success,
//	insert eos-state at position pos in state_row.
//
// Return value:
//	True, if eos could be matched, or false else.
//----------------------------------------------------------------------------*/
static int match_eos (StateNode **state_row, unsigned char *input, unsigned pos)
{ Transition *eos_trans;

  /* nothing to do unless the character at pos marks end of sentence */
  if (!lxcn_is_eos (input[pos]))
    return (0);

  /* record an eos transition in the grammar-terminal class at pos */
  eos_trans = alloc_transition ();
  init_eos_transition (eos_trans, pos);
  insert_transition (state_row, pos, gr_term_class, eos_trans, 0);
  return (1);
}

/*------------------------------------------------------------------------------
// Function
//	static Position lexicalize (Trellis* trellis, unsigned char* input, unsigned pos)
//
// Description:
//	Perform lexical analysis of input, starting at position pos.
//	At this position, we either have failure, and skip to the next
//	position, or we have memoized a previous result, which is then
//	returned, or we have no result yet, in which case we try to
//	match the next token and the rest of the input following it.
//
//	If we have an invisible character, match lexemes that may
//	start with an invisible character. If nothing matches, skip
//	the invisible character,
//	and try again. If there are no more invisible
//	characters ahead, and we have not matched anything, try to
//	match all possible terminals, including end-of-sentence.
//	If we still don't have a match, skip the unknown token.
//
// Return value:
//	Position of the first valid token (an index into the state row), not a state pointer.
//----------------------------------------------------------------------------*/
/*
   Lexicalize the input from position pos onwards.
   Returns the position at which states were found or created; when
   state_row already has an entry at the position reached, that position
   is returned at once without matching again.
*/
static Position lexicalize (Trellis* trellis, unsigned char* input, unsigned pos)
{ StateNode** state_row = trellis -> states_row;
  int match = 0;

  /* the last step below always sets match, so this body runs at most once */
  while (!match)
    { /* skip_blanks_or_failures(state_row, input, &pos); */
      may_skip_failures (state_row, &pos);
      /* invisible characters: try parts tokens first, skip the character otherwise */
      while (!match && lxcn_is_invisible (input[pos]))
	{ if (state_row[pos] != NULL) return (pos);	/* already lexed from here */

	  match = match_parts_tokens (trellis, input, pos);
	  if (!match)
	    { do_skip_invisible_char (state_row, input, &pos);
	      /* skip_blanks_or_failures (state_row, input, &pos); */
	      may_skip_failures (state_row, &pos);
	    };
	};

      /* visible character: parts tokens, then $SKIP regexps, then end of sentence */
      if (!match)
	{ if (state_row[pos] != NULL) return (pos);	/* already lexed from here */
	  match = match_parts_tokens (trellis, input, pos) ||
		  /* only SingleToken REs */
	          match_regexp_skips (state_row, input, pos) ||
	          match_eos (state_row, input, pos);
	};

/*
      if (!match)
	skip_unknown_token (state_row, input, &pos);
      replacing:
*/
      /* nothing matched so far: fall back on match_other and stop looking */
      if (!match)
        { match_other (state_row, input, pos);
	  match = 1;
	};
    };

  add_transitions (trellis, input, pos);
  assert ((state_row[pos] != NULL) && "lexicalize: state_row = NULL.");
  assert (!has_failure(state_row[pos]) && "lexicalize: has failure in state row position.");
  return (pos);
}

/*------------------------------------------------------------------------------
// Function
//	static void remove_failures (Trellis* trellis)
//
// Description:
//	Replace failure positions in state-row with empty state lists.
//----------------------------------------------------------------------------*/
static void remove_failures (Trellis* trellis)
{ StateNode **slot = trellis -> states_row;
  unsigned remaining = trellis -> length;

  /* visit every entry of the state row once, clearing failure markers */
  while (remaining > 0)
    { if (has_failure (*slot))
	*slot = NULL;
      slot++;
      remaining--;
    }
}

/*
   Replace failure positions in the parts row with NULL.
*/
static void remove_parts_failures(Trellis* trellis)
{ StateNode **slot = trellis -> pstates_row;
  unsigned remaining = trellis -> length;

  /* visit every entry of the parts row once, clearing failure markers */
  while (remaining > 0)
    { if (has_failure (*slot))
	*slot = NULL;
      slot++;
      remaining--;
    }
}

/*------------------------------------------------------------------------------
// Build the trellis by matching the input
// After lexicalization the first state is filled in and artifacts of the
// lexicalization are removed
//----------------------------------------------------------------------------*/
static void build_trellis (Trellis* trellis, unsigned char* input)
{ Position start;

  /* lex from the beginning of the input and record where lexicalize ended up */
  start = lexicalize (trellis, input, 0);
  SET_FIRST_POS (trellis, start);

  /* clean-up: the parts administration goes first, because it
     relies on the failure markers still being present */
  fix_parts_admin (trellis);
  remove_failures (trellis);
  remove_parts_failures (trellis);
}

/*
   Initialize the trellis by filling in all default values
*/
static void init_trellis (Trellis* trellis, unsigned len, Lexicon the_lex)
{ StateNode** state_row = trellis -> states_row;
  StateNode** parts_row = trellis -> pstates_row;
  unsigned *last_end = trellis -> last_part_end_from;
  unsigned pos;

  /* clear the three per-position arrays */
  for (pos = 0; pos < len; pos++)
    { state_row[pos] = NULL;
      parts_row[pos] = NULL;
      last_end[pos] = 0;
    }

  trellis -> length = len;
  trellis -> lexicon = the_lex;
}

/*
   Annotate the remaining states and partial states with their proper line and column nr
*/
static void annotate_states_with_position (Trellis *trellis, char *input,
					   int linenr, int colnr)
{ StateNode** states_row = trellis -> states_row;
  StateNode** pstates_row = trellis -> pstates_row;
  unsigned len = trellis -> length;
  unsigned pos;

  for (pos = 0; pos < len; pos++)
    { StateNode* node = states_row[pos];
      StateNode* pnode = pstates_row[pos];

      /* stamp whatever exists at this position with the current line/column */
      if (node != NULL)
	{ node -> linenr = linenr;
	  node -> colnr = colnr;
	}
      if (pnode != NULL)
	{ pnode -> linenr = linenr;
	  pnode -> colnr = colnr;
	}

      /* advance the line/column administration over this character */
      if (input[pos] != '\n')
	colnr++;
      else
	{ linenr++;
	  colnr = 0;
	}
    }
}

/*
   Adjust the penalties of lexicon terminals relative to each other,
   by finding the smallest among the possible transitions out of a state,
   and subtracting it from all. The penalties in the trellis are only
   actually used in LEX_MATCH, i.e. for lexicon terminals. To avoid 
   a deceptive trellis dump, we take care to avoid affecting other
   transitions ($MATCH, $SKIP, grammar terminal). {{{
*/
/*
   Return the minimum of 'smallest' and the penalties of all transitions
   in the lexicon-nonterminal classes of state_node.
*/
static Penalty find_smallest_penalty_of_states_row (StateNode* state_node, Penalty smallest)
{ Penalty lowest = smallest;
  int class;

  for (class = nr_lexicon_nonterminals - 1; class >= 0; class--)
    { Transition* trans;
      for (trans = state_node -> trans_lists[class]; trans != NULL; trans = trans -> next)
	{ Penalty cost = trans -> penalty;
	  if (cost < lowest)
	    lowest = cost;
	}
    }
  return lowest;
}

/*------------------------------------------------------------------------------
// We use the knowledge that the shared part
// of the state_list is the tail (and consists fully of token-parts).
// We also use the fact that the classes of the lexicon nonterminals are
// [ 0 , nr_lexicon_nonterminals >.
//----------------------------------------------------------------------------*/
/*
   Subtract delta from the penalty of the transitions in the
   lexicon-nonterminal classes of state_node. Live token parts are
   skipped unless 'always' is set.
*/
static void adjust_relative_penalties_of_states_row (StateNode* state_node, Penalty delta, int always)
{ int class;

  for (class = nr_lexicon_nonterminals - 1; class >= 0; class--)
    { Transition* trans;
      for (trans = state_node -> trans_lists[class]; trans != NULL; trans = trans -> next)
	{ if (!always && is_live_token_part(trans))
	    continue;
	  trans -> penalty -= delta;
	}
    }
}

/*
   Per position: find the smallest penalty among the lexicon transitions
   of the state and the parts state, and subtract it (less
   penalty_transition) from those transitions.
*/
static void adjust_relative_penalties (Trellis *trellis)
{ StateNode** states_row = trellis -> states_row;
  StateNode** pstates_row = trellis -> pstates_row;
  unsigned len = trellis -> length;
  unsigned pos;

  for (pos = 0; pos < len; pos++)
    { StateNode* node = states_row[pos];
      StateNode* pnode = pstates_row[pos];
      Penalty delta = INT_MAX;

      /* pass 1: smallest penalty over both rows */
      if (node != NULL)
	delta = find_smallest_penalty_of_states_row(node, delta);
      if (pnode != NULL)
	delta = find_smallest_penalty_of_states_row(pnode, delta);

      /*
       * Subtracting (smallest - penalty_transition) also raises every
       * adjusted penalty by penalty_transition.
       */
      delta -= penalty_transition;

      /* pass 2: apply; in the state row the live token parts are left alone */
      if (node != NULL)
	adjust_relative_penalties_of_states_row(node, delta, 0);
      if (pnode != NULL)
	adjust_relative_penalties_of_states_row(pnode, delta, 1);
    }
}

/* }}} */

/*
   Open (set to NEGMEMO_UNKNOWN) the negative memos that the director
   bit-vector of this transition selects.

   state:     the transition; its kind selects the director table and
              state->terminal selects the entry in it
   neg_memos: memo array with get_nr_neg_memos() entries

   For an 'other' transition all memos are opened. A transition of an
   unrecognised kind is reported through abs_abort().
   When STANDALONE_LEXER is defined this function does nothing.
*/
static void open_neg_memos_for_transition (Transition* state, NegMemo* neg_memos)
{
#ifndef STANDALONE_LEXER
    unsigned neg_memo_size = get_nr_neg_memos ();
    unsigned long* neg_memo_directors = NULL;
    unsigned long word, bit;
    unsigned i;

    /* determine memo directors bit-vector for this state */
    if (is_lexicon_transition(state)) {
	neg_memo_directors = lex_memo_dir[DECODE_NONT_NUMBER(state->terminal)];
    } else if (is_terminal_transition(state)) {
	neg_memo_directors = term_memo_dir[DECODE_TERM_NUMBER(state->terminal)];
    } else if (is_match_regexp_transition(state)) {
	neg_memo_directors = match_memo_dir[DECODE_REGEXP_NUMBER(state->terminal)];
    } else if (is_skip_regexp_transition(state)) {
	neg_memo_directors = skip_memo_dir[DECODE_REGEXP_NUMBER(state->terminal)];
    } else if (is_eos_transition(state)) {
	neg_memo_directors = term_memo_dir[DECODE_TERM_NUMBER(state->terminal)];
/* Other */
    } else if (is_other_transition(state)) {
        for (i = 0; i < neg_memo_size; i++)
          neg_memos[i] = NEGMEMO_UNKNOWN;
	return;
    } else abs_abort ("open_neg_memos_for_transition", "42");

    /* open neg_memos in bit-vector: bit n of the vector belongs to memo n */
    word = *neg_memo_directors++;
    bit = 0;
    for (i = 0; i < neg_memo_size; i++) {
	if (bit == BITS_PER_WORD) {
	    /* current word exhausted: continue with the next one */
	    word = *neg_memo_directors++;
	    bit = 0;
	}

	/* The mask must have the type of 'word': shifting the int constant
	 * 0x01 is undefined from bit 31 on and would test the wrong bit
	 * when the words are wider than int. */
	if (word & (1UL << bit)) {
	    neg_memos[i] = NEGMEMO_UNKNOWN;
	}
	bit++;
    }
#endif /* STANDALONE_LEXER */
}

/*
   Attach neg_memos to 'state' (if there is one). With directors_option
   set, the memos selected by each transition of the state are opened first.
*/
static void add_state_neg_memos (StateNode* state, NegMemo* neg_memos)
{ int class;

  if (state == NULL) return;

  if (directors_option)
    { for (class = NR_classes - 1; class >= 0; class--)
	{ Transition* trans;
	  for (trans = state -> trans_lists[class]; trans != NULL; trans = trans -> next)
	    open_neg_memos_for_transition (trans, neg_memos);
	}
    }
  state -> neg_memos = neg_memos;
}

#ifdef PMRTS
/* Attach the positive memo table to 'state'; a NULL state is ignored. */
static void add_state_pos_memos (StateNode* state, PosMemo *pos_memos)
{ if (state == NULL)
    return;
  state -> pos_memos = pos_memos;
}

/* Attach the lrec marker array to 'state'; a NULL state is ignored. */
static void add_state_lrec_markers (StateNode* state, char *markers)
{ if (state == NULL)
    return;
  state -> lrec_markers = markers;
}
#endif /* PMRTS */

/*
   Fill all get_nr_neg_memos() entries of neg_memos with the start value:
   NEGMEMO_BLOCKED when directors_option is set, NEGMEMO_UNKNOWN otherwise.
*/
static void initialize_neg_memos (NegMemo* neg_memos)
{ unsigned neg_memo_size = get_nr_neg_memos();
  unsigned ix;

  for (ix = 0; ix < neg_memo_size; ix++)
    { if (directors_option)
	neg_memos[ix] = NEGMEMO_BLOCKED;
      else
	neg_memos[ix] = NEGMEMO_UNKNOWN;
    }
}

#ifdef PMRTS
/* Initialize one positive memo table entry per syntax nonterminal. */
static void initialize_pos_memos (PosMemo *pos_memos)
{ int ix = 0;
  while (ix < get_nr_syntax_nonterminals())
    { posmemo_init_table_entry (pos_memos + ix);
      ix++;
    }
}

/* Clear one marker per syntax nonterminal. */
static void initialize_lrec_markers (char *markers)
{ int ix = 0;
  while (ix < get_nr_syntax_nonterminals())
    { markers[ix] = 0;
      ix++;
    }
}

/*
   Free the vector of every non-NULL entry in the positive memo table;
   a NULL table is ignored.
*/
static void reset_pos_memos (PosMemo *pos_memos)
{ int nr_entries = get_nr_syntax_nonterminals ();
  int ix;

  if (pos_memos == NULL) return;

  for (ix = 0; ix < nr_entries; ix++)
    { if (pos_memos[ix] == NULL)
	continue;
      posmemo_free_vec (pos_memos + ix);
    }
}
#endif /* PMRTS */

/*
   We must take into account below, that some states (the first parts of
   a parts_token) occur both in the parts_row and the state_row.
   For memos, no harm is done by writing the same pointer twice.
*/
static void add_trellis_neg_memos (Trellis* trellis)
{ StateNode** state_row;
  StateNode** parts_row;
  unsigned len;
  unsigned pos;

  /* negative memos are only needed with one of these options */
  if (!neg_memo_option && !directors_option) return;

  state_row = trellis -> states_row;
  parts_row = trellis -> pstates_row;
  len = trellis -> length;
  for (pos = 0; pos < len; pos++)
    { StateNode* state = state_row[pos];
      StateNode* pstate = parts_row[pos];
      NegMemo* neg_memos;

      if ((state == NULL) && (pstate == NULL)) continue;

      /* one memo array per position, shared by the state and the parts state */
      neg_memos = alloc_neg_memos ();
      initialize_neg_memos (neg_memos);
      add_state_neg_memos (state, neg_memos);
      add_state_neg_memos (pstate, neg_memos);
    }
}

#ifdef PMRTS
/*
   With pos_memo_option set: give every position that has a state or a
   parts state a freshly allocated and initialized positive memo table
   and lrec marker array, shared by both.
*/
void add_trellis_pos_memos (Trellis* trellis)
{ StateNode** state_row;
  StateNode** parts_row;
  unsigned len;
  unsigned pos;

  if (!pos_memo_option) return;

  state_row = trellis -> states_row;
  parts_row = trellis -> pstates_row;
  len = trellis -> length;
  for (pos = 0; pos < len; pos++)
    { StateNode* state = state_row[pos];
      StateNode* pstate = parts_row[pos];
      PosMemo* pos_memos;
      char *lrec_markers;

      if ((state == NULL) && (pstate == NULL)) continue;

      /* both are shared by all states at this position */
      pos_memos = alloc_pos_memos ();
      lrec_markers = alloc_lrec_markers ();

      add_state_pos_memos (state, pos_memos);
      add_state_lrec_markers (state, lrec_markers);
      add_state_pos_memos (pstate, pos_memos);
      add_state_lrec_markers (pstate, lrec_markers);

      initialize_pos_memos (pos_memos);
      initialize_lrec_markers (lrec_markers);
    }
}

/*
   With pos_memo_option set: reset the positive memo table of every
   position, taking the table from the state if there is one and from
   the parts state otherwise.
*/
void reset_trellis_pos_memos (Trellis* trellis)
{ StateNode** state_row;
  StateNode** parts_row;
  unsigned len;
  unsigned pos;

  if (!pos_memo_option) return;

  state_row = trellis -> states_row;
  parts_row = trellis -> pstates_row;
  len = trellis->length;
  for (pos = 0; pos < len; pos++)
    { StateNode* owner = state_row[pos];
      PosMemo *pos_memos = NULL;

      if (owner == NULL) owner = parts_row[pos];
      if (owner != NULL) pos_memos = owner -> pos_memos;

      /* reset_pos_memos copes with a NULL table */
      reset_pos_memos (pos_memos);
    }
}

#endif /* PMRTS */

#ifdef COUNTERS
/*
   Return the negative memo table at 'pos': taken from the state if there
   is one, else from the parts state; NULL when the position has neither.
*/
static NegMemo* get_neg_memo_table (Trellis* trellis, unsigned pos)
{ StateNode** state_row = GET_TRELLIS_STATE_ROW (trellis);
  StateNode** parts_row = GET_TRELLIS_PARTS_ROW (trellis);
  StateNode* owner = state_row[pos];

  if (owner == NULL) owner = parts_row[pos];
  if (owner == NULL) return NULL;
  return owner -> neg_memos;
}

#ifdef PMRTS
/*
   Return the positive memo table at 'pos': taken from the state if there
   is one, else from the parts state; NULL when the position has neither.
*/
static PosMemo* get_pos_memo_table (Trellis* trellis, unsigned pos)
{ StateNode** state_row = GET_TRELLIS_STATE_ROW (trellis);
  StateNode** parts_row = GET_TRELLIS_PARTS_ROW (trellis);
  StateNode* owner = state_row[pos];

  if (owner == NULL) owner = parts_row[pos];
  if (owner == NULL) return NULL;
  return owner -> pos_memos;
}
#endif /* PMRTS */

/*
   Report how many negative memos in the trellis are blocked.
   Nothing is reported when there are no negative memos per position
   (get_nr_neg_memos() == 0). The percentage is reported as 0 when no
   position carries a memo table, instead of dividing by zero.
*/
void show_neg_memo_blocks (Trellis* trellis)
{ unsigned neg_memo_size = get_nr_neg_memos();
  if (neg_memo_size > 0)
  { unsigned nr_neg_memos = 0;
    unsigned nr_blocked = 0;
    unsigned len = trellis->length;
    double percentage = 0.0;
    unsigned i;
    for (i = 0; i < len; i++)
    { NegMemo* neg_memos = get_neg_memo_table(trellis, i);
      if (neg_memos != 0)
      { unsigned j;
	/* NOTE(review): the rest of this file uses NEGMEMO_BLOCKED;
	 * confirm MEMO_BLOCKED is the intended constant here */
	for (j = 0; j < neg_memo_size; j++)
	{ if (neg_memos[j] == MEMO_BLOCKED)
	    nr_blocked++;
	}
	nr_neg_memos += neg_memo_size;
      }
    }
    /* avoid 0/0 when no position has a memo table */
    if (nr_neg_memos > 0)
      percentage = nr_blocked * 100.0 / nr_neg_memos;
    /* the counters are unsigned, so they are printed with %u */
    abs_message ("Blocked %u out of %u neg_memos (%.0f%%)\n",
	         nr_blocked, nr_neg_memos, percentage);
  }
}

#ifdef PMRTS
/*
   Report how many positive memos in the trellis are blocked.
   Nothing is reported when get_nr_pos_memos() is 0. The percentage is
   reported as 0 when no position carries a memo table, instead of
   dividing by zero.
*/
void show_pos_memo_blocks(Trellis* trellis)
{ unsigned pos_memo_size = get_nr_pos_memos();
  if (pos_memo_size > 0)
  { unsigned nr_pos_memos = 0;
    unsigned nr_blocked = 0;
    unsigned len = trellis->length;
    double percentage = 0.0;
    unsigned i;
    for (i = 0; i < len; i++)
    { PosMemo* pos_memos = get_pos_memo_table(trellis, i);
      if (pos_memos != 0)
      { unsigned j;
	for (j = 0; j < pos_memo_size; j++)
	  if (posmemo_is_blocked(&pos_memos[j])) nr_blocked++;
	nr_pos_memos += pos_memo_size;
      }
    }
    /* avoid 0/0 when no position has a memo table */
    if (nr_pos_memos > 0)
      percentage = nr_blocked * 100.0 / nr_pos_memos;
    /* the counters are unsigned, so they are printed with %u */
    abs_message ("Blocked %u out of %u pos_memos (%.0f%%)\n",
	         nr_blocked, nr_pos_memos, percentage);
  }
}
#endif /* PMRTS */
#endif

/*------------------------------------------------------------------------------
// Destruction of trellis
//----------------------------------------------------------------------------*/
/* Release a single transition: its text (only when the TxtFreeBit flag
   marks the text as to be freed), its parameter array (when present)
   and finally the transition record itself. */
static void delete_transition (Transition* transition)
{ if (transition -> type & (TxtFreeBit))
    { abs_free ((char*) transition -> text, "delete_transition");
    }

  if (transition -> params != NULL)
    { abs_free ((TaggedValue *) transition -> params, "delete_transition");
    }

  free_transition (transition);
}

/*------------------------------------------------------------------------------
// In delete_transition_list, we use the knowledge that the shared part
// of the transition list is the tail (and consists fully of token-parts).
//----------------------------------------------------------------------------*/
/* Delete transitions from the head of a list.
   With always != 0 the whole list is deleted; otherwise deletion stops at
   the first transition for which is_live_token_part() holds, leaving that
   transition and everything after it untouched. */
static void delete_transition_list (Transition* transition, int always)
{ Transition* following;

  for (; transition != NULL; transition = following)
    { if (!always && is_live_token_part (transition))
        break;
      /* fetch the successor before the record is released */
      following = transition -> next;
      delete_transition (transition);
    }
}

/* Delete a state node together with the transition lists of all of its
   NR_classes classes; the always flag is handed on to
   delete_transition_list(). A NULL state is ignored. */
static void delete_state (StateNode* state, int always)
{ int cls;

  if (state == NULL)
    return;

  for (cls = 0; cls < NR_classes; cls++)
    { Transition* head = state -> trans_lists[cls];
      if (head != NULL)
        delete_transition_list (head, always);
    }
  free_statenode(state);
}

/* Free a negative memo table, unless there is none (NULL). */
static void maybe_free_neg_memos (NegMemo* memos)
{ if (memos == NULL)
    return;
  free_neg_memos (memos);
}

#ifdef PMRTS
/* Free a positive memo table, unless there is none (NULL). */
static void maybe_free_pos_memos (PosMemo* memos)
{ if (memos == NULL)
    return;
  free_pos_memos (memos);
}

/* Free a left-recursion marker array, unless there is none (NULL). */
static void maybe_free_lrec_markers (char *markers)
{ if (markers == NULL)
    return;
  free_lrec_markers (markers);
}
#endif

/* Destroy a trellis: for every input position free the memo tables and
   both nodes (state and parts), then free the trellis itself.

   The memo tables are freed exactly once per position, through the state
   node when it exists and through the parts node otherwise (when both
   exist they refer to the same tables).

   The state node's transitions are deleted only up to the first live
   token part (the tail it shares with the parts node); the parts node's
   transitions are deleted completely. */
void delete_trellis (Trellis* trellis)
{ StateNode** state_row = GET_TRELLIS_STATE_ROW (trellis);
  StateNode** parts_row = GET_TRELLIS_PARTS_ROW (trellis);
  unsigned len = trellis -> length;
  unsigned pos;
#ifdef SHOW_LEXINFO_NRS
  show_lexinfo_nrs("start of delete_trellis");
#endif
  for (pos = 0; pos < len; pos++)
    { StateNode* state = state_row[pos];
      StateNode* parts = parts_row[pos];
      /* node through which the memo tables of this position are freed */
      StateNode* memo_owner = (state != NULL) ? state : parts;

      if (memo_owner == NULL)
        continue;		/* nothing was built at this position */

      maybe_free_neg_memos (memo_owner -> neg_memos);
#ifdef PMRTS
      maybe_free_pos_memos (memo_owner -> pos_memos);
      maybe_free_lrec_markers (memo_owner -> lrec_markers);
#endif /* PMRTS */

      /* order matters: the non-shared head of the state lists first,
         then the parts lists including the shared tail */
      delete_state (state, 0);
      delete_state (parts, 1);
    }

  free_trellis (trellis);
#ifdef SHOW_LEXINFO_NRS
  show_lexinfo_nrs ("end of delete_trellis");
#endif
}

/*------------------------------------------------------------------------------
// Printing of trellis
//----------------------------------------------------------------------------*/
/* Print the (escaped) text of a transition.
   An end-of-sentence transition is printed bare. Any other text is put
   between double quotes, with a '-' after the opening quote for infix and
   suffix parts and a '-' before the closing quote for infix and prefix
   parts. */
static void print_terminal_text (Transition* state)
{ char *outxt = dupstr_escaped (state -> text);

  if (!is_eos_transition (state))
    { int leading_hyphen = state -> type & (InfixBit | SuffixBit);
      int trailing_hyphen = state -> type & (InfixBit | PrefixBit);

      if (leading_hyphen) abs_printf ("\"-");
      else abs_printf ("\"");

      abs_printf ("%s", outxt);

      if (trailing_hyphen) abs_printf ("-\"");
      else abs_printf ("\"");
    }
  else
    abs_printf ("%s", outxt);

  /* the escaped copy is ours to release */
  abs_free (outxt, "print_terminal_text");
}

/* We should have access to a list of (grammar & lexicon) nonterminal names.
// For now, rtslex gets the name from the lexicon through rtslint.
*/
/* Print the name of lexicon nonterminal nontnr, as reported by the
   lexicon. The name is passed as data, not as the format string, so a
   '%' occurring in a name is printed literally instead of being
   interpreted as a conversion specification. */
static void current_parse_add_nonterminal (Lexicon lex, int nontnr)
{ abs_printf ("%s", lexicon_get_nont_name (lex, nontnr));
}

#ifdef STANDALONE_LEXER
/* Standalone lexer: no $MATCH regexp names are available, so a fixed
   placeholder is printed and termnr is not used. */
static void current_parse_add_match_regexp (long termnr)
{ (void) termnr;	/* would index match_regexp_names[] */
  abs_printf ("<standalone_noname>");
}

/* Standalone lexer: no $SKIP regexp names are available, so a fixed
   placeholder is printed and termnr is not used. */
static void current_parse_add_skip_regexp (long termnr)
{ (void) termnr;	/* would index skip_regexp_names[] */
  abs_printf ("<standalone_noname>");
}

/* Map the kind of a parameter to a one-letter code:
   'S' for a set, 'I' for an integer, 'T' for a text and 'X' for
   anything else. */
static char param_type_to_char (int par_kind)
{ char code = 'X';

  switch (GET_KIND_TYPE(par_kind))
    { case lxcn_SetKind:	code = 'S'; break;
      case lxcn_IntKind:	code = 'I'; break;
      case lxcn_TextKind:	code = 'T'; break;
      default:		break;
    }
  return (code);
}

/* Standalone lexer: print one parameter of a lexicon nonterminal as
   "<nontnr> <kind letter>" followed by a kind-specific part.
   Only set values are actually shown (in hexadecimal). For integer and
   text parameters the code that printed the value (unless it was
   ANY_INT resp. ANY_TEXT) has been disabled, so the "any" placeholder is
   always printed for them. */
static void print_param (TaggedValue cur_par, long nontnr)
{ char kind_letter = param_type_to_char(cur_par.kind);

  abs_printf ("%ld %c", nontnr, kind_letter);
  switch (GET_KIND_TYPE(cur_par.kind))
    { case lxcn_SetKind:
        abs_printf (" x%08lx", cur_par.value.set_par);
        break;

      case lxcn_IntKind:
        /* disabled: " %d" of value.int_par when it is not ANY_INT */
        abs_printf ("A:INT");
        break;

      case lxcn_TextKind:
        /* disabled: quoted value.text_par when it is not ANY_TEXT */
        abs_printf ("A:TEXT");
        break;

      default:
        abs_printf ("--> Unknown param <--");
        break;
    } /* switch (GET_KIND_TYPE(cur_par.kind)) */
}
#endif /* STANDALONE_LEXER */

#if 0
/*
   This code should not be reactivated. If so, it must be moved to
   lexicon_print.c (which is non-existent at the moment).
*/

/* These functions probably needed for some debugging */
/* (Disabled code, inside #if 0.)
   Scan all set affixes of the lexicon that belong to lhs_nr.
   The first one whose bitset equals set_val has its name printed; the
   first one whose bitset merely overlaps set_val is searched recursively.
   In both cases the scan stops there; if no affix matches or overlaps,
   nothing is printed.
   NOTE(review): the recursive call passes the affix index i as the new
   lhs_nr -- presumably affix and lhs numbers share one numbering; confirm
   before ever reusing this code. */
static void lex_print_set_affix_helper (Lexicon lex, int lhs_nr, Bitset set_val)
{ int nr_aff = lexicon_get_nr_setaffixes (lex);
  int i;
  for (i = 0; i < nr_aff; i++)
    if (lexicon_affix_belongs_to_lhs (lex, i, lhs_nr))
      { Bitset bitset = lexicon_get_setaffix_bitset (lex, i);
        if (bitset == set_val)
	  { /* exact match: this affix names the set */
	    abs_printf ("%s", lexicon_get_setaffix_name (lex, i));
	    return;
	  }
        else if (bitset & set_val)
	  { /* partial overlap: continue the search below this affix */
	    lex_print_set_affix_helper(lex, i, set_val);
	    return;
          };
      };
}

/* (Disabled code, inside #if 0.)
   Print the name of the set affix that corresponds to value set_val of
   parameter parno of lexicon nonterminal nontnr.
   The affix of the parameter itself is tried first; if its bitset differs
   from set_val, lex_print_set_affix_helper() searches for a matching
   affix. */
static void lex_print_set_affix (Lexicon lex, long nontnr, int parno, unsigned long set_val)
{ /*
     OK, this is quite expensive, but we don't care for speed while
     printing the lexical graph.
  */
  int lhs_nr = lexicon_get_nont_param_nr (lex, nontnr, parno);
  Bitset bitset = lexicon_get_setaffix_bitset (lex, lhs_nr);

#ifdef DEBUG_RTS
  abs_message ("lex_print_set_affix(%p, %ld, %d, %lx) lhs_nr == %u",
	       lex, nontnr, parno, set_val, lhs_nr);
#endif /* DEBUG_RTS */

  /* the parameter's own affix covers exactly this value: print its name */
  if (bitset == set_val) abs_printf ("%s", lexicon_get_setaffix_name (lex, lhs_nr));
  else lex_print_set_affix_helper(lex, lhs_nr, set_val);
}
#endif /* 0 */

/* Print an affix value according to its domain: TEXT_TYPE values as
   text, INT_TYPE values as integer, and every other domain as a set
   affix of that domain. */
static void print_affix (Value value, long domain)
{ switch (domain)
    { case TEXT_TYPE:
        print_text_affix (value.text_par, 0);
        return;
      case INT_TYPE:
        print_integer_affix (value.int_par, 0);
        return;
      default:
        break;
    }

  /* MS: to change with intro of larger affixes Set type */
  print_set_affix (value.set_par, domain, 0);
}

/* Print one transition of the lexical graph:
   - its text (see print_terminal_text),
   - its penalty between square brackets, when nonzero,
   - what it stands for: a lexicon nonterminal with its parameters,
     a $SKIP or $MATCH regexp, or $OTHER (grammar terminals and end of
     sentence get no extra annotation),
   - and, when has_transition() holds, an arrow ("=>" for a token
     transition, "->" otherwise) followed by the destination position,
     or "(none)" when there is no destination state.
   Extra debug output is produced under DEBUG_NONTNR, DEBUG,
   SHOW_ZERO_DEST and PRINT_STATE_PTRS. */
static void print_transition (Transition* trans, Lexicon lex)
{   StateIndicator trans_dest = get_transition_dest(trans);
    int parno;
#ifdef DEBUG_NONTNR
    int stype = DECODE_TERM_TYPE (trans->terminal);
#endif

    print_terminal_text (trans);
    if (trans -> penalty)
      abs_printf (" [%ld]", trans -> penalty);
    if (is_lexicon_transition (trans))
      { int arity = DECODE_NONT_ARITY(trans -> terminal);
	int nontnr = DECODE_NONT_NUMBER(trans -> terminal);
#ifndef STANDALONE_LEXER
	/* walks over the domains of the parameters of this nonterminal */
	long *pdomain = nont_domains[nontnr];
#endif /* STANDALONE_LEXER */
#ifdef DEBUG_NONTNR
	abs_printf ("[%d] %x#%ld:", trans -> penalty, stype, nontnr);
#else
	abs_printf (" ");
#endif
	current_parse_add_nonterminal (lex, nontnr);

	/* parameters as "(p1, p2, ...)"; nothing at all when arity is 0 */
	for (parno = 0; parno < arity; parno++)
          { if (parno == 0) abs_printf ("(");
	    else abs_printf (", ");

#ifdef STANDALONE_LEXER
	    print_param (trans -> params[parno], nontnr);
#else
	    print_affix (trans -> params[parno].value, *pdomain++);
#endif /* STANDALONE_LEXER */
	  };

	if (arity > 0) abs_printf (")");
      }
    else
      {
#ifdef DEBUG_NONTNR
	abs_printf ("[%d] %x#%ld:", trans->penalty, stype, DECODE_TERM_NUMBER(trans->terminal));
#endif

	if (is_skip_regexp_transition(trans))
	  { abs_printf (" $SKIP(\"");
	    current_parse_add_skip_regexp(DECODE_REGEXP_NUMBER(trans -> terminal));
	    abs_printf ("\")");
	  }
	else if (is_match_regexp_transition(trans))
	  { abs_printf (" $MATCH(\"");
	    current_parse_add_match_regexp(DECODE_REGEXP_NUMBER(trans -> terminal));
	    abs_printf ("\")");
	  }
	else if (is_other_transition(trans))
	  abs_printf (" $OTHER");
	else if (is_terminal_transition(trans))
	  { /* we don't want the same text again
	    // (unfortunately grammar nonterminal is not available)
	    // current_parse_add_terminal(DECODE_TERM_NUMBER(trans -> terminal));
	    */
	  };
	/* $end of sentence$ not shown */
    }

    /* arrow and destination */
    if (has_transition(trans)) {
#ifdef SHOW_ZERO_DEST
      /* with SHOW_ZERO_DEST a missing destination is reported as an error
         message instead of being printed as "(none)" */
      if (!trans_dest)
	{ abs_message (" %s ***", (has_token_transition(trans) ? "=>" : "->"));
	  abs_message ("*** ERROR: dest=0x%p in trans 0x%p \"%s\" ***",
			trans_dest, trans, trans -> text);
        }
      else
	{
#endif
	  if (has_token_transition(trans)) abs_printf (" => ");
	  else abs_printf (" -> ");
#ifdef DEBUG
	  abs_printf ("{%08x} ", trans -> type);
#endif
	  if (trans_dest) abs_printf ("%d", STATE_POS(trans_dest));
	  else abs_printf ("(none)");

#ifdef SHOW_ZERO_DEST
        } /* if (!trans_dest) */
#endif

#if defined(PRINT_STATE_PTRS)
	abs_printf (" (%p)", trans_dest);
#endif
    }
}

/* Print those transitions of a list that are selected by the predicate
   do_pr. *have_printed tells whether anything was printed before (by this
   or an earlier call): if so a separator precedes the transition, if not
   the flag is set and no separator is printed. */
static void print_transition_list (Transition* trans, Lexicon lex,
				   int* have_printed, TransTest* do_pr)
{ Transition* upcoming;

  for (; trans != NULL; trans = upcoming)
    { upcoming = trans -> next;

      if (!do_pr (trans))
        continue;

      if (!*have_printed)
        *have_printed = 1;
      else
        {
#ifdef DEBUG
          abs_printf (",\n\t");
#else
          abs_printf (", ");
#endif
        }
      print_transition(trans, lex);
    }
}

/* Print the selected transitions (see print_transition_list) of all
   NR_classes transition lists of a state, in class order.
   The caller guarantees that state is not NULL. */
static void print_state_transitions (StateNode* state, Lexicon lex,
				     int *have_printed, TransTest* do_pr)
{ int cls;

  for (cls = 0; cls < NR_classes; cls++)
    { Transition* head = state -> trans_lists[cls];
      if (head == NULL)
        continue;
      print_transition_list (head, lex, have_printed, do_pr);
    }
}

/*--------------------------------------------------------------------------
// Top level routine for printing the lexical graph.
// At each input position, there are two lists of transitions
// (each of which may be empty):
// A. the regular list, which will be used when a token transition
//	        leads to this position; it contains the symbols
//	        that may appear at the start of a token.
//	        This list is only built if lexicalize() is called
//	        for this position, i.e. if complete tokens can
//	        start here.
//	        These transitions have the token_start flag.
// B. the parts list, which will be used when a parts transition
//	        leads to this position; it contains the symbols
//	        that may appear in a parts_token.
//	        This list is built both through add_parts_transitions()
//	        (scanning for parts in the middle of a word)
//	        and through lexicalize().
//	        These transitions have the (live_)token_part flag.
// Some parts are in both lists:
//	those that can be the first part of a token
//	i.e. single tokens, prefix parts, and infix parts
//	(provided this pos is a place where words can start).
//	These parts are moved to the end of the parts list,
//	and the pointer to the first of them is copied to
//	the end of the regular list, thus resulting in a shared tail.
//	These transitions have both the token_start and 
//	the (live_)token_part flag.
// The transitions are printed in the following order:
//	1. those that are only in the regular list
//	   (I have never seen one! Can they logically occur?)
//	2. those that are only in the parts list
//	3. those that are in both lists
//	   (the most common situation)
// If PRINT_PARTS_SEMICOLON is #defined (in DEBUG mode),
//	a ';' is printed between 1. and 2. and between 2. and 3.,
//	in order to show the data structure in greater detail.
// 
//------------------------------------------------------------------------*/
/* Print the lexical graph, one entry per input position that has a state
   node and/or a parts node. Each entry starts with the position and the
   (line,column) of the node, followed by its transitions in the order
   described above: only-regular, only-parts, shared.
   Entries are separated by a tab when the output format is
   G_OUT_FORMAT_TRELLIS_INPUT (the whole graph is then closed by one
   newline) and by a newline otherwise. */
void print_trellis (Trellis* trellis)
{   Lexicon lex = trellis->lexicon;
    StateNode **state_row = trellis -> states_row;
    StateNode **parts_row = trellis -> pstates_row;
    unsigned len = trellis -> length;
    unsigned pos;
    const char *out_line_sep; /* printed through "%s", never used as format */
#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("start of print_trellis");
#endif
    if (lex_out_format == G_OUT_FORMAT_TRELLIS_INPUT) out_line_sep = "\t";
    else out_line_sep = "\n";

    for (pos = 0; pos < len; pos++)
      { StateNode* state = *state_row++;
	StateNode* pstate = *parts_row++;
	int did_print = 0;

	if (state != NULL) {
	    /* pos is unsigned, so it is printed with %u */
	    abs_printf ("%4u (%d,%d) ", pos, state -> linenr, state -> colnr);
	    /* print nonshared (first) part of state_row lists: */
#ifdef PRINT_STATE_PTRS
	    abs_printf ("(%p) ", (void *) state);
#endif
	    print_state_transitions (state, lex, &did_print, &not_live_token_part);
	    if (pstate != NULL) {
#ifdef PRINT_PARTS_SEMICOLON
	        abs_printf (";");
#endif
#ifdef PRINT_STATE_PTRS
	        abs_printf ("(%p) ", (void *) pstate);
#endif
	        /* print nonshared (first) part of parts_row list: */
	        print_state_transitions (pstate, lex, &did_print, &not_token_start);
#ifdef PRINT_PARTS_SEMICOLON
	        abs_printf (";");
#endif
	        /* print shared (last) part of both lists: */
	        print_state_transitions (state, lex, &did_print, &is_live_token_part);
	    }

	    abs_printf ("%s", out_line_sep);
	} else if (pstate != NULL) {
	    abs_printf ("%4u (%d,%d) ", pos, pstate -> linenr, pstate -> colnr);
#ifdef PRINT_PARTS_SEMICOLON
	    abs_printf ("; ");
#endif
#ifdef PRINT_STATE_PTRS
	    abs_printf ("(%p) ", (void *) pstate);
#endif
	    /* print whole parts_row list (there is no state_row list) */
	    print_state_transitions(pstate, lex, &did_print, &trans_true);
	    abs_printf ("%s", out_line_sep);
	}
    }

    if (lex_out_format == G_OUT_FORMAT_TRELLIS_INPUT)
      abs_printf ("\n");

#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("end of print_trellis");
#endif
}

static void may_get_shorter_trans_from_state (StateNode *istate, Position *least_next_pos_p,
				 	      StateNode **shortest_p);

/* Examine every transition of trans_list whose destination lies before
   *least_next_pos_p.
   - A token transition makes its destination the new candidate:
     *shortest_p and *least_next_pos_p are updated.
   - Any other transition leads to a following part; the search then
     continues from its destination, looking for the shortest sequence of
     parts (of which only the last one has_token_transition).
   Transitions without a destination state are skipped, since they cannot
   lead to a next state (and STATE_POS must not be applied to NULL). */
static void may_get_shorter_trans_from_list (Transition *trans_list, Position *least_next_pos_p,
					     StateNode **shortest_p)
{ Transition *cur_trans;

  for (cur_trans = trans_list; cur_trans != NULL; cur_trans = cur_trans -> next)
    { StateNode *cur_dest = cur_trans -> trans_dest_state;

      if (cur_dest == NULL)
        continue;		/* no destination: nothing to reach */
      if (STATE_POS(cur_dest) >= *least_next_pos_p)
        continue;		/* not nearer than the current candidate */

      if (has_token_transition(cur_trans))
        { *shortest_p = cur_dest;
          *least_next_pos_p = STATE_POS(cur_dest);
        }
      else
        may_get_shorter_trans_from_state(cur_dest, least_next_pos_p,
                                         shortest_p);
    }
} /* may_get_shorter_trans_from_list */

/* Search all transition lists of istate for a nearer destination (see
   may_get_shorter_trans_from_list). The two regexp classes are examined
   only when *shortest_p is still NULL after all other classes have been
   examined; in that case both regexp classes are examined. */
static void may_get_shorter_trans_from_state (StateNode *istate,
					      Position *least_next_pos_p, StateNode **shortest_p)
{ int cls;
  assert((istate != NULL) && "may_get_shorter_trans_from_state: state is NULL.");

  /* not using lex_info->nr_nont_classes, as it isn't yet filled correctly */
  for (cls = 0; cls < NR_classes; cls++)
    { if ((cls == re_match_class) || (cls == re_skip_class))
        continue;
      may_get_shorter_trans_from_list (istate -> trans_lists[cls],
                                       least_next_pos_p, shortest_p);
    }

  if (*shortest_p)
    return;

  may_get_shorter_trans_from_list (istate -> trans_lists[re_match_class],
                                   least_next_pos_p, shortest_p);
  may_get_shorter_trans_from_list (istate -> trans_lists[re_skip_class],
                                   least_next_pos_p, shortest_p);
}

/*-------------------------------------------------------------------------
// Find the nearest StateNode (i.e. the one with the lowest pos) that can
// be reached from here after a full transition (e.g. a single token or a
// sequence of parts). Regexp ($MATCH or $SKIP) transitions are only
// considered if there are no other transitions.
//
// If the state has an EOS transition, no next state is searched for and
// NULL is returned.
//
// Return value: NULL if no transition, pointer to new StateNode otherwise.
//-----------------------------------------------------------------------*/
/* Return the nearest state reachable from istate after a full
   transition, or NULL when istate has an EOS transition or no such
   state is found. */
StateNode* get_shortest_transition (Trellis *trellis, StateNode *istate)
{ StateNode *nearest = NULL;
  Position bound;

  assert (trellis && "get_shortest_transition: no trellis.");
  assert (istate && "get_shortest_transition: no state.");

  if (!state_has_eos_transition (istate))
    { /* start with a bound beyond any possible next position */
      bound = trellis -> length + 1;
      may_get_shorter_trans_from_state (istate, &bound, &nearest);
    }
  return (nearest);
}

/*
  The start point of the building of the trellis

  To do: initialization of neg memo from director sets
*/
/* Build a complete trellis for the NUL-terminated text in input.
   The trellis gets one position per input character plus one. After the
   transitions have been built, the states are annotated with their
   source position (starting from linenr/colnr), the penalties are
   adjusted, and the memo tables are added. The caller receives the new
   trellis. */
Trellis *make_trellis_by_word_lexing (char *input, int linenr, int colnr, Lexicon the_lex)
{   unsigned nr_positions = strlen (input) + 1;
    Trellis *new_trellis;

#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs("start of make_trellis_by_word_lexing");
#endif

    /* allocate the trellis structure and fill with NULLs as default */
    new_trellis = alloc_trellis (nr_positions);
    init_trellis (new_trellis, nr_positions, the_lex);
#ifdef COUNT_TRACE
    n_trel_builds++;
#endif

    /* Build the trellis from the input */
    build_trellis (new_trellis, (unsigned char*) input);
    annotate_states_with_position (new_trellis, input, linenr, colnr);
    adjust_relative_penalties (new_trellis);

    /*
       Add neg_memo vector to all states at the same position.
       If directors_option, block neg_memos that are not possible according
       to the directors of the neg_memos.
       Note that open_neg_memos_for_state() expects the lexicon states to
       be expanded.
    */
    add_trellis_neg_memos (new_trellis);
#ifdef PMRTS
    add_trellis_pos_memos (new_trellis);
#endif /* PMRTS */

#ifdef SHOW_LEXINFO_NRS
    show_lexinfo_nrs ("end of make_trellis_by_word_lexing");
#endif
    return (new_trellis);
}

/* Tell (1 or 0) whether the first transition in the grammar-terminal
   list of the_state is an end-of-sentence transition. */
int state_has_eos_transition (StateNode *the_state)
{ Transition* first_gr_trans = the_state -> trans_lists[gr_term_class];

  if (first_gr_trans == NULL)
    return (0);
  return (is_eos_transition(first_gr_trans) ? 1 : 0);
}

/* A trellis counts as empty when its first state has an end-of-sentence
   transition (see state_has_eos_transition). */
int is_empty_trellis (Trellis* trellis)
{ StateIndicator first = GET_FIRST_STATE_INDICATOR (trellis);
  return (state_has_eos_transition (first));
}

/* Accessor: the input position stored in a state node. */
Position STATE_POS (StateNode *ind)
{ Position where = ind -> pos;
  return (where);
}

/* Make the state node at position pos of the state row the first state
   of the trellis. No range check is done on pos. */
void SET_FIRST_POS(Trellis *trel, Position pos)
{ StateNode **states = trel -> states_row;
  trel -> first_state = states[pos];
}

/* Accessor: the first state of the trellis (see SET_FIRST_POS). */
StateIndicator GET_FIRST_STATE_INDICATOR (Trellis *trel)
{ StateIndicator first = trel -> first_state;
  return (first);
}

/* Nonzero when the TransTokenBit flag of the transition is set; the
   result is the masked flag value, not necessarily 1. */
int IS_LASTPART (Transition *t)
{ return t -> type & TransTokenBit;
}

/* Nonzero when the TransPartsBit flag of the transition is set; the
   result is the masked flag value, not necessarily 1. */
int HAS_PARTS_TRANSITION (Transition *t)
{ return t -> type & TransPartsBit;
}

/* Accessor: the (encoded) terminal of a transition. */
Terminal TRANSITION_TERMINAL (Transition *tra)
{ Terminal term = tra -> terminal;
  return (term);
}

/* Accessor: the parameter array of a transition (may be NULL). */
TaggedValue *TRANSITION_PARAMS (Transition* tra)
{ TaggedValue *pars = tra -> params;
  return (pars);
}

/* Accessor: the text of a transition. The string stays owned by the
   transition. */
char* TRANSITION_TEXT (Transition* tra)
{ return tra -> text;
}

/* Accessor: the successor of a transition in its list (NULL at the end). */
struct Transition* TRANSITION_NEXT_TRANS (Transition *tra)
{ return tra -> next;
}

/* Accessor: the penalty attached to a transition. */
Penalty TRANSITION_PENALTY (Transition *tra)
{ Penalty pen = tra -> penalty;
  return (pen);
}

/* Accessor: the row of state nodes of the trellis, indexed by position. */
StateNode** GET_TRELLIS_STATE_ROW (Trellis *trel) 
{ StateNode **row = trel -> states_row;
  return (row);
}

/* Accessor: the row of parts nodes of the trellis, indexed by position. */
StateNode** GET_TRELLIS_PARTS_ROW (Trellis *trel) 
{ StateNode **row = trel -> pstates_row;
  return (row);
}

/* Accessor: the destination state of a transition. The trellis argument
   is not used by this implementation. */
StateIndicator TRANSITION_DEST_STATE_INDICATOR (Transition *tra, Trellis *trel)
{ (void) trel;
  return tra -> trans_dest_state;
}

/* Accessor: the head of the transition list of class cls in state i_st.
   The trellis argument is not used by this implementation. */
Transition* GET_STATE_TRANSLIST (Trellis *trel, StateIndicator i_st, ARG cls)
{ (void) trel;
  return i_st -> trans_lists[cls];
}
