/*
   File: rtstrelinp.c
   Parses trellis input (lexed by external program)

   Copyright 2005 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: rtstrelinp.c,v 1.14 2005/05/04 13:44:53 marcs Exp $"
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>

/* libabase includes */
#include <abase_error.h>
#include <abase_memalloc.h>
#include <abase_lex_input.h>

/* liblexicon include */
#include <lexicon.h>
#include <lexicon_search.h>

/* local includes */
#include "rtsesc.h"
#include "rtscode.h"
#include "rtstrelinp.h"

/* One-shot flag: nonzero once the "interpreting '->' as '=>'" warning has
** been printed by process_trelinp_trans(), so it is emitted only once.
*/
static int have_warned_about_parts = 0;	/* "interpreting '->' as '=>'" */

static void reverse_trans_lists_at(Trellis *trellis, long srcpos)
/* Reverse, in place, every per-class transition list of the state at srcpos.
// Lists holding fewer than two transitions are left untouched.
*/
{
    Transition **lists = trellis->states_row[srcpos]->trans_lists;
    int cls = NR_classes;

    while (cls-- > 0) {
	Transition *cur = lists[cls];
	Transition *reversed = NULL;

	if (!cur || !cur->next) {
	    continue;	/* empty or single-element list: nothing to do */
	}
	while (cur) {
	    Transition *rest = cur->next;

	    cur->next = reversed;
	    reversed = cur;
	    cur = rest;
	}
	lists[cls] = reversed;
    }
}

static void may_skip_spaces(unsigned char const * * const hinput)
/* advance *hinput past any run of ' ' characters (possibly none)
*/
{
    unsigned char const *cursor = *hinput;

    for (; *cursor == ' '; cursor++) {
	/* just skip it */
    }
    *hinput = cursor;
} /* may_skip_spaces */

static void may_skip_and_zero_spaces(unsigned char **hinput)
/* advance *hinput past any run of ' ' characters,
// overwriting each skipped space with '\0'
*/
{
    unsigned char *cursor;

    for (cursor = *hinput; *cursor == ' '; cursor++) {
	*cursor = '\0';
    }
    *hinput = cursor;
} /* may_skip_and_zero_spaces */

static void may_zero_out_spaces_backward_from(unsigned char *pinput)
/* starting at pinput and walking towards lower addresses,
// overwrite each ' ' with '\0' until a non-space char is met.
// There is no lower bound check: the caller must make sure that
// a non-space char is present at or before pinput.
*/
{
    for (; *pinput == ' '; pinput--) {
	*pinput = '\0';
    }
} /* may_zero_out_spaces_backward_from */

static int parse_decimal(unsigned char const * * const hinput, long *dval)
/* format: [0-9]+
// assign decimal value to dval
// advance hinput to char after last digit in sequence
// hinput unchanged if returning nonzero (i.e. error code)
//
// returns 0 on success,
//	-EINVAL if *hinput does not start with a digit,
//	-ERANGE if the digit sequence does not fit in a long
// dval is set to 0 in both error cases
*/
{
    unsigned char const *pinp = *hinput;
    long value = 0;

    *dval = 0;
    while (*pinp >= '0' && *pinp <= '9') {
	int digit = *pinp - '0';

	/* check before multiplying: signed overflow is undefined behaviour */
	if (value > (LONG_MAX - digit) / 10) {
	    abs_message ("** decimal value out of range at '%s'", *hinput);
	    return -ERANGE;
	}
	value = value * 10 + digit;
	pinp++;
    }
    if (pinp > *hinput) {
	*dval = value;
	*hinput = pinp;
	return 0;
    }
    abs_message ("** expected decimal value at '%s'", *hinput);
    return -EINVAL;
} /* parse_decimal */

static int skip_decimal(unsigned char * * const hinput)
/* non-const front end for parse_decimal() that discards the value:
// advance hinput to char after last digit in sequence
// hinput unchanged if returning nonzero (i.e. error code)
//	(because parse_decimal() does not change it in that case)
*/
{
    long ignored;
    unsigned char const *after = *hinput;
    int status = parse_decimal(&after, &ignored);

    /* add the distance travelled, so the pointer keeps its non-const type */
    *hinput = *hinput + (after - *hinput);
    return status;
} /* skip_decimal */

static int parse_quoted_string(unsigned char const * * const hinput,
				char const **qsbeg, char const **qsend)
/* make qsbeg point to char after opening quote, qsend to closing quote
**	(a backslash makes the char following it part of the string,
**	 so \-escaped quotes do not close it)
** advance hinput to char after closing quote
** hinput unchanged if returning nonzero (i.e. error code)
*/
{
    unsigned char const *start = *hinput;
    unsigned char const *scan;

    if (*start != '"') {
	abs_message ("** expected quote at '%s'", start);
	return -EINVAL;
    }
    scan = start + 1;
    *qsbeg = (char const *) scan;
    for (;;) {
	if (*scan == '"') {
	    break;
	}
	if (*scan == '\\') {
	    scan++;	/* step over the backslash, look at escaped char */
	}
	if (*scan == '\0') {
	    /* input ended before the closing quote */
	    abs_message ("** expected quoted string at '%s'", start);
	    return -EINVAL;
	}
	scan++;
    }
    *qsend = (char const *) scan;	/* points to closing quote */
    *hinput = scan + 1;
    return 0;
} /* parse_quoted_string */

static int skip_quoted_string(unsigned char * * const hinput)
/* non-const front end for parse_quoted_string() that discards the bounds:
// advance hinput to char after closing quote
// hinput unchanged if returning nonzero (i.e. error code)
//	(because parse_quoted_string() does not change it in that case)
*/
{
    char const *ignored_beg;
    char const *ignored_end;
    unsigned char const *after = *hinput;
    int status = parse_quoted_string(&after, &ignored_beg, &ignored_end);

    /* add the distance travelled, so the pointer keeps its non-const type */
    *hinput = *hinput + (after - *hinput);
    return status;
} /* skip_quoted_string */

static long find_set_affix_idx(long domain, unsigned char const *affname)
/* walks the -1 terminated list affix_weights[domain] and returns the
// first entry whose affix_names[] string equals affname
// returns -1 if not found in domain
*/
{
    long *pidx = affix_weights[domain];

    while (*pidx != -1) {
	if (strcmp((char const *) affname, affix_names[*pidx]) == 0) {
	    return *pidx;
	}
	pidx++;
    }
    return -1;
} /* find_set_affix_idx */

static int fill_set_param(long ntaffdom, unsigned char *parptr, unsigned long *result)
/* parptr is either a single affix_name,
//	or an OR of affix_names, separated by '|'s
//	In the latter case, each '|' and surrounding spaces are zeroed out.
//	Caller is expected to have zeroed out any spaces at
//	the begin and end of the parptr string.
// result receives the OR of affix_domains[] of all named affixes.
// returns 0, or -EDOM as soon as a name is not found in domain ntaffdom
//	(result then holds the OR of the names accepted so far)
*/
{
    unsigned char *next = parptr;

    *result = 0;
    do {
	unsigned char const *name = next;
	unsigned char *bar = (unsigned char *) strchr((char *) next, '|');
	long affidx;

	if (bar) {
	    /* terminate this name and find the start of the next one */
	    may_zero_out_spaces_backward_from(bar - 1);
	    *bar = '\0';
	    next = bar + 1;
	    may_skip_and_zero_spaces(&next);
	} else {
	    next = NULL;	/* this was the last name */
	}

	affidx = find_set_affix_idx(ntaffdom, name);
	if (affidx == -1) {
	    abs_message ("** affix '%s' invalid at this point", name);
	    return -EDOM;
	}
	*result |= affix_domains[affidx];
    } while (next);
    return 0;
} /* fill_set_param */

static int fill_trelinp_param(LXCN_PARAM *paramp, long ntaffdom, unsigned char *parptr)
/* fill *paramp from the zero-terminated param text at parptr,
// interpreted according to the affix domain ntaffdom:
//	TEXT_TYPE: the word TEXT or a quoted string
//	INT_TYPE:  the word INT or a decimal number
//	otherwise: a set affix, see fill_set_param()
// returns 0 or a negative error code
*/
{
    unsigned char const *rest = parptr;
    int status;

    if (ntaffdom == TEXT_TYPE) {
	char const *txtbeg;
	char const *txtend;

	paramp->kind = lxcn_TextKind;
	if (strcmp((char const *) rest, "TEXT") == 0) {
	    paramp->value.text_par = TOP_TEXT;
	    return 0;
	}
	status = parse_quoted_string(&rest, &txtbeg, &txtend);
	if (status) {
	    return status;
	}
	/* caller has assured that param is quoted string, check anyway */
	if (*rest != '\0') {
	    abs_message ("** extraneous '%s' in TEXT '%s'", rest, parptr);
	    return -EINVAL;
	}
#ifdef TEXT_AFF_PLAIN
	paramp->value.text_par = copy_string(txtbeg, txtend - txtbeg);
	return 0;
#else
	return dupsubstr_unescaped(txtbeg, txtend, &paramp->value.text_par);
#endif
    }

    if (ntaffdom == INT_TYPE) {
	long number;

	paramp->kind = lxcn_IntKind;
	if (strcmp((char const *) rest, "INT") == 0) {
	    paramp->value.int_par = TOP_INT;
	    return 0;
	}
	status = parse_decimal(&rest, &number);
	if (status) {
	    return status;
	}
	/* caller has assured that param is all digits, check anyway */
	if (*rest != '\0') {
	    abs_message ("** extraneous '%s' in INT '%s'", rest, parptr);
	    return -EINVAL;
	}
	paramp->value.int_par = number;
	return 0;
    }

    /* Set type */
    paramp->kind = lxcn_SetKind;
    return fill_set_param(ntaffdom, parptr, &(paramp->value.set_par));
} /* fill_trelinp_param */

static int process_trelinp_re_name (unsigned *retrenr,
				    RegType regtype, unsigned char const **thinput)
/* parse the quoted regexp name at *thinput and look it up in the
// name table belonging to regtype
// on success: store its index in *retrenr, advance *thinput to the
//	char after the closing quote and return 0
// returns the parse error if there is no quoted string,
//	-EDOM if the name is not in the table
*/
{
    int status;
    char const *rtbeg;
    char const *rtend;
    char *wanted;
    unsigned idx;
    unsigned count = get_nr_regexps(regtype);
    const char **names;

    if (regtype == RegMatch) {
	names = match_regexp_names;
    } else if (regtype == RegSkip) {
	names = skip_regexp_names;
    } else {
	assert(!"unknown regexp type");
	names = NULL; /* to keep gcc quiet */
    }

    status = parse_quoted_string(thinput, &rtbeg, &rtend);
    if (status) {
	return status;
    }

    /* generator now stores REs without removing backslashes (as it should),
    // so compare against a plain copy of the quoted text
    */
    wanted = copy_string(rtbeg, rtend - rtbeg);

    status = -EDOM;
    for (idx = 0; idx < count; idx++) {
	if (strcmp(wanted, names[idx]) == 0) {
	    *retrenr = idx;
	    status = 0;
	    break;
	}
    }
    if (status) {
	abs_message ("** unknown regexp '%s'", rtbeg); /* print with escapes */
    }
    abs_free (wanted, "process_trelinp_re_name");
    return status;
} /* process_trelinp_re_name */

static int
process_trelinp_re_indicator(unsigned *p_renr, RegType *p_regtype,
				int *p_nclass, unsigned char const **hcp)
/* format: $MATCH[(]["].*["][)]
//     or: $SKIP[(]["].*["][)]
// stores regexp type, transition class and regexp number through the
// p_ pointers; *hcp is advanced past the ')' only when 0 is returned
*/
{
    unsigned char const *cursor = *hcp;
    int status;

    if (strncmp((char const *) cursor, "$MATCH", 6) == 0) {
	cursor += 6;
	*p_regtype = RegMatch;
	*p_nclass = re_match_class;
    } else if (strncmp((char const *) cursor, "$SKIP", 5) == 0) {
	cursor += 5;
	*p_regtype = RegSkip;
	*p_nclass = re_skip_class;
    } else {
	abs_message ("** expected $MATCH or $SKIP at '%s'", cursor);
	return -EINVAL;
    }

    if (*cursor != '(') {
	abs_message ("** expected '(' at '%s'", cursor);
	return -EINVAL;
    }
    cursor++;

    status = process_trelinp_re_name(p_renr, *p_regtype, &cursor);
    if (status) {
	return status;
    }

    if (*cursor != ')') {
	abs_message ("** expected ')' at '%s'", cursor);
	return -EINVAL;
    }
    *hcp = cursor + 1;
    return 0;
} /* process_trelinp_re_indicator */

static int process_trelinp_terminal (Trellis *trellis, Transition *transition,
				     long srcpos, char const *txtbeg, char const *txtend)
/* Look up the text txtbeg..txtend among the grammar terminals and, when
// found, initialize transition as a terminal transition at srcpos.
// returns 0 if a terminal matched,
//	-EDOM if none matched,
//	or the error from dupsubstr_unescaped() (non TERMNAMES_WITH_ESC build)
// The trellis parameter is not used in this function.
*/
{
    unsigned n_terminals = get_nr_terminals();
    unsigned tnr;
    /* Order of initialization is IMPORTANT: */
    /* txtstart must be saved before txtbeg is passed by address below */
    char const *txtstart = txtbeg;	/* original, for error msg */
    abs_LexemeType lex_type = derive_lex_type_and_strip_hyphens(&txtbeg, &txtend);
    unsigned char lex_mark = abs_get_lex_mark(lex_type);
#ifdef TERMNAMES_WITH_ESC
    unsigned txtlen = txtend - txtbeg;
#else
    /* generator stores termnames after removing \ escapes, do same for find: */
    char *txtdup;
    int retval = dupsubstr_unescaped(txtbeg, txtend, &txtdup);
    if (retval) {
	return retval;
    }
    /* from here on txtbeg..txtend delimit the unescaped copy */
    txtbeg = txtdup;
    txtend = txtbeg + strlen(txtbeg);
#endif

    if (lex_mark == abs_EmptyMark) {
	lex_mark = *txtbeg;	/* make it match below */
    }

    /* TODO: what if multiple terminals match? Can that happen? */
    /* the first terminal with equal text and equal leading mark wins */
    for (tnr = 0; tnr < n_terminals; tnr++) {
	char const *ttxt = get_terminal_text(tnr, lex_type);
	if (
#ifdef TERMNAMES_WITH_ESC
	    !strncmp(txtbeg, ttxt, txtlen)
	    && strlen(ttxt) == txtlen
#else
	    !strcmp(txtbeg, ttxt)
#endif
	    && *get_terminal(tnr) == lex_mark) {

	    /* NOTE(review): in the non TERMNAMES_WITH_ESC build txtbeg/txtend
	    // point into txtdup, which is freed right after this call;
	    // confirm init_terminal_transition() does not keep these pointers.
	    */
	    init_terminal_transition(transition, srcpos, tnr, txtbeg, txtend, lex_type);
#ifndef TERMNAMES_WITH_ESC
	    abs_free(txtdup, "process_trelinp_terminal");
#endif
	    return (0);
	}
    }
    abs_message ("** unknown terminal at '%s'", txtstart); /* show hyphens, if any */
#ifndef TERMNAMES_WITH_ESC
    abs_free(txtdup, "process_trelinp_terminal");
#endif
    return -EDOM;	/* terminal not found */
} /* process_trelinp_terminal */

/* nontname may (absolutely) not contain chars in "(-="
// nontname 1st char may not be in "-=$"
// nont names and affix names MAY contain spaces
// affix name chars may not be in ",)"
// can we hold:	letcapdig=[0-9A-Za-z];
//		nontname={letcapdig}|{letcapdig}[{letcapdig} ]*{letcapdig}
// and:		affch=[0-9A-Za-z+-]
//		affixname={affch}|{affch}[{affch} ]*{affch}
*/

int is_valid_nontchar(unsigned char ch)
/* returns 1 if ch is an ASCII digit, an ASCII letter or a space, else 0
*/
{
    if (ch == ' ') {
	return 1;
    }
    if (ch >= '0' && ch <= '9') {
	return 1;
    }
    if (ch >= 'A' && ch <= 'Z') {
	return 1;
    }
    return ch >= 'a' && ch <= 'z';
}

int is_valid_affchar(unsigned char ch)
/* returns 1 if ch is an ASCII digit, an ASCII letter, '-', '+' or a space,
// else 0
*/
{
    if (ch == ' ' || ch == '+' || ch == '-') {
	return 1;
    }
    if (ch >= '0' && ch <= '9') {
	return 1;
    }
    if (ch >= 'A' && ch <= 'Z') {
	return 1;
    }
    return ch >= 'a' && ch <= 'z';
}

static int count_affixes_and_zero_out_separation(unsigned char **h_affseq, int *p_arity)
/* h_affseq starts at char after '(' and usually ends at closing ')'
// stuff between params (commas and spaces) is zeroed out
// *p_arity is incremented once per param seen (caller initializes it)
// *h_affseq is updated only when 0 is returned; it then points to the
// first char that is not part of the param sequence
*/
{
    unsigned char *cursor = *h_affseq;
    int status;

    for (;;) {
	may_skip_and_zero_spaces(&cursor);

	if (*cursor == '"') {	/* text affix */
	    status = skip_quoted_string(&cursor);
	    if (status) {
		return status;
	    }
	} else if (!is_valid_affchar(*cursor)) {
	    abs_message ("** expected quote, number or affix name at '%s'", cursor);
	    return -EINVAL;
	} else if (*cursor >= '0' && *cursor <= '9') {	/* int affix */
	    status = skip_decimal(&cursor);
	    if (status) {
		return status;
	    }
	} else {	/* set affix, maybe combined by |  */
	    while (is_valid_affchar(*cursor) || *cursor == '|') {
		cursor++;
	    }
	    /* affix names may contain spaces, so trailing ones were
	    // skipped as part of the name: wipe them now
	    */
	    may_zero_out_spaces_backward_from(cursor - 1);
	}
	++*p_arity;

	if (*cursor != ',') {
	    break;
	}
	*cursor++ = '\0';	/* wipe the separator, go for next param */
    }
    *h_affseq = cursor;
    return 0;
} /* count_affixes_and_zero_out_separation */

static int
process_trelinp_nonterminal(LEXICON* trellex, Transition *transition,
		int *p_nclass, long srcpos,
		char const *txtbeg, char const *txtend, unsigned char **hinput)
/* Parse a lexicon nonterminal, with or without a param list, at *hinput
// and initialize transition as a lexicon transition at srcpos for the
// text txtbeg..txtend.
// The input buffer is modified in place: separators are overwritten with
// '\0' so that the name and each param become zero-terminated strings.
// On success: *hinput points after the parsed part, *p_nclass is set to
// the nonterminal number, and 0 is returned.
// On failure a negative error code is returned and *hinput is unchanged;
// the transition may already have been initialized by then (the caller
// process_trelinp_trans() calls delete_transition() on it).
*/
{
    abs_LexemeType lex_type = derive_lex_type_and_strip_hyphens(&txtbeg, &txtend);
    unsigned char *pvp = *hinput;
    unsigned char *nontname;
    off_t nontnr;
    int retval;

    nontname = pvp;
    if (!is_valid_nontchar(*pvp)) {
	abs_message ("** expected nonterminal name at '%s'", pvp);
	return -EINVAL;
    }
    while (is_valid_nontchar(*++pvp)) {
	/* just skip it */
    }

    /* remove trailing spaces from nontname, which may have embedded spaces */
    may_zero_out_spaces_backward_from(pvp - 1);

    if (*pvp == '(') {
	/* NONT with params */
	int arity = 0;
	int parnr;
	unsigned char *partext = pvp;	/* still at the '(', zeroed below */
	LXCN_PARAM *paramp;
	long *ntdom;
	char *txtdup;

	/* first, we count the arity
	** and zero out everything but the param names
	*/
	*pvp = '\0'; /* this may be trailing 0 of nontname */
	pvp++;
	if ((retval = count_affixes_and_zero_out_separation(&pvp, &arity))) {
	    return retval;
	}
	if (*pvp != ')') {
	    abs_message ("** expected ')' at '%s'", pvp);
	    return -EINVAL;
	}
	*pvp++ = '\0';	/* terminates the last param */
	if (!trellex) {
	    abs_message ("** lex nonterminal '%s/%d' found while no lexicon", nontname, arity);
	    return -EDOM;
	}
	nontnr = lxcn_find_lex_nonterminal_with_arity(trellex, nontname, arity);
	if (nontnr == -1) {
	    abs_message ("** unknown nonterminal '%s/%d'", nontname, arity);
	    return -EDOM;
	}
	/* one affix domain per param position, advanced in the loop below */
	ntdom = nont_domains[nontnr];

	retval = dupsubstr_unescaped(txtbeg, txtend, &txtdup);
	if (retval) {
	    return retval;
	}
	/* NOTE(review): txtdup is not freed here; presumably the transition
	// takes ownership of it -- confirm in init_lexicon_transition()
	*/
	init_lexicon_transition(transition, srcpos,
				code_nonterminal(nontnr, arity),
				txtdup, txtdup + strlen(txtdup), lex_type);
	paramp = (LXCN_PARAM*) abs_calloc (arity, sizeof(LXCN_PARAM), "process_trelinp_nonterminal: params");
	transition->params = paramp; /* will be free'd by delete_transition */

	/* every sequence of nonzero chars is a
	**	quoted_string, number, or (|'ed) param_name(s)
	*/
	for (parnr = 0; parnr < arity; parnr++) {
	    unsigned char *thispar;
	    /* skip the zeroed separators in front of this param */
	    while (!*partext) {
		partext++;
	    }
	    thispar = partext;
	    /* fill_trelinp_param() below may zero out separating '|'s,
	    //	so skip param now
	    */
	    while (*partext) {
		partext++;
	    }
	    if ((retval = fill_trelinp_param(paramp, *ntdom, thispar))) {
		abs_message ("** while processing parnum %d of '%s'", parnr + 1, nontname);
		return retval;
	    }
	    ntdom++;	/* to domain of next affix position */
	    paramp++;	/* to next element of transition->params */
	}
    } else if (*(pvp - 1) == '\0' /* former ' ' */) {
	/* NONT without params */
	/* only accepted when the name was followed by at least one space,
	// which has been zeroed above and now terminates nontname
	*/
	char *txtdup;

	if (!trellex) {
	    abs_message ("** lex nonterminal '%s/0' found while no lexicon", nontname);
	    return -EDOM;
	}
	nontnr = lxcn_find_lex_nonterminal_with_arity(trellex, nontname, 0);
	if (nontnr == -1) {
	    abs_message ("** unknown nonterminal '%s/0'", nontname);
	    return -EDOM;
	}
	retval = dupsubstr_unescaped(txtbeg, txtend, &txtdup);
	if (retval) {
	    return retval;
	}
	init_lexicon_transition(transition, srcpos, code_nonterminal(nontnr, 0),
				txtdup, txtdup + strlen(txtdup), lex_type);

	/* params set to NULL by above, that's what it should be with arity 0 */

    } else {
	/* name is directly followed by something other than '(' or ' ' */
	abs_message ("** expected parmlist or space at '%s'", pvp);
	return -EINVAL;
    }
    *hinput = pvp;
    *p_nclass = nontnr; /* = DECODE_NONT_NUMBER(cur_trans->terminal) */
    return 0;
} /* process_trelinp_nonterminal */

static int process_trelinp_entry (Trellis *trellis, Transition *transition, int *p_nclass,
				  long srcpos, unsigned char **hinput)
/* format1: txt_ws_frq nontname ws([ ]+)
// format2: txt_ws_frq nontname
//		openP([(]) par {sep([,]) ws([ ]*) par}* closeP([)]) ws([ ]+)
// format3: txt_ws_frq $MATCH[(]["].*["][)] ws([ ]+)
// format4: txt_ws_frq $SKIP[(]["].*["][)] ws([ ]+)
// where txt_ws_frq: text(["][^"]*["]) ws([ ]*) {frq([[][0-9]+[]]) ws([ ]*)}?
//
// When the text is directly followed by the arrow ('=' or '-') it is
// looked up as a grammar terminal instead.
//
// Uses the fact that hinput points into a _copy_ of the real input,
//  which means that we (and our callees) are free to poke 0s into it.
//
// returns 0 with *hinput advanced past the entry and trailing spaces,
// or a negative error code
*/
{
    unsigned char const *cursor = *hinput;	/* most calls require const */
    char const *txtbeg;
    char const *txtend;
    long penalty = penalty_unknown;
    int status;

    status = parse_quoted_string(&cursor, &txtbeg, &txtend);
    if (status) {
	return status;
    }
    may_skip_spaces(&cursor);

    if (*cursor == '[') {	/* optional penalty */
	cursor++;
	status = parse_decimal(&cursor, &penalty);
	if (status) {
	    return status;
	}
	if (*cursor != ']') {
	    abs_message ("** expected ']' at '%s'", cursor);
	    return -EINVAL;
	}
	cursor++;
	may_skip_spaces(&cursor);
    }

    if (*cursor == '$') {
	/* regexp */
	unsigned re_nr;
	RegType regtype;

	status = process_trelinp_re_indicator(&re_nr, &regtype,
					      p_nclass, &cursor);
	if (status) {
	    return status;
	}
	init_regexp_transition(transition, srcpos, re_nr,
				txtbeg, txtend, regtype);
	/* nclass assigned above by process_trelinp_re_indicator() */
    } else if (*cursor == '=' || *cursor == '-' || *cursor == '\0') {
	/* txt is grammar terminal
	// (end of input is handled here as well; the caller reports
	//  the missing arrow)
	*/
	status = process_trelinp_terminal(trellis, transition,
					  srcpos, txtbeg, txtend);
	if (status) {
	    return status;
	}
	*p_nclass = gr_term_class;
    } else {
	/* lexicon nonterminal; the callee needs the non-const pointer */
	*hinput += (cursor - *hinput);	/* assign without violating const */
	status = process_trelinp_nonterminal(trellis->lexicon, transition,
				p_nclass, srcpos, txtbeg, txtend, hinput);
	if (status) {
	    return status;
	}
	cursor = *hinput;
    }

    if (penalty != penalty_unknown) {
	transition->penalty = penalty;
    }
    may_skip_spaces(&cursor);
    *hinput += (cursor - *hinput);	/* assign without violating const */
    return 0;
} /* process_trelinp_entry */

static int
try_process_trelinp_eos(Trellis *trellis, Transition *transition,
				long srcpos, unsigned char const **hinput)
/* format: <EOS>
// if *hinput starts with eos_text: skip it, make transition an EOS
// transition and insert it at srcpos; return 0
// otherwise return -EINVAL without touching anything (and without message)
*/
{
    size_t eoslen = strlen(eos_text);

    if (strncmp((char const *) *hinput, eos_text, eoslen) != 0) {
	return -EINVAL;
    }
    *hinput += eoslen;
    init_eos_transition(transition, srcpos);
    insert_transition(trellis->states_row, srcpos,
			gr_term_class, transition, 0);
    return 0;
} /* try_process_trelinp_eos */

static int process_trelinp_eos_trans (Trellis *trellis, long srcpos, unsigned char const **hinput)
/* format: <EOS>
// allocates a transition and tries to make it the EOS transition at srcpos
// if the input is not EOS, the transition is released again, a message is
// printed and the error code is returned
*/
{
    Transition *eos_trans = alloc_transition();
    int status = try_process_trelinp_eos(trellis, eos_trans, srcpos, hinput);

    if (!status) {
	return 0;	/* we found EOS */
    }
    free_transition(eos_trans);
    abs_message ("** expected '%s' at '%s'", eos_text, *hinput);
    return status;
} /* process_trelinp_eos_trans */

static int process_trelinp_trans (Trellis *trellis, long srcpos, unsigned char **hinput)
/* format1: entry "=>" ws pos
// ("->" is accepted too and treated as "=>", with a one-time warning)
//
// Allocates a transition, fills it from the entry at *hinput and inserts
// it at srcpos; the destination pos from the input (1-based) is stored
// as a length relative to srcpos.
// returns 0 with *hinput advanced past pos, or a negative error code,
// in which case the transition has been deleted again
*/
{
    int retval;
    long destpos;
    Transition *cur_trans = alloc_transition();
    int nontclass;
    unsigned char const *hch;
    unsigned char arrow;

    /* to make delete_on_error safe */
    cur_trans->type = 0;	/* i.e. TxtFreeBit off */
    cur_trans->params = NULL;
    if ((retval = process_trelinp_entry(trellis, cur_trans,
					&nontclass, srcpos, hinput))) {
	delete_transition(cur_trans);
	return retval;
    }

    /* Compare explicitly instead of using strchr("=-", c): strchr also
    ** matches the terminating '\0', after which (*hinput)[1] would be
    ** read beyond the end of the input.
    */
    arrow = **hinput;
    if ((arrow != '=' && arrow != '-') || (*hinput)[1] != '>') {
	abs_message ("** expected => or -> arrow at '%s'", *hinput);
	delete_transition(cur_trans);
	return -EINVAL;
    }
    if (arrow == '-' && !have_warned_about_parts) {
	/* parts transitions are not handled separately (yet) */
	abs_message ("** warning: interpreting '->' as '=>'");
	have_warned_about_parts++;
    }
    (*hinput) += 2;

    hch = *hinput;
    may_skip_spaces(&hch);
    if ((retval = parse_decimal(&hch, &destpos))) {
	delete_transition(cur_trans);
	return retval;
    }
    *hinput += hch - *hinput;
    destpos--;	/* internal positions start at 0, external ones at 1 */

    /* calls to init_lexicon_transition from process_trelinp_entry
    ** (unlike those in text parsing)
    ** already filled transition->terminal with encoded nont,arity
    ** and process_trelinp_entry already set transition->params
    */
    insert_transition(trellis->states_row, srcpos, nontclass,
			cur_trans, destpos - srcpos);
    return 0;
} /* process_trelinp_trans */

long process_trelinp_line(Trellis* trellis, unsigned char const *ainput)
/* expects a zero-terminated line without CR or LF
// format1: pos([ ]*[0-9]+) ws([ ]*) trans {sep([,]) ws([ ]*) trans}*
//
// pos'es in input text start at 1, internally they are 1 less
// (a pos below 1 is rejected with -ERANGE, as it would index before
//  the start of the states row)
//
// In case of error, the routine finding it prints a message with the
// input line from the position that couldn't be read, whereafter
// process_trelinp_line (that's here) prints the whole erroneous input line.
//
// We keep the original input for error messages and use a local copy
// for processing, so callees can store 0's in it.
//
// returns 0 or a negative error code
*/
{
    int retval;
    unsigned char *cinput = (unsigned char *) abs_new_string ((char *)ainput,
							      "process_trelinp_line");
    unsigned char const *pinput = cinput;
    long srcpos;

    may_skip_spaces(&pinput);
    retval = parse_decimal(&pinput, &srcpos);
    if (!retval && srcpos < 1) {
	abs_message ("** position must be at least 1, got %ld", srcpos);
	retval = -ERANGE;
    }
    if (retval) {
	abs_message ("** in '%s'", ainput);
	abs_free (cinput, "process_trelinp_line");
	return retval;
    }
    srcpos--;	/* internal positions start at 0, external ones at 1 */

    do {
	unsigned char *pvp;

	may_skip_spaces(&pinput);
	/* same position as pinput, but through the non-const pointer */
	pvp = cinput + (pinput - cinput);
	if ((retval = process_trelinp_trans(trellis, srcpos, &pvp))) {
	    abs_message ("** in '%s'", ainput);
	    abs_free(cinput, "process_trelinp_line");
	    return retval;
	}
	pinput += pvp - pinput;
	if (*pinput && (*pinput != ',')) {
	    abs_message ("** expected transition separator at '%s'\n** in '%s'", --pinput, ainput);
	    abs_free (cinput, "process_trelinp_line");
	    return -EINVAL;
	}
    } while (*pinput++ == ',');
    abs_free(cinput, "process_trelinp_line");

    /* in order to make -G output look more like input: */
    reverse_trans_lists_at(trellis, srcpos);
    return (0);
} /* process_trelinp_line */

int fix_transition_from_length (StateNode **states_row, Transition *transition,
				Position bpos, Position eospos)
/* compute the destination of transition as bpos + its length and
// link the transition to the state at that position
// returns 0, or -ERANGE (after printing a message) if the destination
//	is not in bpos+1 .. eospos or has no state
*/
{
    long destpos = get_length(transition) + bpos;

    /* positions are printed via long, to match the %ld conversions */
    if (destpos <= bpos || destpos > eospos) {
	abs_message ("** pos %ld transition destination %ld outside range",
		     (long) bpos, destpos);
	return -ERANGE;
    } else if (!states_row[destpos]) {
	abs_message ("** pos %ld transition destination %ld is empty",
		     (long) bpos, destpos);
	return -ERANGE;
    } else {
	add_transition(transition, states_row[destpos], destpos);
	return 0;
    }
} /* fix_transition_from_length */

static int fix_transitions_from_lengths(Trellis *trellis, Position bpos, Position eospos)
/* eospos should contain only an EOS transition, so we stop just before there
** assumes there are no EOS transitions elsewhere
**
** every transition of every state in bpos .. eospos-1 is handed to
** fix_transition_from_length(); all of them are tried, even after a failure
** returns 0 if all went well, -ERANGE if at least one failed
*/
{
    int failures = 0;
    StateNode **row = trellis->states_row;
    Position pos;

    for (pos = bpos; pos < eospos; pos++) {
	Transition **lists;
	int cls;

	if (!row[pos]) {
	    continue;	/* no state at this position */
	}
	lists = row[pos]->trans_lists;
	for (cls = NR_classes - 1; cls >= 0; cls--) {
	    Transition *tr;

	    for (tr = lists[cls]; tr; tr = tr->next) {
		if (fix_transition_from_length(row, tr, pos, eospos) != 0) {
		    failures++;
		}
	    }
	}
    }
    return failures ? -ERANGE : 0;
} /* fix_transitions_from_lengths */

Trellis* make_trellis_from_prelexer_output(const char* input, LEXICON* the_lex)
/* Expects a zero-terminated string containing 'lines' separated
** by LINSEPs (e.g. TABs or LFs, see line_sep below for actual value).
** If the last char is a LINSEP, it is ignored.
**
** The last line must be "pos <EOS>"; its pos gives the size of the trellis.
** All other lines are handed to process_trelinp_line().
** Any error is reported with abs_message() and ends in abs_exit(2);
** otherwise the completed trellis is returned.
*/
{
    Trellis* trellis;
    Position first_pos;
    char const *last_start;
    unsigned char const *lastline;
    char const *line_end;
    long tlen;
    int retval;
    char line_sep = '\n';	/* LINSEP */

    last_start = strrchr(input, line_sep);	/* find last line (EOS) */
    if (last_start) {
	last_start++;
	if (!*last_start) {
	    /* input ends in LINSEP [and LF] */
	    /* walk all LINSEPs; remember the last one followed by text */
	    char const *lltmp = input;
	    while (lltmp = strchr(lltmp, line_sep), *++lltmp) {
		last_start = lltmp;
	    }
	    /* in case input is only LINSEP, last_start points to ending '\0' */
	}
    } else {
	last_start = input;
    }
    lastline = (unsigned char const *) last_start;

    may_skip_spaces(&lastline);
    retval = parse_decimal(&lastline, &tlen);
    if (retval) {
	abs_message ("** in '%s'", lastline);
 	abs_exit(2);
	/* return retval; */
    }

    /* tlen is now the decimal at start of last 'line'
    **	i.e. the length of the original input sentence (incl. 1 for EOS)
    */
    if (tlen < 1) {
	/* would put the EOS transition at internal position -1 */
	abs_message ("** sentence length must be at least 1, got %ld", tlen);
	abs_message ("** in '%s'", last_start);
	abs_exit(2);
    }

    trellis = alloc_trellis(tlen);
    trellis->lexicon = the_lex;
    init_trellis(trellis, tlen);
#ifdef COUNT_TRACE
    n_trel_builds++;
#endif

    tlen--;	/* from here used as internal EOS index */

    /* make_sure_last_line_is_EOS: */
    may_skip_spaces(&lastline);
    retval = process_trelinp_eos_trans(trellis, tlen, &lastline);

    /* lastline now points to the char after the EOS line,
    ** which is '\0' or LINSEP
    */
    if (!retval && *lastline && (*lastline != line_sep)) {
	abs_message ("** superfluous input after EOS at '%s'", lastline);
	retval = -EINVAL;
    }
    if (retval) {
	abs_message ("** in '%s'", input);
	delete_trellis(trellis);
	abs_exit(2);
    }

    /* Process upto the last 'line' (which has already been done).
    ** We hit last line when either
    **  1. strchr succeeds but finds the trailing LINSEP
    **		(which is the char that lastline now points to)
    **		that char obviously is > last_start
    **  2. strchr fails (in case there is no trailing LINSEP)
    */
    while ((line_end = strchr(input, line_sep)) && (line_end < last_start)) {
	char *dinput = copy_string(input, line_end - input);
	/* copy_string-> abs_malloc aborts if no mem */

	retval = process_trelinp_line(trellis, dinput);
	abs_free (dinput, "make_trellis_from_prelexer_output");
	if (retval) {
	    delete_trellis(trellis);
	    abs_exit(2);
	}
	input = line_end + 1;
    }
    /* last line (EOS) already processed before above loop */

    /* first position that has a state; the EOS state bounds this search */
    for (first_pos = 0; !trellis->states_row[first_pos]; first_pos++) {
	/* skip */
    }
    SET_FIRST_POS(trellis,first_pos);
    retval = fix_transitions_from_lengths(trellis, first_pos, tlen);
    if (retval) {
	abs_message ("** in '%s'", input);
	delete_trellis(trellis);
	abs_exit(2);
    }

    add_trellis_neg_memos(trellis);
#ifdef PMRTS
    add_trellis_pos_memos(trellis);
#endif /* PMRTS */

    return trellis;
} /* make_trellis_from_prelexer_output */
