/*
   File: lxcn_input.h
   Defines interfacing definitions for the lexer I/O routines

   Copyright 2009 Radboud University of Nijmegen
 
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"
*/
#ifndef IncLexiconInput
#define IncLexiconInput

#ifdef  __cplusplus
extern "C" {
#endif

/*
   The lexicon generator will put lexemes with different markers
   in different tries. Such a marker will no longer be present
   in the lexemes. Temporarily, however, the grammar terminals
   still use the old lexeme markers. For compatibility reasons
   the old markers and new markers will coincide.
   The new markers use the following bits.

   bit 0: Prefix
   bit 1: Suffix
   bit 2: Multi Word
   bit 3: Literal Word

   Note that an infix is formed by OR-ing the Prefix and Suffix bits.
   Furthermore, we introduce a special character coded as
   0xAD (Extended ASCII Soft Hyphen), which on its own is not a valid
   UTF-8 encoding, to indicate matching with '-', '-' + white space
   or absence of '-'. The example for this case is "well-known",
   which should match wellknown, well-known or well-<WS>known.
*/
/*
   Marker bit masks (bit 0 .. bit 3). A marker value is built by OR-ing
   these masks; an infix carries both the prefix and the suffix bit.
*/
#define LexemePrefixBit 0x1
#define LexemeSuffixBit 0x2
#define LexemeMultiWordBit 0x4
#define LexemeLiteralBit 0x8
/*
   Special byte values used inside lexemes.
   SoftHyphenChar (0xAD) stands for an optional hyphen: it matches '-',
   '-' followed by white space, or no hyphen at all.
   LiteralChar (0xA0): presumably flags a literal lexeme -- TODO confirm
   against the lexer implementation; its use is not visible in this header.
   NOTE(review): both values are int constants above 127. On platforms
   where plain char is signed, a char holding one of these bytes is
   negative, so compare via (unsigned char) rather than against a raw char.
*/
#define SoftHyphenChar 0xAD
#define LiteralChar 0xA0

/*
   Lexeme classification.
   The enumerators are numbered consecutively from 0; the values are
   written out explicitly so the numbering is visible at a glance.
*/
typedef enum {
  Prefix      = 0,
  Infix       = 1,
  Suffix      = 2,
  MultiToken  = 3,
  SingleToken = 4
} LexemeType;
/*
   Derive the lexeme classification from a marker value.
   Presumably 'marker' is an OR of the Lexeme*Bit masks -- TODO confirm
   against the implementation, which is not part of this header.
*/
LexemeType lxcn_lex_type_from_marker (int marker);

/*
   Character classification and translation routines.
   The definitions are not visible here; judging by the names they test
   for end of sentence, white space and terminator characters, and map a
   character to its translation plus an associated penalty -- verify
   against the implementation.
   NOTE(review): the parameters are plain char, whose signedness is
   implementation-defined; bytes >= 0x80 (e.g. SoftHyphenChar) arrive as
   negative values where char is signed.
*/
int lxcn_is_eos (char c);
int lxcn_is_white_space (char c);
int lxcn_is_terminator (char c);
char lxcn_translate (char c);
int lxcn_translate_penalty (char c);

/* Print a lexeme together with its classification (output format and
   destination are defined by the implementation). */
void lxcn_print_lexeme (char *lexeme, LexemeType lex_type);

/*
   Set up the character tables from the given descriptions.
   Presumably translate_src, translate_dst and translate_penalties are
   parallel sequences (source char, replacement char, penalty), and this
   must run before the classification/translation routines are used --
   TODO confirm both points against the implementation.
*/
void lxcn_init_char_tables (char *white_space,
			    char *translate_src, char *translate_dst,
			    int *translate_penalties);

#ifdef  __cplusplus
}
#endif
#endif /* IncLexiconInput */
