/*
   File: lxcn_input.h
   Defines interfacing definitions for the lexer I/O routines
   Copyright 2007 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: lxcn_input.h,v 1.2 2007/10/31 14:58:06 marcs Exp $
*/
#ifndef IncLexiconInput
#define IncLexiconInput

#ifdef  __cplusplus
extern "C" {
#endif

/*
   The lexicon generator will put lexemes with different markers
   in different tries. Such a marker will no longer be present
   in the lexemes. Temporarily, however, the grammar terminals
   still use the old lexeme markers. For compatibility reasons
   the old markers and new markers will coincide.
   The new markers use the following bits.

   bit 0: Prefix
   bit 1: Suffix
   bit 2: Multi Word
   bit 3: Literal

   Note that an infix is formed by OR-ing the Prefix and Suffix bits.
   Furthermore we introduce a special character coded as
   0xAD (Extended ASCII Soft Hyphen), which is not a valid
   UTF-8 encoding, to indicate matching with '-', '-' + white space
   or absence of '-'. The example for this case is well\-known,
   which should match wellknown, well-known or well-<WS>known.
*/
/* Bit 0: the lexeme is a prefix */
#define LexemePrefixBit 0x1
/* Bit 1: the lexeme is a suffix (prefix and suffix bits together denote an infix) */
#define LexemeSuffixBit 0x2
/* Bit 2: the lexeme is a multi word */
#define LexemeMultiWordBit 0x4
/* Bit 3: the lexeme is a literal */
#define LexemeLiteralBit 0x8
/*
   Soft hyphen code (0xAD).
   NOTE(review): this expands to an int constant with value 173; on
   platforms where plain char is signed, a direct comparison with a
   char holding the byte 0xAD will not match unless the char is first
   converted to unsigned char -- confirm how callers compare.
*/
#define SoftHyphenChar 0xAD

/*
   Old style lexeme marker characters.
   The values of PrefixMark, SuffixMark and MultiTokenMark are equal to
   LexemePrefixBit, LexemeSuffixBit and LexemeMultiWordBit respectively;
   InfixMark equals PrefixMark | SuffixMark.
   EosMark and EmptyMark share the value '\0' and can therefore not be
   distinguished from each other by value.
*/
enum
{ EosMark		= '\0',
  EmptyMark		= '\0',
  PrefixMark		= '\1',
  SuffixMark		= '\2',
  InfixMark		= '\3',	/* PrefixMark | SuffixMark */
  MultiTokenMark	= '\4'
};

/*
   Lexeme classification.
   The enumerators take the implicit values 0 (Prefix) through
   4 (SingleToken). These ordinals differ from the marker characters
   and marker bits defined in this header (e.g. Infix is 1 while
   InfixMark is '\3'), so the two must not be used interchangeably.
*/
typedef enum
{ Prefix,
  Infix,
  Suffix,
  MultiToken,
  SingleToken
} LexemeType;

/*
   Conversion between old marker characters and lexeme types.
   The implementations are not part of this header; judging by the
   signatures:
   - is_an_old_lex_marker presumably tests whether 'marker' is one of
     the old marker characters and reports the matching classification
     through 'lex_type' (TODO confirm the return convention and whether
     *lex_type is written when the test fails).
   - lxcn_get_lex_mark presumably yields the marker character that
     belongs to 'lex_type' (TODO confirm the result for SingleToken).
*/
int is_an_old_lex_marker (char marker, LexemeType *lex_type);
char lxcn_get_lex_mark (LexemeType lex_type);

/*
   Character classification and translation routines.
   The implementations are not part of this header; the notes below
   are derived from the names and signatures only and should be
   verified against the implementation.

   lxcn_is_eos, lxcn_is_blank, lxcn_is_terminator, lxcn_is_invisible:
	presumably return nonzero when 'c' belongs to the named
	character class.
   lxcn_translate:
	presumably maps 'c' to its translated character.
   lxcn_print_lexeme:
	presumably prints 'lexeme' in a readable form; the output
	destination is not visible here.
   lxcn_init_char_tables:
	presumably sets up the tables consulted by the routines above
	from the given character strings, where translate_src and
	translate_dst look like parallel source/destination strings.
	TODO confirm whether NULL arguments are accepted and whether the
	strings are copied or must outlive the call.
*/
int lxcn_is_eos (char c);
int lxcn_is_blank (char c);
int lxcn_is_terminator (char c);
int lxcn_is_invisible (char c);
char lxcn_translate (char c);
void lxcn_print_lexeme (char *lexeme);
void lxcn_init_char_tables (char *blanks, char *terminators, char *invisibles, 
			    char *translate_src, char *translate_dst);

#ifdef  __cplusplus
}
#endif
#endif /* IncLexiconInput */
