%{
/* Scanner for gra2o, tokenizes .gra and .lex files.
 *
 * Copyright 2001, KUN.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/* $Id: scanner.l,v 1.10 2001/10/22 15:12:03 ejv Exp $ */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

#include <cdl3rts.h>
#include <predef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#ifndef WIN32
#include <sys/uio.h>
#endif /* WIN32 */
#include <unistd.h>

/*
** Read from file (not just stdin).
**
** read_input() is defined after the rules section; it is declared here so
** that the YY_INPUT replacement below can refer to it.
*/
int read_input(char* buf, int max);

/* Drop any YY_INPUT that is already defined so the one below takes effect. */
#ifdef YY_INPUT
#undef YY_INPUT
#endif /* YY_INPUT */
/* Buffer refills go through read_input(); its return value (the number of
   bytes placed in buf) is stored in result. */
#define YY_INPUT(buf,result,max_size) result=read_input(buf,max_size);

/* NOTE(review): presumably tells flex not to emit its unused yyunput()
   helper -- confirm against the flex version in use. */
#define YY_NO_UNPUT

/*
** Private
*/

/* Line currently being scanned; starts at 1 and is incremented by the
   newline and comment rules. */
static int line_number = 1;
/* Position within the current line; starts at 1, is advanced by the length
   of every match and is put back to 1 by the newline and comment rules. */
static int pos_in_line = 1;
/* Stream whose descriptor read_input() reads from; NULL until
   E217_set_scanner_input_file_FILE() stores one. */
static FILE *input_file = NULL;

/*
** This typedef corresponds with the cdl3 affix rule TOKEN defined in token.k3 
*/ 

typedef enum TOKEN {
  no_token,
  end_of_file,
  double_colon,
  colon,
  bar,
  comma,
  left_parenthesis,
  left_curly_bracket,
  left_square_bracket,
  right_parenthesis,
  right_curly_bracket,
  right_square_bracket,
  semicolon,
  period,
  unknown_character,
  idpart,
  quoted_string,
  number,
  dotnumber,
  grammar_symbol,
  lexicon_symbol,
  includes_symbol,
  root_symbol,
  integer_symbol,
  text_symbol,
  cond_symbol,
  rule_symbol,
  option_symbol,
  penalty_symbol,
  skip_symbol,
  match_symbol,
  commit,
  slash,
  ampersand
} TOKEN;

%}

/*
** Named patterns used by the rules section.
**
** STRING_ITEM  one character of a quoted string: a backslash followed by
**              any character except newline, or a space, an exclamation
**              mark, or a character in the ranges # to [, ] to ~ and
**              \177 to \377 (so no unescaped double quote or backslash).
** COMMENT      a # up to and including the newline that ends the line.
** LETTER       an ASCII letter or a byte in the range \177 to \377.
** ORNAMENT     one of + - ? @ ^ ~ _
** LETMENTS     a run of letters and ornaments holding at least one letter.
** IDPART       same as LETMENTS.
** NUMBER       a single 0, or an optional minus sign followed by a decimal
**              number that does not start with 0.
** LAYOUT       a space, tab or carriage return.
*/
STRING_ITEM             (\\.)|[ !#-\[\]-~\177-\377]
COMMENT                 #.*\n
LETTER                  [a-zA-Z\177-\377]
ORNAMENT                [\+\-\?@\^~_]
LETMENTS                ({LETTER}|{ORNAMENT})*{LETTER}({LETTER}|{ORNAMENT})*
IDPART                  {LETMENTS}
NUMBER                  0|\-?[1-9][0-9]*
LAYOUT                  [ \t\r]

%%
    /*
    ** Every action first updates line_number/pos_in_line for the matched
    ** text. Layout, comments and newlines are consumed without returning;
    ** all other rules return the TOKEN value for the match. Where two
    ** rules match the same length the earlier rule wins, so the keyword
    ** rules must stay ahead of {IDPART} and the catch-all "." stays last.
    */
{LAYOUT}+               pos_in_line += yyleng;
{COMMENT}               line_number++; pos_in_line = 1;
#.*                     pos_in_line += yyleng; /* comment on a last line that has no newline; {COMMENT} is longer and wins otherwise */
\n                      line_number++; pos_in_line = 1;
::                      pos_in_line += 2; return double_colon;
:                       pos_in_line++; return colon;
\|                      pos_in_line++; return bar;
,                       pos_in_line++; return comma;
\(                      pos_in_line++; return left_parenthesis;
\)                      pos_in_line++; return right_parenthesis;
\{                      pos_in_line++; return left_curly_bracket;
\}                      pos_in_line++; return right_curly_bracket;
\[                      pos_in_line++; return left_square_bracket;
\]                      pos_in_line++; return right_square_bracket;
;                       pos_in_line++; return semicolon;
\&                      pos_in_line++; return ampersand;
\/                      pos_in_line++; return slash;
\.{NUMBER}              pos_in_line += yyleng; return dotnumber;
\.                      pos_in_line++; return period;
\!                      pos_in_line++; return commit;
INT                     pos_in_line += yyleng; return integer_symbol;
GRAMMAR                 pos_in_line += yyleng; return grammar_symbol;
LEXICON                 pos_in_line += yyleng; return lexicon_symbol;
INCLUDES                pos_in_line += yyleng; return includes_symbol;
ROOT                    pos_in_line += yyleng; return root_symbol;
TEXT                    pos_in_line += yyleng; return text_symbol;
COND                    pos_in_line += yyleng; return cond_symbol;
RULE                    pos_in_line += yyleng; return rule_symbol;
OPTION                  pos_in_line += yyleng; return option_symbol;
\$PENALTY               pos_in_line += yyleng; return penalty_symbol;
\$SKIP                  pos_in_line += yyleng; return skip_symbol;
\$MATCH	                pos_in_line += yyleng; return match_symbol;
{IDPART}                pos_in_line += yyleng; return idpart;
\"{STRING_ITEM}*\"      pos_in_line += yyleng; return quoted_string;
{NUMBER}                pos_in_line += yyleng; return number;
<<EOF>>                 return end_of_file;
.                       pos_in_line++; return unknown_character;
%%

/*
** End-of-input hook of the generated scanner. Always answers 1, which
** tells the scanner that no further input follows the current one.
*/
int yywrap()
{
    const int no_further_input = 1;

    return no_further_input;
}

#include <str_util.h>
#include "util.h"

/*
** Fill buf with at most max bytes of scanner input; YY_INPUT expands to a
** call of this function.
**
** The bytes are taken straight from the descriptor behind input_file with
** read(), not through the stdio functions.
**
** Returns the number of bytes stored in buf; 0 means end of input. A
** missing input file or a failing read() is reported through my_abort().
** Should my_abort() return, 0 is handed back so the scanner sees end of
** input rather than a negative byte count.
*/
int read_input(char* buf, int max)
{
    int ret;

    if (input_file == NULL) {
        /* No file has been set yet; fileno(NULL) must not be called. */
        my_abort("read_input(): no input file set");
        return 0;
    }

    ret = read(fileno(input_file), buf, max);

    if (ret < 0) {
        my_abort("read_input(): read failed");
        return 0;
    }

    return ret;
}

/* Report the value of the scanner's line counter. */
static inline int get_line()
{
    const int current_line = line_number;

    return current_line;
}

/* Report the value of the scanner's position-in-line counter. */
static inline int get_position()
{
    const int current_position = pos_in_line;

    return current_position;
}

/*
** cdl3-support interface:
*/

static int fileptr;
static int last;

/*
** Remember the value just below the current file pointer in last, then
** put the file pointer back to 0.
*/
void E222_reset_file_pointer()
{
    const int previous = fileptr;

    fileptr = 0;
    last = previous - 1;
}


/*
** ACTION set scanner input file(>FILE)
**
** Makes the stream obtained from V_FILE1 the scanner's input and restarts
** both position counters (line and position in line) at 1, so positions
** reported for the new file do not carry over from the previous one.
*/

void E217_set_scanner_input_file_FILE(value V_FILE1)
{
#ifdef DEBUG_SCANNER
    fprintf(stderr,"E217_set_scanner_input_file called with 0x%p\n", V_FILE1);
#endif /* DEBUG_SCANNER */

    if (input_file != NULL) {
        /* A file was scanned before: have flex restart its input buffer so
           nothing left over from that file is scanned again. */
        YY_NEW_FILE;
    }
    input_file = File(V_FILE1);

#ifdef DEBUG_SCANNER
    /* %p requires a void pointer argument */
    fprintf(stderr,"  input_file now is 0x%p\n", (void *) input_file);
#endif /* DEBUG_SCANNER */

    /* NOTE(review): presumably registers a reference to V_FILE1 so it stays
       alive while the scanner uses it -- confirm against cdl3rts. */
    attach(V_FILE1);
    line_number = 1;
    pos_in_line = 1;
}

/*
** FUNCTION set line(>INT)
**
** Overwrites the scanner's line counter with the integer held by v_INT.
*/

void E201_set_line_INT(value v_INT)
{
    const int requested_line = Int(v_INT);

    line_number = requested_line;
}

/*
** FUNCTION get line(INT>)
**
** Stores the scanner's line counter, wrapped by C_INT, in *v_INT.
*/

void E202_get_line_INT(value* v_INT)
{
    const long current_line = (long) line_number;

    *v_INT = C_INT(current_line);
}


/*
** FUNCTION get position(INT>)
**
** Stores the scanner's position-in-line counter, wrapped by C_INT, in
** *v_INT.
*/

void E220_get_position_INT(value* v_INT)
{
    const long current_position = (long) pos_in_line;

    *v_INT = C_INT(current_position);
}

/*
** ACTION get token(TOKEN>)
**
** Runs yylex() once and stores the result in *V_TOKEN. The value is built
** with join() using the token code returned by yylex(); tokens that carry
** data get one field set:
**   quoted_string, unknown_character  the matched text as is
**   idpart                            the matched text passed through
**                                     strip_copy()
**   number                            the matched text as an integer
**   dotnumber                         the text after the leading '.' as
**                                     an integer
** Every other token is built without fields.
*/

void E203_get_token_TOKEN(value* V_TOKEN)
{
    value v;
    int t;

    t = yylex();

#ifdef DEBUG_SCANNER
    fprintf(stderr, "E203_get_token <%s>\n",yytext);
#endif /* DEBUG_SCANNER */

    switch (t) {
        case quoted_string:
        case unknown_character:
            join(v, t, 1);
            set(v, 1, C_TEXT(yytext));            /* C_TEXT makes copy */
            break;

        case idpart:
        {
            /* scratch buffer for strip_copy(): room for the whole match
               plus the terminating NUL */
            char* tmp_dst = malloc((size_t) yyleng + 1);

            if (tmp_dst == NULL) {
                /* NOTE(review): assumes my_abort() does not return --
                   confirm against util.h */
                my_abort("E203_get_token_TOKEN(): out of memory");
            }
            join(v, t, 1);
            set(v, 1, C_TEXT(strip_copy(tmp_dst, yytext))); /* C_TEXT also makes copy */
            free(tmp_dst);
            break;
        }

        case dotnumber:
            /* skip the leading '.'; strtol() clamps out-of-range input
               where atol() would be undefined */
            join(v, t, 1);
            set(v, 1, C_INT(strtol(yytext + 1, NULL, 10)));
            break;

        case number:
            join(v, t, 1);
            set(v, 1, C_INT(strtol(yytext, NULL, 10)));
            break;

        default:
            join(v, t, 0);
            break;
    }

    *V_TOKEN = v;
}

