%{
/* vim: set tw=0 et: */

// .dat file parser
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: parser.y,v 1.14 2007/07/27 19:43:20 marcs Exp $

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif // HAVE_CONFIG_H

using namespace std;
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include "lxcn_input.h"
#include "globtables.h"   /* for rule_table */
#include "parserinterface.h"

extern char* yytext;
extern void yyerror(char*);
extern int yylex();

/* quick fix problem in /usr/local/gnu/lib/bison/yacc.c */
#undef __GNUC_MINOR__

static char *lexeme_copy(char* dst, char* src, int len);
%}

/* Terminal symbols produced by the lexer (yylex). */
%token NUMBER IDPART NEWLINE
%token COMMA OPENPAR CLOSEPAR VBAR STRING

/* Semantic value of a grammar symbol.  The %type declarations below select
   which member each token or nonterminal uses. */
%union {
    /* text of a STRING token; the actions release it with free() */
    char* chars;
    /* result of nont_def(); deleted by term_rule after use */
    EntryList* EntryListPtr;
    /* list of affix terminals built by aff_terml_new()/aff_terml_add() */
    IDList* ids;
    /* a single affix denotation (text, number or affix terminal list) */
    Param* param_ptr;
    /* result of build_params(), or NULL when a rule has no parameters */
    pParam p_param;
    /* parameter list under construction inside the parentheses */
    MyParamList* params;
    /* heap-allocated identifier text (IDPART tokens and identifier) */
    string* string_ptr;
    /* value of a NUMBER token */
    long number;
}

/* Mapping of symbols to %union members. */
%type <chars> STRING
%type <number> NUMBER
%type <ids> aff_term_list
%type <param_ptr> aff_denotation
%type <params> paramlist
%type <p_param> params opt_params
%type <EntryListPtr> nont
%type <string_ptr> identifier IDPART

%%

/* Start symbol (first rule of the grammar): a lexicon consists of one
   lexicon module body. */
lexicon: lex_module_body
       ;

/* An identifier is a sequence of one or more IDPART tokens.  The parts are
   joined with a single space; the result reuses the string object of the
   first part. */
identifier: identifier IDPART
                {
                    /* Append a space and the next part to the text
                       collected so far, then discard the part. */
                    *($1) += " " + *($2);
                    delete $2;
                    $$ = $1;
                }
          | IDPART
                {
                    /* A single part: pass its string on unchanged. */
                    $$ = $1;
                }
          ;

/* The module body is a (possibly empty) sequence of rules. */
lex_module_body: rules
               ;

/* Zero or more rules; the second alternative is the empty sequence. */
rules: rules rule
     |
     ;

/* A rule is either a terminal rule or a bare NEWLINE token, which has no
   action and therefore no effect. */
rule: term_rule
    | NEWLINE
    ;

/* affix terminals: */

/* One parameter value: a quoted text, a number, or a list of affix terminals
   separated by VBAR.  Each alternative yields a Param pointer. */
aff_denotation: STRING
                    {
                        /* Register the text in idtable and keep only the
                           returned ID; the token text itself is released.
                           NOTE(review): free() implies the lexer allocates
                           STRING values with malloc/strdup -- confirm. */
                        ID id = idtable.add($1);
                        free($1);
                        $$ = new Param(TextType, id);
                    }
              | NUMBER                  { $$ = new Param($1); }
              | aff_term_list           { $$ = affterms_to_param($1); }
              ;

/* One or more identifiers separated by VBAR.  The list is created by
   aff_terml_new() for the first identifier and extended by aff_terml_add()
   for each following one.
   NOTE(review): ownership of the identifier strings passed to these helpers
   is not visible here -- presumably the helpers take it over; verify. */
aff_term_list: aff_term_list VBAR identifier
                                        { $$ = aff_terml_add($1, $3); }
             | identifier               { $$ = aff_terml_new($1); }
             ;


/* parameter packs: */

/* Optional parameter pack: the value of params, or NULL when absent. */
opt_params: params                      { $$ = $1; }
          |                             { $$ = NULL; }
          ;

/* A parenthesized parameter list; build_params() converts the collected
   list into the pParam value used by nont.
   NOTE(review): whether build_params() releases the list is not visible
   here -- verify. */
params: OPENPAR paramlist CLOSEPAR      { $$ = build_params($2); }
      ;

/* One or more affix denotations separated by COMMA, collected in a
   heap-allocated MyParamList.
   NOTE(review): the rule is left-recursive and every element is added with
   push_front(), so the list holds the parameters in reverse source order;
   presumably build_params() expects that -- verify. */
paramlist: paramlist COMMA aff_denotation
            {
                /* Add the new denotation to the existing list. */
                ($1)->push_front($3);
                $$ = $1;
            }
         | aff_denotation
            {
                /* First denotation: create the list. */
                MyParamList* params = new MyParamList;
                params->push_front($1);
                $$ = params;
            }
         ;


/* terminal rules: */

/* A terminal rule: a quoted lexeme followed by a nonterminal definition and
   a NEWLINE.  The lexeme is normalized by lexeme_copy() and entered into
   rule_table together with the entry list of the nonterminal. */
term_rule: STRING nont NEWLINE
            {
                /* Scratch buffer of 2 * len + 3 bytes for the normalized
                   lexeme.
                   NOTE(review): the malloc() result is not checked before
                   lexeme_copy() writes to it. */
                int len = strlen($1);
                char *lexeme = (char *)malloc(len * 2 + 2 + 1);
                lexeme_copy(lexeme, $1, len);
                rule_table.enter(lexeme, *($2));
                /* The entry list, the token text and the scratch buffer are
                   all released here, so rule_table.enter() presumably copies
                   what it needs -- verify. */
                delete $2;
                free($1);
                free(lexeme);
            }
         ;

/* A nonterminal: identifier, optional parameter pack and optional trailing
   NUMBER.  nont_def() builds the entry list; its third argument is the
   NUMBER value, or 0 when no number is given.
   NOTE(review): the meaning of that number is defined by nont_def(), which
   is not visible here. */
nont: identifier opt_params             { $$ = nont_def($1, $2, 0); }
    | identifier opt_params NUMBER      { $$ = nont_def($1, $2, $3); }
    ;
      
%%

// Error callback of the generated parser: instead of printing the message,
// store it in parser_error (declared outside this file) for the caller.
// NOTE(review): if parser_error is a plain char*, only the pointer is kept,
// so the message must outlive this call -- confirm against its declaration.
void yyerror(char* s)
{
    parser_error = s;
}

/*------------------------------------------------------------------------------
// Function:
//	char* lexeme_copy(char* dst, char* src, int len)
//
// Description:
//	Copy the lexeme text src (with length len) to dst in normalized form:
//	- Strip leading and trailing layout (spaces and tabs).
//	- Strip a leading hyphen (the lexeme is a suffix) and an unescaped
//	  trailing hyphen (the lexeme is a prefix), plus the layout next to them.
//	- Insert a control code in front of the word form: MultiTokenMark when
//	  the text contains layout, otherwise InfixMark (both hyphens),
//	  PrefixMark or SuffixMark.  A multi token keeps its hyphens as
//	  literal '-' characters after the mark and at the end.
//	- Reduce every run of spaces or tabs to one space.
//	- Replace \n with newline and \t with tab; for any other escaped
//	  character (\-, \", \\, ...) copy the character itself.
//	If nothing but hyphens and layout was given, the result is "--", "-"
//	or " " (both hyphens, one hyphen, none).
//
// Return value:
//	Pointer dst.
//
// Side Effects:
//	Contents of dst are overwritten.
//	src is modified: stripped trailing layout and a stripped trailing
//	hyphen are overwritten with '\0'.
//
// Memory management:
//	None.
//
// Note:
//	len must be strlen(src).
//	String dst should be at least len + 2 bytes long (mark, optional
//	literal hyphens and terminator included).
//	A lone backslash at the end of the lexeme is dropped.
//
//	Adaptions to lexeme_copy() should also be applied to other components
//	of lexicon system (lexicon, agfl, and rts).
//----------------------------------------------------------------------------*/

/* Layout inside a lexeme is a space or a tab. */
static int lexeme_is_layout(char c)
{
  return (c == ' ') || (c == '\t');
}

static char *lexeme_copy(char* dst, char* src, int len)
{
  int   layout;
  int   prefix = 0;
  int   suffix = 0;
  int   multi_token = 0;
  char  c;
  char* p = src;
  char* d = dst;

  /* Strip leading and trailing layout.  The len > 0 guard keeps the
     backward scan inside the string for empty or all-layout input. */
  while (lexeme_is_layout(*src))
    { src++; len--; }
  while ((len > 0) && lexeme_is_layout(src[len - 1]))
    src[--len] = '\0';

  /* Strip prefix and suffix marks: a leading hyphen denotes a suffix, a
     trailing hyphen that is not preceded by a backslash denotes a prefix. */
  if (*src == '-')
  {
    suffix = 1;
    src++; len--;
  }
  if ((len > 0) && (src[len - 1] == '-')
      && !((len > 1) && (src[len - 2] == '\\')))
  {
    prefix = 1;
    src[--len] = '\0';
  }

  /* Strip the layout that stood between the hyphens and the word form. */
  while (lexeme_is_layout(*src))
    { src++; len--; }
  while ((len > 0) && lexeme_is_layout(src[len - 1]))
    src[--len] = '\0';

  /* Maybe there were only hyphens and layout in the word form. */
  if (!*src)
  {
    if (prefix && suffix)
      strcpy(dst, "--");
    else if (prefix || suffix)
      strcpy(dst, "-");
    else
      strcpy(dst, " ");
    return dst;
  }

  /* Check for multi token.
     NOTE(review): the scan starts at the original, unstripped start of the
     string, so layout in front of the word form (e.g. "- foo") also counts.
     Kept as is, because the other lexicon components must stay in sync. */
  while ((c = *p++))
  {
    if (lexeme_is_layout(c))
    {
      multi_token = 1;
      break;
    }
  }

  /* Mark word form with type. */
  if (multi_token)
  {
    *d++ = MultiTokenMark;
    if (suffix)
      *d++ = '-';
  }
  else if (prefix && suffix)
    *d++ = InfixMark;
  else if (prefix)
    *d++ = PrefixMark;
  else if (suffix)
    *d++ = SuffixMark;

  /* Copy word form.  layout is 1 while the previous character was layout
     (or nothing was copied yet), so every run of layout yields one space. */
  layout = 1;
  while ((c = *src++))
  {
    switch (c)
    {
      case ' ':
      case '\t':
        if (!layout)
        {
          *d++ = ' ';
          layout = 1;
        }
        break;
      case '\\':
        c = *src;
        if (c == '\0')
          break;        /* lone trailing backslash: do not step over the
                           terminator; the loop ends on the next test */
        src++;
        switch (c)
        {
          case 'n':
            *d++ = '\n';
            break;
          case 't':
            *d++ = '\t';
            break;
          default:      /* \-, \", \\ and unknown escapes: the character */
            *d++ = c;
        }
        layout = 0;
        break;
      default:
        *d++ = c;
        layout = 0;
    }
  }
  if (multi_token && prefix)
    *d++ = '-';
  *d = '\0';
  return dst;
}

