/*
   File: trp_parser.c
   Parses the triple file

   Copyright 2009-2010 Radboud University of Nijmegen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"
*/

/* system includes */
#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* libabase includes */
#include <abase_memalloc.h>

/* local includes */
#include "globals.h"
#include "lexgen_lexer.h"
#include "dyn_array.h"
#include "trp_parser.h"
#include "nonterminals.h"
#include "entries.h"

/* Relator names collected by parse_relators_dat_file: those listed in
   the relators file plus, on request, the built-in defaults */
static text_array interesting_relators;
/* Set to 1 by parse_relators_dat_file when the relators file holds a '*'
   or does not exist; is_interesting_relator then accepts every relator */
static int everything_is_interesting = 0;
/* Counts the triples that is_triple parsed but did not enter into the
   fact table because their relator was not interesting */
static int nr_uninteresting;

/*
   Report the current value of the nr_uninteresting counter, i.e. the
   number of triples that were parsed but skipped because their
   relator was not interesting.
*/
int nr_uninteresting_triples ()
{
  return (nr_uninteresting);
}

/*
   Append the built-in relator names to interesting_relators,
   in the order SUBJ, OBJ, ATTR, PREP.
*/
static void add_default_interesting_relators ()
{ static char *default_relators[] = { "SUBJ", "OBJ", "ATTR", "PREP" };
  int nr_defaults = (int) (sizeof (default_relators) / sizeof (default_relators[0]));
  int ix;

  for (ix = 0; ix < nr_defaults; ix++)
    app_text_array(interesting_relators, default_relators[ix]);
}

/*
   Read the relators file and fill interesting_relators.

   Each item in the file is one of
   - a relation, which is copied and appended to interesting_relators;
   - a '*', which sets everything_is_interesting;
   - the name "default", which appends the built-in relators.
   Anything else is reported through parse_error.

   If abs_file_ext_exists does not find the file, no parsing is done
   and everything_is_interesting is set instead.
*/
static void parse_relators_dat_file (char *filename)
{ char item[MAX_LEX_LINE_LEN + 1];

  interesting_relators = init_text_array(5);

  /* Without a relators file there is nothing to restrict */
  if (!abs_file_ext_exists(filename, suffix_from_file_kind(relators)))
  { everything_is_interesting = 1;
    return;
  }

  try_open_lexer_file(filename, relators);
  while (!is_end())
  { if (is_eoln ())
    { lexer_read_line ();
      continue;
    }

    if (is_relation (item))
    { /* item is a local buffer, so the array gets its own copy */
      app_text_array(interesting_relators,
                     abs_new_string(item, "parse_relators_dat_file"));
      continue;
    }

    if (is_char ('*'))
    { everything_is_interesting = 1;
      continue;
    }

    /* The only name accepted here is "default" */
    should_be_name (item);
    if (strcmp(item, "default") != 0)
      parse_error ("Relator, \"default\" or \"*\" expected");
    else
      add_default_interesting_relators ();
  }
  should_be_eof ();
  close_lexer_file ();
}

/*
   Decide whether a triple with the given relator must be kept.

   The main name of a relator is its leading run of upper case letters
   (for a relator written as "PREPxyz" that is "PREP"). A relator is
   interesting when
   - everything_is_interesting is set, or
   - an entry of interesting_relators equals its main name, or
   - an entry of interesting_relators equals the relator as a whole.

   Returns 1 if the relator is interesting, 0 otherwise.
*/
static int is_interesting_relator (char *relator)
{ size_t main_len = 0;
  int i;

  if (everything_is_interesting) return 1;

  /* Determine the length of the main name of the relator.
     The <ctype.h> functions need an unsigned char value: a plain char
     may be negative for characters outside ASCII */
  while (isupper((unsigned char) relator[main_len])) main_len++;

  for (i = 0; i < interesting_relators-> size; i++)
  { const char *entry = interesting_relators->array[i];

    /* Main name match: the entry must end exactly where the main name
       ends. Comparing the first main_len characters alone would let an
       entry "SUBJECT" accept the relator "SUBJ", and would let every
       entry accept a relator whose main name is empty */
    if (main_len > 0 &&
        strncmp(relator, entry, main_len) == 0 &&
        entry[main_len] == '\0')
      return 1;

    /* Whole relator match, for entries that are not a bare main name */
    if (strcmp(relator, entry) == 0)
      return 1;
  }

  return 0;
}

/*
   A triple has one of the two forms
     "STRING" RELATION "STRING"
   or
     [MQSTRING,RELATION,MQSTRING]
   where an MQSTRING (Maybe Quoted STRING) is a string that may, but
   need not, be quoted.
*/

/*
   Basic LL(1) parsing of one triple of the trp file.

   Recognizes either the bracketed form, introduced by '[', or the
   form that starts with a string with expansion. An optional signed
   number after the triple is taken as its frequency; when it is
   absent the frequency is 1.

   A triple with an interesting relator is entered into the fact table
   and the int returned by enter_into_fact_table is increased by the
   frequency; any other triple only increments nr_uninteresting.

   Returns 0 if the input does not start with a triple, 1 otherwise.
*/
static int is_triple ()
{ char head[MAX_LEX_LINE_LEN + 1];
  char relator[MAX_LEX_LINE_LEN + 1];
  char tail[MAX_LEX_LINE_LEN + 1];
  int criticals[4];
  int count;
  int *fact_info;

  if (!is_char('['))
  { /* "STRING" RELATION "STRING": only the first string is optional */
    if (!is_string_with_expansion (head)) return (0);
    should_be_relation (relator);
    should_be_string_with_expansion (tail);
  }
  else
  { /* [MQSTRING,RELATION,MQSTRING]: committed once '[' has been seen */
    should_be_string_up_to(head, ',');
    should_be_char(',');
    should_be_relation (relator);
    should_be_char(',');
    should_be_string_up_to(tail, ']');
    should_be_char(']');
  }

  /* The frequency is parsed even for triples that are skipped below */
  if (!is_signed_number (&count)) count = 1;

  if (!is_interesting_relator (relator))
  { nr_uninteresting++;
    return (1);
  }

  criticals[0] = 3;	/* criticals */
  criticals[1] = register_critical_text (head);
  criticals[2] = register_critical_text (relator);
  criticals[3] = register_critical_text (tail);
  fact_info = enter_into_fact_table (nr_of_facts (), criticals);
  *fact_info += count;
  return (1);
}

/*
   Drive the parsing of one triple file.

   Every pass through the loop handles one item: an end of line, a
   comment, or a triple that may be followed by a comment and must
   otherwise be followed by an end of line. Anything else is reported
   through parse_error, after which lexer_read_line is called.
*/
static void parse_trp_file (char *tname)
{ try_open_lexer_file (tname, triple);
  while (!is_eof ())
  { may_skip_white_space ();

    if (is_eoln ())
    { lexer_read_line ();
      continue;
    }

    if (is_comment ()) continue;

    if (!is_triple ())
    { parse_error ("incomprehensible syntax");
      lexer_read_line ();
      continue;
    }

    /* A triple may still be followed by a comment */
    if (!is_comment ()) should_be_eoln ();
  }
  close_lexer_file ();
}

/*
   Entry point: parse the relators file named RELATORS_DAT_FILE and
   then every triple file listed in triples_database_names, in order.
*/
void parse_triples ()
{ int ix = 0;

  parse_relators_dat_file(RELATORS_DAT_FILE);
  while (ix < triples_database_names -> size)
  { parse_trp_file (triples_database_names -> array[ix]);
    ix++;
  }
}
