/*
   File: arts_minitdb.c
   Support for the mini-triple-database.

   Copyright 2010 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

*/

/* standard includes */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>

/* libabase includes */
#include "abase_memalloc.h"
#include "abase_error.h"

/* libarts includes */
#include "arts_ds.h"
#include "arts_io.h"
#include "arts_hybrid.h"
#include "arts_lexer.h"

/* liblexicon includes */
#include "lxcn_search.h"

/*
 * Debug hook: every DB(...) statement in this file expands to nothing, so
 * the debug messages and dumps wrapped in it are compiled out.
 */
#define DB(x)

/*
 * One node of the mini triple database.
 *
 * The database is a three-level structure of singly linked lists, as built
 * by is_triple():
 *   level 0: left-hand texts, chained through `next`;
 *   level 1: relations of one left-hand text, reached through `tail`;
 *   level 2: right-hand texts of one (left, relation) pair, reached through
 *            the relation's `tail`.
 * The anonymous union is interpreted by depth: levels 0 and 1 use `tail`,
 * level 2 uses `freq` (see free_part() and dump_part(), which switch on
 * depth < 2).
 */
struct part {
    char *text;			/* private copy of the key text, made in new_part() */
    union {
	struct part *tail;	/* levels 0 and 1: head of the next-level list */
	int freq;		/* level 2: accumulated frequency of the triple */
    };
    struct part *next;		/* next sibling; new_part() keeps siblings in strcmp() order */
};

/* Root (level 0 list) of the mini triple database; NULL while it is empty. */
struct part *mini_tdb;

/*
 * Search the sibling list beginning at `start` for a node whose text is
 * exactly equal to `text` (strcmp).
 *
 * start: first node of the list to search; may be NULL.
 * text:  NUL-terminated key to look for.
 *
 * Returns the matching node, or NULL when the list holds no exact match.
 *
 * A best-match variant based on lxcn_strmatch() penalties was sketched here
 * earlier but never enabled; only exact matching is implemented.
 */
static
struct part *lookup_text(struct part *start, char *text)
{
    struct part *p;

    for (p = start; p != NULL; p = p->next) {
        DB(abs_message("lookup_text: search '%s' == '%s' found?", text, p->text);)
        if (strcmp(text, p->text) == 0)
            return p;
    }

    return NULL;
}

/*
 * XXX mostly copied from arts_hybrid.c:lookup_critical_term()
 *
 * Look up the text of term `t` in the sibling list beginning at `start`.
 *
 * The pieces of the term are concatenated into a local buffer. Whenever a
 * ':' is seen, everything collected so far (and the ':' itself) is dropped,
 * which the original author notes is equivalent to applying
 * skip_pos_prefixes() to the buffer. Trailing spaces are removed, and if
 * arts_ifd.triple_translate is set every character is passed through
 * lxcn_translate() before the lookup.
 *
 * start: first node of the list to search; may be NULL.
 * t:     term to look up; null_term yields NULL.
 *
 * Returns the matching node, or NULL when there is no match or when the
 * assembled text does not fit in the MAXINPUT-sized buffer.
 */
static
struct part *lookup_term(struct part *start, term t)
{
    char buffer[MAXINPUT];
    size_t len = 0;
    string_list ptr;

    if (t == null_term) return NULL;

    /*
     * Small optimization; with the scanning for spaces, it
     * becomes less useful, if at all...
     */
    if (t -> next == NULL &&
        !arts_ifd.triple_translate &&
        strchr(t -> text, ' ') == NULL) {
        return lookup_text (start, skip_pos_prefixes (t->text));
    }

    for (ptr = t; ptr != NULL; ptr = ptr->next) {
        const char *sptr;

        for (sptr = ptr->text; *sptr; sptr++) {
            if (*sptr == ':') {
                /* Restart the buffer; the ':' itself is not kept. */
                len = 0;
                continue;
            }
            /* Never write past the end of the buffer. */
            if (len >= sizeof buffer) return NULL;
            buffer[len++] = *sptr;
        }
    }

    /* Trailing spaces are silently eaten */
    while (len > 0 && buffer[len - 1] == ' ') len--;

    /* Room must remain for the terminating NUL. */
    if (len >= sizeof buffer) return NULL;
    buffer[len] = '\0';

    /*
     * Optionally map everything to lowercase
     * (really: use alphabet file)
     */
    if (arts_ifd.triple_translate) {
        char *dptr;

        for (dptr = buffer; *dptr; dptr++) {
            *dptr = lxcn_translate(*dptr);
        }
    }

    return lookup_text (start, buffer);
}

/*
 * Look up term `t` in the list hanging off `start->tail`.
 * A NULL `start` is passed through as "not found" (NULL).
 */
static
struct part *lookup_tail_term(struct part *start, term t)
{
    return (start != NULL) ? lookup_term(start->tail, t) : NULL;
}

/*
 * Look up the literal `text` in the list hanging off `start->tail`.
 * A NULL `start` is passed through as "not found" (NULL).
 */
static
struct part *lookup_tail_text(struct part *start, char *text)
{
    return (start != NULL) ? lookup_text(start->tail, text) : NULL;
}

/*
 * Score every (head, relator, element) combination against the mini triple
 * database.
 *
 * heads: list of head terms.
 * mods:  list of modifiers; each supplies a relator text, a direction flag
 *        and a list of element terms.
 *
 * For a modifier with a non-zero `dir` the triple is looked up reversed:
 * the element is used as the left-hand text and the head as the right-hand
 * text. For every triple found, bonus_from_frequency() of its frequency is
 * subtracted from the running total. When a triple is missing, the total is
 * set to MAX_PENALTY if arts_ifd.closed_triple_db_option is set, the miss is
 * reported through unfound_triple_mod() if
 * arts_ifd.show_triple_lookups_option is set, and the remaining elements of
 * that modifier are skipped.
 *
 * Returns the accumulated total, or 0 when the database is empty.
 *
 * NOTE(review): after a miss only the element loop is left, so later
 * modifiers and heads can still change a total that was set to
 * MAX_PENALTY -- confirm this is intended.
 */
int minitdb_lookup_triples (term_list heads, modifier_list mods)
{
    term_list head_it;
    int total = 0;

    DB(abs_message("minitdb_lookup_triples; head:");)

    if (mini_tdb == NULL) {
        DB(abs_message("minitdb_lookup_triples; mini_tdb == NULL");)
        return 0;
    }

    for (head_it = heads; head_it != null_term_list; head_it = head_it->rest) {
        struct part *head_entry = lookup_term(mini_tdb, head_it->first);
        modifier_list mod_it;

        for (mod_it = mods; mod_it != null_modifier_list; mod_it = mod_it->next) {
            term_list elem_it;
            int reversed = mod_it->dir;
            struct part *forward_rel;

            DB(abs_message("relator (%d):", reversed);)
            /* Relation list of the head; only used for forward lookups. */
            forward_rel = lookup_tail_text(head_entry, mod_it->relator);

            for (elem_it = mod_it->elems; elem_it != null_term_list; elem_it = elem_it->rest) {
                struct part *leaf;

                if (reversed) {
                    /* reverse: element acts as head, head acts as modifier */
                    struct part *rev_head;
                    struct part *rev_rel;

                    DB(abs_message("reverse; head/rel:");)
                    rev_head = lookup_term(mini_tdb, elem_it->first);
                    rev_rel = lookup_tail_text(rev_head, mod_it->relator);
                    DB(abs_message("modifier:");)
                    leaf = lookup_tail_term(rev_rel, head_it->first);
                } else {
                    DB(abs_message("modifier:");)
                    leaf = lookup_tail_term(forward_rel, elem_it->first);
                }

                if (arts_ifd.show_triple_lookups_option) {
                    abs_printf("MiniTDBlookup: [ ");
                    print_critical_term (-1, head_it -> first);
                    abs_printf(", %c%s, ", (reversed)?'>':'<',mod_it -> relator);
                    print_critical_term (-1, elem_it -> first);
                    abs_printf(" ] => ");
                    if (leaf != NULL) abs_printf("f=%d\n", leaf->freq);
                    else abs_printf("not found\n");
                }

                if (leaf == NULL) {
                    if (arts_ifd.closed_triple_db_option) {
                        total = MAX_PENALTY;
                    }
                    if (arts_ifd.show_triple_lookups_option) {
                        unfound_triple_mod("minitdb", head_it, mod_it, elem_it);
                    }
                    /* Give up on the remaining elements of this modifier. */
                    break;
                }

                total -= bonus_from_frequency (leaf->freq, arts_ifd.radix.tripledb_frequency);
                DB(abs_message("freq = %d", leaf->freq);)
            }
        }
    }

    return total;
}

/* ========================================================================= *
 *                                                                           *
 * Allocation/deallocation functions                                         *
 *                                                                           *
 * ========================================================================= */

/*
 * Allocate storage for one database node through abs_malloc().
 * The fields of the returned node are not initialised here; new_part()
 * fills them in.
 */
static
struct part *alloc_part(void)
{
    struct part *p = abs_malloc(sizeof *p, "alloc_part");
    return p;
}

/*
 * Release a sibling list and everything below it.
 *
 * p:     first node of the list to free; may be NULL.
 * depth: level of this list (0 for the root list). Nodes at depth < 2 own
 *        a sub-list in `tail`; at depth 2 the union holds `freq`, so
 *        nothing is followed.
 *
 * The sibling chain is walked iteratively, so recursion depth is bounded by
 * the number of levels instead of by the length of the list.
 */
static
void free_part(struct part *p, int depth)
{
    while (p != NULL) {
        /* Remember the successor before the node itself is released. */
        struct part *next = p->next;

        if (depth < 2) {
            free_part(p->tail, depth + 1);
        }
        abs_free(p->text, "free_part");
        abs_free(p, "free_part");
        p = next;
    }
}

/*
 * Dispose of the whole mini triple database and leave mini_tdb empty (NULL).
 */
void free_minitdb(void)
{
    struct part *root = mini_tdb;

    /* Detach the tree first, then release it starting at level 0. */
    mini_tdb = NULL;
    free_part(root, 0);
}

/*
 * Find the node with text `text` in the list rooted at *headp, creating it
 * when it does not exist yet.
 *
 * headp: address of the list head (or of a `tail` field); updated when the
 *        new node becomes the first element.
 * text:  key text; a fresh node stores its own copy made by abs_new_string().
 *
 * The list is kept in ascending strcmp() order. A newly created node has
 * its `tail` set to NULL.
 *
 * Returns the existing or newly inserted node.
 *
 * XXX Should be sorted such that un-mapped strings come before
 * the strings that map to them, so a best-match can be obtained first...
 */
static struct part *new_part(struct part **headp, char *text)
{
    struct part **link = headp;
    struct part *node;
    struct part *fresh;

    /* Advance `link` to the slot where `text` belongs. */
    for (; (node = *link) != NULL; link = &node->next) {
        int order = strcmp(node->text, text);
        DB(abs_message("new_part: %s %d %s\n", node->text, order, text);)

        if (order == 0) {
            /* already present */
            return node;
        }
        if (order > 0) {
            /* went too far; the new text goes in front of this node */
            break;
        }
    }

    /* Splice a new node in at *link. */
    fresh = alloc_part();
    fresh->text = abs_new_string(text, "new_part");
    fresh->tail = NULL;
    fresh->next = *link;
    *link = fresh;

    return fresh;
}

/* ========================================================================= *
 *                                                                           *
 * Dumping functions                                                         *
 *                                                                           *
 * ========================================================================= */

/*
 * Print the list starting at `p` and everything below it, indenting each
 * level by four columns.
 *
 * p:     first node of the list to print; may be NULL.
 * depth: level of this list. At depth < 2 the sub-list in `tail` is printed
 *        on the following lines; otherwise the node's `freq` is printed
 *        after its text.
 */
static void dump_part(struct part *p, int depth)
{
    struct part *cur;

    for (cur = p; cur != NULL; cur = cur->next) {
        abs_printf("%*.s'%s'", depth*4, "", cur->text);
        if (depth >= 2) {
            abs_printf(" %d\n", cur->freq);
            continue;
        }
        abs_printf("\n");
        dump_part(cur->tail, depth + 1);
    }
}

/* ========================================================================= *
 *                                                                           *
 * Parsing functions, depending on the lexer in lexgen.                      *
 *                                                                           *
 * ========================================================================= */

#include "lexgen_lexer.h"

/*
 * Two globals needed by lexgen_lexer.o
 *
 * They are defined here so that the lexer object can be linked into this
 * program. hyphen_convention_active has no initialiser and therefore starts
 * out as 0.
 * NOTE(review): the meaning of verbose == -1 is decided by the lexgen lexer
 * and is not visible in this file -- confirm.
 */
int verbose = -1;
int hyphen_convention_active;

/*
 * Basic LL(1) parsing of the trp file: try to parse one triple at the
 * current lexer position and record it in the mini triple database.
 *
 * Two notations are accepted:
 *   [ left , relation , right ]      bracketed form
 *   left relation right              plain form
 * Either may be followed by a signed number giving the frequency; when the
 * number is absent the frequency defaults to 1.
 *
 * Returns 0 when the input starts with neither '[' nor a string (nothing is
 * recorded in that case), and 1 after a triple has been parsed. The
 * frequency is added to the entry for (left, relation, right), which is
 * created on first use.
 *
 * A filter on "interesting" relators (is_interesting_relator) was foreseen
 * here but is disabled: every relation is stored.
 */
static int is_triple (void)
{
    char left_buf[MAX_LEX_LINE_LEN + 1];
    char rel_buf[MAX_LEX_LINE_LEN + 1];
    char right_buf[MAX_LEX_LINE_LEN + 1];
    struct part *p;
    int frequency;

    if (is_char('[')) {
        should_be_string_up_to(left_buf, ',');
        should_be_char(',');
        should_be_relation (rel_buf);
        should_be_char(',');
        should_be_string_up_to(right_buf, ']');
        should_be_char(']');
    } else {
        if (!is_string_with_expansion (left_buf)) return (0);
        should_be_relation (rel_buf);
        should_be_string_with_expansion (right_buf);
    }
    if (!is_signed_number (&frequency)) frequency = 1;

    /* Walk or create the three levels: left text, relation, right text. */
    p = new_part(&mini_tdb, left_buf);
    p = new_part(&p->tail, rel_buf);
    p = new_part(&p->tail, right_buf);
    p->freq += frequency;

    return (1);
}

/*
 * Drive the triple file parsing: open `tname` with the lexer, handle it
 * line by line until end of file, then close it again.
 *
 * Each pass of the loop is expected to eat one line, which is one of:
 *   - an empty line;
 *   - a comment;
 *   - a triple, optionally followed by a comment;
 *   - anything else, which is reported with parse_error() and skipped.
 */
static void parse_trp_file (char *tname)
{
    try_open_lexer_file (tname, triple);

    while (!is_eof ()) {
        may_skip_white_space ();

        if (is_eoln ()) {
            lexer_read_line ();
            continue;
        }
        if (is_comment ()) {
            continue;
        }
        if (!is_triple ()) {
            parse_error ("incomprehensible syntax");
            lexer_read_line ();
            continue;
        }

        /* May still be followed by a comment */
        if (!is_comment ()) {
            should_be_eoln ();
        }
    }

    close_lexer_file ();
}

/*
 * Public entry point: read the triple file `tname` and add its triples to
 * the mini triple database (mini_tdb).
 */
void parse_minitdb (char *tname)
{
    parse_trp_file (tname);
    /* Debug-only dump of the resulting database; compiled out while DB() is empty. */
    DB(dump_part(mini_tdb, 0);)
}
