/*
   File: lxcn_test_lookup.c

   Copyright 2009 Radboud University of Nijmegen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <lxcn_input.h>
#include <lxcn_search.h>
#include <lxcn_lexicon.h>
#include <lxcn_vocabulary_impl.h>

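/* rudimentary interface to test liblexicon separately from agfl
 * arg1: blx file (default: example.blx)
 * arg2: max edit distance (default: 1)
 * arg3: best-first setting (default: 1)
 * arg4: option letters: 'p' also show partial matches,
 *       '1' stop after the first match shown, 's' print search statistics
 */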

#ifndef timersub
/*
 * Fallback used only when <sys/time.h> did not already define timersub().
 * Stores the difference (*tvp - *uvp) of two struct timeval values in *vvp.
 * When the microsecond difference comes out negative, one second is borrowed
 * so that tv_usec is brought back up by 1000000.
 * Note: every argument is evaluated more than once, so pass plain pointer
 * expressions without side effects.
 */
#define timersub(tvp, uvp, vvp)						\
	do {								\
		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
		if ((vvp)->tv_usec < 0) {				\
			(vvp)->tv_sec--;				\
			(vvp)->tv_usec += 1000000;			\
		}							\
	} while (0)
#endif

/*
 * Return the next word from the stream `in`, or NULL at end of input.
 *
 * Input is read one line at a time into a static buffer; a "> " prompt is
 * printed to stdout before each read.  A word starts at the first alphabetic
 * character and continues over alphanumeric characters.  Inside a word a
 * backslash escapes the following character, so that character is taken
 * literally (the backslash itself is dropped).
 *
 * The returned pointer refers to static storage that is overwritten by the
 * next call; the function keeps its position between calls and is therefore
 * not reentrant.
 */
char *get_word(FILE* in)
{
    static char buf[256], hold[256];
    static char *p = buf;
    char *q;

    for (;;) {
        /* Skip everything that cannot start a word.  The casts to
         * unsigned char keep the <ctype.h> calls defined for bytes >= 0x80. */
        while (*p != '\0' && !isalpha((unsigned char)*p))
            ++p;
        if (*p != '\0')
            break;

        /* Current line exhausted: fetch the next one. */
        p = buf;
        printf("> ");
        fflush(stdout);
        if (fgets(buf, sizeof buf, in) == NULL) {
            /* Do not leave the previous line behind to be re-parsed. */
            buf[0] = '\0';
            return NULL;
        }
        /* A line with no newline that did not end at EOF did not fit. */
        if (strchr(buf, '\n') == NULL && !feof(in))
            fprintf(stderr, "warning: Input line too long: input may be truncated!\n");
    }

    /* The word can never be longer than the line it came from, so `hold`
     * (same size as `buf`) cannot overflow. */
    q = hold;
    while (isalnum((unsigned char)*p) || (*p == '\\' && *++p != '\0'))
        *q++ = *p++;
    *q = '\0';

    return hold;
}

/*
 * Substitute the default lexicon file name when no argument was given.
 * Returns `arg` itself when it is non-NULL, otherwise the string literal
 * "example.blx" (which must not be modified by the caller).
 */
char *default_arg(char *arg)
{
    if (arg == NULL)
        return "example.blx";

    return arg;
}

/*
 * Interactive lookup driver.
 *
 * Loads the lexicon named by argv[1] (or the default), configures the search
 * from argv[2..4], prints the lexicon statistics, and then looks up every
 * word returned by get_word(stdin).  For each word the matches are listed
 * (at most MAX_SHOWN lines) together with the user CPU time spent, and a
 * grand total is printed at end of input.
 *
 * argv[2]: max edit distance (default 1)
 * argv[3]: best-first setting (default 1)
 * argv[4]: option letters: 'p' show partial matches, '1' stop after the
 *          first match shown, 's' print search statistics
 */
int main(int argc, char *argv[])
{
    enum { MAX_SHOWN = 200 };   /* cap on match lines printed per word */

    Lexicon lex = lxcn_read_lexicon(default_arg(argc > 1 ? argv[1] : NULL));
    double total = 0;
    unsigned words = 0, hits = 0, partials = 0;
    unsigned long nodes = 0, swaps = 0, entries = 0;
    int lvocs, lnodes, lentries;
    size_t lsize, ltotal;
    char *s;
    int showpartial = 0, breakone = 0, stats = 0;

    if (!lex) {
        fprintf(stderr, "error: could not read lexicon\n");
        abort();
    }

    max_edit_distance = argc > 2 ? atoi(argv[2]) : 1;
    best_first = argc > 3 ? atoi(argv[3]) : 1;
    showpartial = argc > 4 && strchr(argv[4], 'p');
    breakone = argc > 4 && strchr(argv[4], '1');
    stats = argc > 4 && strchr(argv[4], 's');

    printf("max edit dist[%d] best first[%d]\n", max_edit_distance, best_first);
    lxcn_lexicon_stats(lex, &lvocs, &lnodes, &lentries, &lsize);

    /* Structure size estimate; guarded so that an empty lexicon does not
     * make (lnodes - 1) wrap around when converted to size_t. */
    ltotal = 0;
    if (lnodes > 0) {
        ltotal = sizeof(struct voc_trie_rec) * (size_t)lnodes
               + sizeof(struct voc_index_tree_rec) * (size_t)(lnodes - 1);
    }
    printf("Lexicon size: %d nodes, %d lexemes, %zu stringbytes, %zu structbytes\n",
           lnodes, lentries, lsize, ltotal);

    /* Alternative character-table setup kept for reference:
     * lxcn_init_char_tables (" \t\n\r",  "",
     *                        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
     *                        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
     */
    lxcn_init_char_tables (" \t\n\r",  "", "", 0);

    while ((s = get_word(stdin)) != NULL) {
        struct rusage a, b;

        printf("Looking up [%s]\n", s);
        getrusage(RUSAGE_SELF, &a);
        LexiconIterator iter = lxcn_init_lexicon_match(s, lex);
        /* Stack canary: checked after every match to catch the library
         * scribbling over this frame. */
        volatile unsigned checkme = 0xdeadbeefu;

        unsigned num = 0, pnum = 0;   /* matches shown / matches seen */
        for (;;) {
            int nr, dist;
            LexemeType lextype;
            char *str2;

            char *str = lxcn_next_lexicon_match(iter);
            if (!str)
                break;
            ++pnum;
            /* A non-empty remainder is treated as a partial match. */
            if (*str && !showpartial)
                continue;

            lxcn_get_lexicon_match_info(iter, &nr, &str2, &lextype, &dist);
            if (num < MAX_SHOWN) {
                if (stats) {
                    printf("[%6lu nodes %6lu entries %6lu swaps] ",
                           (unsigned long)lxcn_nodes_visited,
                           (unsigned long)lxcn_entries_tried,
                           (unsigned long)lxcn_swaps);
                }
                printf("Match %u: [%s] (%d,t%d) rem [%s] dist(%d)\n",
                       num, str2, nr, lextype, str, dist);
            }
            ++num;
            assert(checkme == 0xdeadbeefu);
            if (breakone)
                break;
        }
        getrusage(RUSAGE_SELF, &b);

        /* User CPU time for this lookup, in milliseconds. */
        struct timeval res;
        timersub(&b.ru_utime, &a.ru_utime, &res);
        double d = (res.tv_sec * 1e6 + res.tv_usec) / 1000;

        if (stats) {
            printf("[%6lu nodes %6lu entries %6lu swaps] ",
                   (unsigned long)lxcn_nodes_visited,
                   (unsigned long)lxcn_entries_tried,
                   (unsigned long)lxcn_swaps);
        }
        printf("<... %u full matches, %u partial matches not shown $ %.3fms>\n",
               num, pnum - num, d);

        words++;
        hits += num;
        partials += pnum - num;
        total += d;

        nodes += lxcn_nodes_visited;
        swaps += lxcn_swaps;
        entries += lxcn_entries_tried;

        lxcn_finish_lexicon_match(iter);
    }

    printf("\n");
    if (stats)
        printf("[%6lu nodes %6lu entries %6lu swaps] ", nodes, entries, swaps);
    printf("total %u queries, %u hits, %u partials $ %.3fms\n",
           words, hits, partials, total);

    return 0;
}

