/*
   File: lxcn_test_nfa.c
   
   Copyright 2009 Radboud University of Nijmegen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <limits.h>
#include <assert.h>
#include <time.h>
#include <lxcn_nfa.h>

/* reading aids */

/*
 * Return the smaller of two ints.
 *
 * Declared `static inline`: a plain `inline` definition (no `static`, no
 * `extern`) is only an "inline definition" in C99 and later, so no external
 * symbol is emitted and any call the compiler decides not to inline fails
 * at link time.
 */
static inline int min2(int a, int b)
{
    return (a < b) ? a : b;
}

/*
 * Return the smallest of three ints.
 *
 * Must be `static` as well: an inline function with external linkage may
 * not refer to an identifier with internal linkage such as min2().
 */
static inline int min(int a, int b, int c)
{
    return min2(min2(a, b), c);
}

/* dynamic programming based solution, for reference */

/* Unit edit costs used by the reference algorithm below.
 * Macro arguments are parenthesized so that expression arguments expand
 * safely. */
#define ins_cost(c) 1
#define del_cost(c) 1
#define repl_cost(a,b) ((a) != (b))

/*
 * Reference edit distance between `input` and `word`, computed by dynamic
 * programming from the ends of both strings towards the front.
 *
 * Conceptually cell [j][i] holds the cost of turning the suffix input[j..]
 * into the suffix word[i..]; the answer is cell [0][0].  Only one row of
 * that table is kept at a time (heap allocated), so memory use is linear
 * in strlen(word) instead of a quadratic variable-length array on the
 * stack.
 *
 * Neither string is modified.  Aborts the program if the row cannot be
 * allocated.
 */
int editdistance(const char *input, const char *word)
{
    size_t n = strlen(input);
    size_t m = strlen(word);
    size_t i, j;
    int result;
    int *row = calloc(m + 1, sizeof *row);  /* calloc checks the size product */

    if (row == NULL)
        abort();

    /* Bottom row (input exhausted): only insertions of the rest of word. */
    row[m] = 0;
    for (i = m; i-- > 0; )
        row[i] = row[i+1] + ins_cost(word[i]);

    for (j = n; j-- > 0; ) {
        /* `diag` tracks cell [j+1][i+1] while row[] is overwritten in place. */
        int diag = row[m];

        /* Rightmost column (word exhausted): only deletions remain. */
        row[m] = diag + del_cost(input[j]);

        for (i = m; i-- > 0; ) {
            int below = row[i];             /* cell [j+1][i], not yet overwritten */

            row[i] = min(
                    below    + del_cost(input[j]),
                    row[i+1] + ins_cost(word[i]),
                    diag     + repl_cost(input[j], word[i])
                  );
            diag = below;
        }
    }

    result = row[0];
    free(row);
    return result;
}

/* ================== */

/*
 * Apply one random step to `string`, in place.
 *
 * `string` must be a writable buffer of at least 100 bytes holding a
 * NUL-terminated string; `original` is the reference word and is not
 * modified.  If `string` is empty it is first re-seeded with a copy of
 * `original`.  Then, at a random position, one of the following happens:
 *
 *   1 in 20: replace the character by a random lowercase letter,
 *   1 in 20: insert a random lowercase letter (skipped when full),
 *   1 in 20: delete the character,
 *   otherwise: restore the character from `original` at that position.
 *
 * The function does nothing if `original` does not fit in the buffer when
 * re-seeding is required.
 */
void corrupt(char *string, const char *original)
{
    /* Longest string the caller's 100-byte buffer can hold. */
    enum { MAX_LEN = 99 };

    size_t orig_len = strlen(original);
    size_t len = strlen(string);
    size_t pos;

    if (len == 0) {
        if (orig_len > MAX_LEN)
            return;
        memcpy(string, original, orig_len + 1);
        len = orig_len;
    }

    pos = len ? (size_t)rand() % len : 0;

    switch (rand() % 20) {
    case 0:
        /* Nothing to replace in an empty string; writing here would
         * overwrite the terminator. */
        if (len > 0)
            string[pos] = (char)((rand() % 26) + 'a');
        break;
    case 1:
        if (len >= MAX_LEN)
            return;
        /* Shift the tail, including the terminator, one place right. */
        memmove(string + pos + 1, string + pos, len - pos + 1);
        string[pos] = (char)((rand() % 26) + 'a');
        break;
    case 2:
        /* Shift the tail left over string[pos]: that is the len-pos-1
         * characters after pos plus the terminator, i.e. len-pos bytes. */
        if (len > 0)
            memmove(string + pos, string + pos + 1, len - pos);
        break;

    default:
        /* Only positions that exist in `original` can be restored.
         * pos == orig_len copies the terminator, which trims `string`
         * back to the original length; beyond that there is nothing to
         * restore from. */
        if (pos <= orig_len)
            string[pos] = original[pos];
        break;
    }
}

/* Blackbox test */

/*
 * Run `string` through the automaton and report the match cost.
 *
 * `fsm` is received by value, so the lxcn calls below operate on this
 * function's own copy of the state structure.
 *
 * Returns accuracy+1 when the automaton does not accept the string;
 * otherwise returns the second value produced by lxcn_nfa_get_max_pos()
 * (presumably the number of edits used -- confirm against lxcn_nfa.h).
 */
int fuzzy_match(AutomatonState fsm, char *string, int accuracy)
{
    int max_x, max_y;

    lxcn_nfa_feed_string(&fsm, string);

    if (lxcn_nfa_accepts(&fsm)) {
        lxcn_nfa_get_max_pos(&fsm, &max_x, &max_y);
        return max_y;
    }

    /* Rejected: signal "beyond the limit". */
    return accuracy + 1;
}

/* The reference editdistance() in this file only models insertion, deletion
 * and replacement, so refuse to build when TRANSPOSITIONS is enabled.
 * TRANSPOSITIONS is not defined in this file; presumably it comes from
 * lxcn_nfa.h -- confirm. */
#if TRANSPOSITIONS
#error no reference algorithm 
#endif

/*
 * Randomized black-box test.
 *
 * Usage: <program> [word [limit]]
 *
 *   word   reference string (default: the lowercase alphabet); must fit in
 *          the 100-byte working buffer, i.e. at most 99 characters
 *   limit  edit-distance limit; missing, zero or non-numeric selects
 *          MAX_EDITDISTANCE
 *
 * Repeatedly mutates a working copy of the word with corrupt() and asserts
 * that fuzzy_match() agrees with the reference editdistance(): either both
 * report the same distance, or the reference distance exceeds the limit and
 * fuzzy_match() returned its limit+1 "no match" value.
 *
 * Note that the check relies on assert(), so it is inactive when built with
 * NDEBUG.
 */
int main(int argc, char *argv[])
{
    /* Only look at argv[1]/argv[2] when argc says they exist; argv[argc]
     * is the last element guaranteed to be readable. */
    char *original = (argc > 1) ? argv[1] : NULL;
    int accuracy = (argc > 2) ? atoi(argv[2]) : 0;
    int i;
    char string[100];

    AutomatonData data;
    AutomatonState state;

    if(!original) original = "abcdefghijklmnopqrstuvwxyz";
    if(!accuracy) accuracy = MAX_EDITDISTANCE;

    /* The working copy lives in a fixed buffer; reject words that would
     * overflow it instead of letting strcpy() write past the end. */
    if (strlen(original) >= sizeof string) {
        fprintf(stderr, "word too long (at most %d characters)\n",
                (int)sizeof string - 1);
        return EXIT_FAILURE;
    }

    srand(time(NULL));
    strcpy(string, original);

    lxcn_init_char_tables (" \t\n\r",  "", "", "");
    lxcn_nfa_create(original, accuracy, &data, &state);

    printf("Testing using random edit ops (limit %d)\nUsing the string '%s'\n", accuracy, original);

    for(i=0; i < 100000; i++) {
        int classic, liblex;

        corrupt(string, original);
        classic = editdistance(original, string);
        liblex  = fuzzy_match(state, string, accuracy);
        printf("%64s | %4d %4d\n", string, classic, liblex);

        /* Explicit parentheses: && binds tighter than ||. */
        assert(classic == liblex ||
               (classic > accuracy && liblex == accuracy+1));
    }

    return 0;
}


