// Program to build a .blf file from various .dat files
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU Library General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: lexgen.cc,v 1.33 2006/03/22 15:25:08 marcs Exp $

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

using namespace std;
#include <errno.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string>
#include <vector>
#include <fstream>

// libabase includes
#include <abase_meminfo.h>

// local includes
#include "field.h"
#include "globals.h"
#include "options.h"
#include "lexfileio.h"
#include "globsetafftable.h"
#include "lexentrylistidx.h"
#include "avltrie.h"
#include "globlexafflists.h"
#include "globtables.h"
#include "lexnontermlist.h"
#include "lifparser.h"
#include "datparser.h"

//------------------------------------------------------------------------------
// Global data
//------------------------------------------------------------------------------
// File name suffixes. dat_ext is appended to a lexicon module name to form
// the name of the .dat file that is read and whose dates are checked.
// NOTE(review): lex_ext and lif_ext are not referenced in the code visible
// here -- confirm where they are used before removing them.
const string dat_ext = ".dat";
const string lex_ext = ".blf";
const string lif_ext = ".lif";

// Program options; the object is created in main() from argc/argv.
Options *options;

AffixTable affix_table; 		// contains all affix sets
ParamTable param_table; 		// contains all parameter lists
NontermTable nonterm_table;		// contains all nonterminals

// will be written in this order:
// (write_all_data() calls write_output() on these objects in exactly this
// sequence, followed by its local LexEntryListIdx)
AvlTrie rule_table;                     // trie with lexemes and entries
LexIntAffixList lex_intaffix_list;
LexTextAffixList lex_textaffix_list;
LexAffixNameList lex_affixname_list;
LexNontermList lex_nonterm_list;
//LexEntryList lex_entry_list;		now member of LexEntryListIdx
//LexEntryListIdx lex_entry_list_idx;	now local in write_all_data

// Names of the lexicon modules registered so far, in registration order.
// Each name is later combined with dat_ext to find the module's .dat file.
vector<string> lex_modules;

// Register a lexicon module by appending its name to lex_modules.
// Duplicates are not filtered out.
void add_to_lex_modules(string name)
{
    lex_modules.insert(lex_modules.end(), name);
}

//------------------------------------------------------------------------------
//------------------------------------------------------------------------------

// Print logtext as one line on standard output, but only when the
// verbosity level taken from the program options is 2 or higher.
void write_log(string logtext)
{
    const bool chatty = options->verbose() >= 2;
    if (!chatty) {
        return;
    }

    cout << logtext << endl;
}

//------------------------------------------------------------------------------
// Function:
//	static void show_stats(ostream& os)
//
// Description:
//	Print, for the affix set, parameter and nonterminal tables, the
//	allocated and requested counts reported by each table.
//------------------------------------------------------------------------------

static void show_stats(ostream& os)
{
    // Title, followed by an empty line.
    os << "STATISTICS" << endl << endl;

    // Column captions.
    os << "TABLE        ";
    os << Field("ALLOCATED") << Field("REQUESTED") << endl;

    // One row per global table: allocated count first, requested second.
    os << "affix sets   ";
    os << Field(affix_table.allocated()) << Field(affix_table.requested()) << endl;

    os << "parameters   ";
    os << Field(param_table.allocated()) << Field(param_table.requested()) << endl;

    os << "nonterminals ";
    os << Field(nonterm_table.allocated()) << Field(nonterm_table.requested()) << endl;

    // Closing empty line.
    os << endl;
}

//------------------------------------------------------------------------------
// Function:
//	static bool process_lexica()
//
// Description:
//	Parse the lexical database file of each module in lex_modules
//	(module name plus ".dat"), and enter definitions into trie. The
//	name of each module is reported before its file is read.
//	For each text affix, the parser updates the character frequencies
//	in freq.
//
// Return value:
//	true if every file was parsed successfully, false as soon as one
//	file fails to parse.
//------------------------------------------------------------------------------

static bool process_lexica()
{
    // Parses the .dat file of every registered lexicon module, in the order
    // in which the modules appear in lex_modules. Returns false as soon as
    // one file fails to parse, and true once all files have been parsed.
    //
    // 2002-03-19  pb  the "processing lexicon file(s)" / "finished reading
    // lexicon files" messages were removed; the per-module line below is
    // printed regardless of verbosity.
    for (vector<string>::iterator i = lex_modules.begin();
         i != lex_modules.end();
         ++i) {
        cout << "  reading lexicon \"" << *i << "\"" << endl;

        // The parser is an automatic object, so it is destroyed on every
        // way out of this iteration -- including an exception thrown by
        // parse() -- without a matching delete on each path.
        DatParser dat_parser(*i + dat_ext);
        if (!dat_parser.parse()) {
            return false;
        }
    }

    return true;
}


//------------------------------------------------------------------------------
// Function:
//	static bool process_lif()
//
// Description:
//	Read the entries from the LIF (Lexicon Interface File). Returns true
//	if the file was parsed, false if it is absent or cannot be read.
//------------------------------------------------------------------------------

static bool process_lif()
{
    string lif_name = options->get_lifname();

    // 2002-03-19  pb  next 3 lines with verbosity removed
    // if (options->verbose()) {
    //     cout << "processing \"" << lif_name << "\"" << endl;
    // }

    LifParser lif_parser;
    if (lif_parser.no_lif_file(lif_name)) {
        if (options->verbose()) {
            cout << "no lif file present, finished." << endl;
        }
        return false;
    } else if (lif_parser.parse_lif(lif_name, &lex_nonterm_list)) {
	return true;
    } else {
	if (options->verbose()) {
	    cout << "can't read lif file so no lexicon, stopping." << endl;
	}
	return false;
    }
}

//------------------------------------------------------------------------------
// Write all data to .blf file
//------------------------------------------------------------------------------
static void write_all_data(ostream &os)
{
    // Writes the trie and all lexicon lists to os, in the fixed order
    // rule_table, int affixes, text affixes, affix names, nonterminals,
    // entry list index. Terminates the program if writing the trie leaves
    // os in a failed state; later failures are left for the caller to
    // detect through the state of os.

    // 2002-10-13 FN used only in the write process:
    LexEntryListIdx lex_entry_list_idx;	// maps offsets to entrylist indexes

#ifdef DEBUG_LEXGEN
    rule_table.print(cout, 0);
#endif
    rule_table.write_output(os, &lex_entry_list_idx);
    if (!os) {
        // Save errno before printing: the output to cout may overwrite it.
        const int write_errno = errno;
        cout << "Error writing lexemes to output file" << endl;
        // A failed stream does not guarantee that errno is set; never
        // leave with status 0, which would signal success to the caller.
        exit(write_errno != 0 ? write_errno : 1);
    }

    may_log_timestamp("lex_entry_list.generate_table");
    lex_entry_list_idx.generate_entry_table(&lex_nonterm_list);

#ifdef DEBUG_LEXGEN
#if 0
    rule_table.print(cout, 0);
    lex_entry_list_idx.dump();
    lex_entry_list.dump();
    lex_nonterm_list.dump();
    lex_affixname_list.dump();
#endif
#endif

    lex_intaffix_list.write_output(os);
    lex_textaffix_list.write_output(os);
    lex_affixname_list.write_output(os);
    lex_nonterm_list.write_output(os);
    lex_entry_list_idx.write_output(os); // writes lex_entry_list first
}

//------------------------------------------------------------------------------
// Function: read_old_lexicon
//
// Description:
//      read the already stored lexicon.
//------------------------------------------------------------------------------

Lexicon read_old_lexicon(const char* lexfilename)
{
    // Loads the lexicon stored in the file lexfilename. Returns the result
    // of lexicon_new() on the opened file, or NULL when the file cannot be
    // opened.

    // Open in binary mode: main() writes this file with ios::binary, so it
    // must be read back without any text-mode translation.
    FILE* lexfile = fopen(lexfilename, "rb");
    if (lexfile == NULL) {
#ifdef DEBUG_LEXGEN
        cout << "Something went wrong while reading the old lexicon file"
             << endl << "\trewriting it" << endl;
#endif // DEBUG_LEXGEN

        return NULL;
    }

#ifdef DEBUG_LEXGEN
    cout << "old lexicon file seems to exist -- reading it" << endl;
#endif // DEBUG_LEXGEN

    Lexicon lex = lexicon_new(lexfile);

    // The FILE handle is only needed while lexicon_new() runs.
    fclose(lexfile);

    return lex;
}


//------------------------------------------------------------------------------
// Function: lif_file_has_changed(Lexicon old_lexicon)
//
// Description:
//      Checking for changes in the lif file by asking the lexicon nonterminals
//      and the affixes if things are different.
//------------------------------------------------------------------------------

bool lif_file_has_changed(Lexicon old_lexicon)
{
    // Runs up to three checks in a fixed order -- missing old lexicon,
    // changed nonterminals, changed affix names -- and stops at the first
    // one that fires. `reason` holds the log text of that check; it stays
    // NULL when nothing differs.
    const char* reason = NULL;

#ifdef CMP_DEBUG
    cout << "check old_lexicon ptr..." << endl;
#endif // CMP_DEBUG
    if (old_lexicon == NULL) {
        reason = "error while reading lexicon";
    }

    if (reason == NULL) {
#ifdef CMP_DEBUG
        cout << "check nonterminals..." << endl;
#endif // CMP_DEBUG
        if (lex_nonterm_list.has_changed(old_lexicon)) {
            reason = "nonterminals have changed";
        }
    }

    if (reason == NULL) {
#ifdef CMP_DEBUG
        cout << "check affixes..." << endl;
#endif // CMP_DEBUG
        if (lex_affixname_list.has_changed(old_lexicon)) {
            reason = "affixes have changed";
        }
    }

    // Exactly one log line per call, whatever the outcome.
    const bool changed = (reason != NULL);
    write_log(changed ? reason : "lex file hasn't changed");
    return changed;
}


//------------------------------------------------------------------------------
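// Functions: bool check_dates(string fname1, string fname2)
//            bool dat_file_has_changed(Options* options)
//
// Description:
//      check_dates reports whether file fname1 is newer than file fname2,
//      or whether either of them cannot be examined. dat_file_has_changed
//      uses it to check if a dat file is newer than the lex file. If so,
//      things have changed.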
//------------------------------------------------------------------------------

bool check_dates(string fname1, string fname2)
{
    // Returns true when fname1 has a later modification time or a later
    // status-change time than fname2, and also when stat() fails for
    // either file. Returns false otherwise.
    struct stat first;
    struct stat second;
#ifdef CMP_DEBUG
    cout << "comparing dates of \"" << fname1;
    cout << "\" and \"" << fname2 << "\"" << endl;
#endif

    // stat() yields non-zero on failure; fname2 is only examined when
    // fname1 could be examined.
    if (stat(fname1.c_str(), &first) != 0) {
#ifdef CMP_DEBUG
        cout << "cannot stat " << fname1 << endl;
#endif
        return true;
    }
    if (stat(fname2.c_str(), &second) != 0) {
#ifdef CMP_DEBUG
        cout << "cannot stat " << fname2 << endl;
#endif
        return true;
    }

#ifdef CMP_DEBUG
    cout << "stat1.st_mtime = " << first.st_mtime << endl;
    cout << "stat2.st_mtime = " << second.st_mtime << endl;
    cout << "stat1.st_ctime = " << first.st_ctime << endl;
    cout << "stat2.st_ctime = " << second.st_ctime << endl;
#endif

    // The modification time is compared first; the status-change time is
    // only consulted when the modification time is not later.
    bool newer = false;
    if (first.st_mtime > second.st_mtime) {
#ifdef CMP_DEBUG
        cout << "mtime changed" << endl;
#endif
        newer = true;
    } else if (first.st_ctime > second.st_ctime) {
#ifdef CMP_DEBUG
        cout << "ctime changed" << endl;
#endif
        newer = true;
    }

    return newer;
}

bool dat_file_has_changed(Options* options)
{
    // Returns true when compilation is forced through the options, or when
    // check_dates() reports true for the .dat file of at least one module
    // in lex_modules compared with the output file. Returns false
    // otherwise, including when lex_modules is empty.
    if (options->force_compilation()) {
        return true;
    }

    string target = options->get_output_filename();

    // Walk the modules until the first one that triggers a rebuild.
    bool changed = false;
    vector<string>::iterator module = lex_modules.begin();
    while (!changed && module != lex_modules.end()) {
        changed = check_dates(*module + dat_ext, target);
        ++module;
    }

    return changed;
}


//------------------------------------------------------------------------------
// main
//------------------------------------------------------------------------------

// Exit status: 0 when the output is up to date, was written successfully,
// or process_lif() returned false; 2 when a lexicon file failed to parse;
// 4 when the output file cannot be opened; errno (or 1 when errno is 0)
// when writing or closing the output file fails.
int main(int argc, char* argv[]) {
    options = new Options(argc, argv);

    // Without a usable lif file there is nothing to generate.
    if (!process_lif()) return 0;

    if (!dat_file_has_changed(options)) {
        write_log("dat file has not changed, checking lif file");

        Lexicon old_lexicon = read_old_lexicon(options->get_output_filename().c_str());

        if (!lif_file_has_changed(old_lexicon)) {
            write_log("lif file has not changed, finished.");

            lexicon_free(old_lexicon);

            return 0;
        } else {
            // NOTE(review): old_lexicon is not released on this path. The
            // call below was already disabled in the original code, and
            // old_lexicon can be NULL here -- confirm whether lexicon_free
            // is safe before re-enabling it.
//            lexicon_free(old_lexicon);
        }
    } else {
        write_log("dat file has changed, rewriting blf file");
    }
    
    if (!process_lexica()) {
        return 2;
    }

    ofstream ofs(options->get_output_filename().c_str(), ios::out|ios::binary|ios::trunc);
    if (!ofs) {
        cout << "Error: cannot open file `" << options->get_output_filename() << "'" << endl;
        return 4;
    }

    may_log_timestamp("write_all_data");
    write_all_data(ofs);
    if (!ofs) {
        // Save errno before printing: the output to cout may overwrite it.
        const int write_errno = errno;
        cout << "Error writing output file" << endl;
        // A failed stream does not guarantee that errno is set; never
        // leave with status 0 after a failed write.
        exit(write_errno != 0 ? write_errno : 1);
    }
    may_log_timestamp("close");
    ofs.close();
    if (!ofs) {
        // close() writes out whatever is still buffered; a failure there
        // means the output file is incomplete and must be reported too.
        const int close_errno = errno;
        cout << "Error writing output file" << endl;
        exit(close_errno != 0 ? close_errno : 1);
    }
  
    // Finally show some stats:
    if (options->verbose() >= 2) {
        show_stats(cout);
    }
    if (options->verbose()) {
        abs_report_meminfo ();
    }

    return 0;
}
