// Program to build a .blf file from various .dat files
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU Library General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: lexgen.cc,v 1.20 2001/10/25 09:23:07 ejv Exp $

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <iostream.h>
#include <fstream.h>
#include <errno.h>
#include <sys/stat.h>
#include <unistd.h>

#include "pool.h"
#include "avltrie.h"
#include "datparser.h"
#include "lifparser.h"
#include "options.h"
#include "field.h"
#include "globals.h"
#include "lexfileio.h"
#include <meminfo.h>


//------------------------------------------------------------------------------
// Global data
//------------------------------------------------------------------------------
// Program identification and the file-name extensions of the file kinds this
// tool deals with (.dat lexicon input, .blf output, .lif interface file).
const string prog_name = "lexgen";
const string version = "2.0";
const string dat_ext = ".dat";
const string lex_ext = ".blf";
const string lif_ext = ".lif";

// Command-line options; created in main() from argc/argv and consulted by
// the functions below.
Options *options;

// Definitions of the static Pool members of AvlNode and EntryList; each pool
// is constructed with the size of one object of its class.
Pool AvlNode::pool(sizeof(AvlNode));
Pool EntryList::pool(sizeof(EntryList));

// Parse-time tables; show_stats() reports their allocated/requested counts.
AffixTable affix_table; 		// contains all affix sets
ParamTable param_table; 		// contains all parameter lists
NontermTable nonterm_table;		// contains all nonterminals
AvlTrie rule_table;                     // trie with lexemes and entries

// Output tables; write_rule_data() streams each of them into the .blf file.
LexEntryListIdx lex_entry_list_idx;     // maps offsets to entrylist indexes
LexNontermList lex_nonterm_list;
LexAffixNameList lex_affixname_list;
LexEntryList lex_entry_list;
LexTextAffixList lex_textaffix_list;
LexIntAffixList lex_intaffix_list;

// Lexicon file names; process_lexica() hands each element to a DatParser.
// NOTE(review): presumably filled while the lif file is parsed -- confirm.
vector<string> lex_modules;

//------------------------------------------------------------------------------
// Function:
//	void write_log(string logtext)
//
// Description:
//	Print logtext, followed by a newline, on clog when the verbose
//	option is set. Does nothing otherwise.
//------------------------------------------------------------------------------

void write_log(string logtext)
{
    // Quiet mode: nothing to report.
    if (!options->verbose()) {
        return;
    }

    clog << logtext << endl;
}


//------------------------------------------------------------------------------
// Function:
//	static void show_stats(ostream& os)
//
// Description:
//	Print a small table on os with, for the affix, parameter and
//	nonterminal tables, the number of allocated and the number of
//	requested objects.
//------------------------------------------------------------------------------

static void show_stats(ostream& os)
{
  // Title, followed by an empty line.
  os << "PARSING STATS" << endl << endl;

  // Column captions.
  os << "TABLE        "
     << Field("ALLOCATED") << Field("REQUESTED")
     << endl;

  // One row per table.
  os << "affix sets   "
     << Field(affix_table.allocated()) << Field(affix_table.requested())
     << endl;
  os << "parameters   "
     << Field(param_table.allocated()) << Field(param_table.requested())
     << endl;
  os << "nonterminals "
     << Field(nonterm_table.allocated()) << Field(nonterm_table.requested())
     << endl;

  // Trailing empty line.
  os << endl;
}


//------------------------------------------------------------------------------
// Function:
//	unsigned process_lexica()
//
// Description:
//	Parse lexical database in file with name fn, and enter definitions
//	into trie. If verbose, report name of file being processed.
//	For each text affix, the parser updates the character frequencies
//	in freq.
//
// Return value:
//	Number of syntax erros while parsing file.
//------------------------------------------------------------------------------

static bool process_lexica()
{
    if (options->verbose()) {
        clog << "processing lexicon file(s):" << endl;
    }

    for (vector<string>::iterator i = lex_modules.begin();
         i != lex_modules.end();
         ++i) {
        if (options->verbose()) {
            clog << "\t\"" << *i << "\"" << endl;
        }

        DatParser* dat_parser = new DatParser(*i);
        if (!(dat_parser->parse())) {
            delete dat_parser;
            return false;
        }
        delete dat_parser;
    }

    if (options->verbose()) {
        clog << "finished reading lexicon files" << endl;
    }

    return true;
}


//------------------------------------------------------------------------------
// Function:
//	void process_lif()
//
// Description:
//	Read the entries from the LIF (Lexicon Interface File).
//------------------------------------------------------------------------------

static bool process_lif()
{
    string lif_name = options->get_lifname();

    if (options->verbose()) {
        clog << "processing \"" << lif_name << "\"" << endl;
    }

    LifParser lif_parser;
    if (lif_parser.no_lif_file(lif_name)) {
        if (options->verbose()) {
            clog << "no lif file present, stopping." << endl;
        }

        return false;
    } else {
        lif_parser.parse_lif(lif_name);
        return true;
    }
}


//------------------------------------------------------------------------------
// Function:
//	static unsigned calculate_offsets()
//
// Description:
//	Let the rule table generate its entries and hand back the size it
//	reports. In DEBUG builds the rule table is dumped on cerr first.
//
// Return value:
//	The value returned by rule_table.generate_entries(); the DEBUG_TRIE
//	trace labels it as the trie size in bytes.
//------------------------------------------------------------------------------

static unsigned calculate_offsets()
{
#ifdef DEBUG
    rule_table.print(cerr, 0);
#endif

    const unsigned total_size = rule_table.generate_entries();

#ifdef DEBUG_TRIE
    cerr << "trie_sz = " << total_size << " bytes" << endl;
#endif

    return total_size;
}


//------------------------------------------------------------------------------
// Function:
//	void write_lexemes(ostream &ofs, unsigned trie_sz)
//
// Description:
//	Write trie_sz as a size field of sizeof(long) bytes to ofs, followed
//	by the trie generated from the rule table. If ofs is in a failed
//	state afterwards, an error is printed on cerr and the program is
//	terminated with a non-zero exit status (errno when it is set,
//	1 otherwise).
//------------------------------------------------------------------------------

void write_lexemes(ostream &ofs, unsigned trie_sz)
{
    write_log("writing lexemes to file");

    // The size field in the file is sizeof(long) bytes wide, but trie_sz is
    // an unsigned. Widen it into an object of exactly that width before
    // writing, so that no bytes beyond the variable are read where long is
    // wider than unsigned.
    unsigned long size_field = trie_sz;
    ofs.write((char*)&size_field, sizeof(size_field));
    rule_table.generate_trie(ofs);

    if (!ofs) {
        // Save errno before the cerr output can change it.
        const int saved_errno = errno;
        cerr << "Error writing file lexeme file" << endl;
        // A stream failure does not guarantee errno is set; never report
        // success (status 0) for a failed write.
        exit(saved_errno != 0 ? saved_errno : 1);
    }
}


//------------------------------------------------------------------------------
// Function:
//	void write_rule_data(ofstream &ofs)
//
// Description:
//	Write the rule data tables to the .blf file, in this fixed order:
//	int affixes, text affixes, set affixes (affix names), nonterminals,
//	entries, entry list index. In DEBUG builds the number of elements of
//	each table is traced on cerr just before that table is written.
//------------------------------------------------------------------------------

// Trace helper local to write_rule_data(); expands to nothing unless DEBUG.
#ifdef DEBUG
#define LEXGEN_TRACE_WRITE(list, what) \
    cerr << "writing " << (list).size() << " " what " to file" << endl
#else
#define LEXGEN_TRACE_WRITE(list, what) ((void)0)
#endif

void write_rule_data(ofstream &ofs)
{
    LEXGEN_TRACE_WRITE(lex_intaffix_list, "int affixes");
    ofs << lex_intaffix_list;

    LEXGEN_TRACE_WRITE(lex_textaffix_list, "text affixes");
    ofs << lex_textaffix_list;

    LEXGEN_TRACE_WRITE(lex_affixname_list, "set affixes");
    ofs << lex_affixname_list;

    LEXGEN_TRACE_WRITE(lex_nonterm_list, "nonterminals");
    ofs << lex_nonterm_list;

    LEXGEN_TRACE_WRITE(lex_entry_list, "entries");
    ofs << lex_entry_list;

    LEXGEN_TRACE_WRITE(lex_entry_list_idx, "entry list");
    ofs << lex_entry_list_idx;
}

#undef LEXGEN_TRACE_WRITE

//------------------------------------------------------------------------------
// Function:
//	LEXICON* read_old_lexicon(const char* lexfilename)
//
// Description:
//	Open the already stored lexicon file lexfilename and load it with
//	lexicon_new().
//
// Return value:
//	The result of lexicon_new(), or NULL if the file cannot be opened.
//	main() releases the result with lexicon_free() on its early-exit
//	path.
//------------------------------------------------------------------------------

LEXICON* read_old_lexicon(const char* lexfilename)
{
    // "rb": the lexicon file is binary data (main() creates it with
    // ios::bin), so it must not be opened in text mode.
    FILE* lexfile = fopen(lexfilename, "rb");
    if (lexfile == NULL) {
#ifdef DEBUG
        cerr << "Something went wrong while reading the old lexicon file"
             << endl << "\trewriting it" << endl;
#endif // DEBUG

        return NULL;
    }

#ifdef DEBUG
    cerr << "old lexicon file seems to exist -- reading it" << endl;
#endif // DEBUG

    LEXICON* lex = lexicon_new(lexfile);

    // The FILE handle is only needed while loading.
    fclose(lexfile);

    return lex;
}


//------------------------------------------------------------------------------
// Function:
//	bool lif_file_has_changed(LEXICON* old_lexicon)
//
// Description:
//	Check for changes in the lif file by asking the lexicon nonterminal
//	list and then the affix name list whether they differ from
//	old_lexicon. The first difference found decides; the outcome is
//	reported through write_log().
//
// Return value:
//	true if old_lexicon is NULL, or the nonterminals or the affixes have
//	changed; false otherwise.
//------------------------------------------------------------------------------

bool lif_file_has_changed(LEXICON* old_lexicon)
{
    bool changed = true;
    const char* verdict = NULL;

#ifdef CMP_DEBUG
    cerr << "check old_lexicon ptr..." << endl;
#endif // CMP_DEBUG
    if (old_lexicon == NULL) {
        // No old lexicon to compare against: treat as changed.
        verdict = "error while reading lexicon";
    } else {
#ifdef CMP_DEBUG
        cerr << "check nonterminals..." << endl;
#endif // CMP_DEBUG
        if (lex_nonterm_list.has_changed(old_lexicon)) {
            verdict = "nonterminals have changed";
        } else {
#ifdef CMP_DEBUG
            cerr << "check affixes..." << endl;
#endif // CMP_DEBUG
            if (lex_affixname_list.has_changed(old_lexicon)) {
                verdict = "affixes have changed";
            } else {
                verdict = "lex file hasn't changed";
                changed = false;
            }
        }
    }

    write_log(verdict);
    return changed;
}


//------------------------------------------------------------------------------
// Function:
//	bool check_dates(string fname1, string fname2)
//
// Description:
//	Compare the time stamps of two files, as reported by stat().
//
// Return value:
//	true if either file cannot be stat()ed, or if the modification time
//	or the status change time of fname1 is later than that of fname2;
//	false otherwise.
//------------------------------------------------------------------------------

bool check_dates(string fname1, string fname2)
{
#ifdef CMP_DEBUG
    cerr << "comparing dates of \"" << fname1;
    cerr << "\" and \"" << fname2 << "\"" << endl;
#endif

    // A file that cannot be examined counts as "changed".
    struct stat first_info;
    if (stat(fname1.c_str(), &first_info) != 0) {
#ifdef CMP_DEBUG
        cerr << "cannot stat " << fname1 << endl;
#endif
        return true;
    }

    struct stat second_info;
    if (stat(fname2.c_str(), &second_info) != 0) {
#ifdef CMP_DEBUG
        cerr << "cannot stat " << fname2 << endl;
#endif
        return true;
    }

#ifdef CMP_DEBUG
    cerr << "stat1.st_mtime = " << first_info.st_mtime << endl;
    cerr << "stat2.st_mtime = " << second_info.st_mtime << endl;
    cerr << "stat1.st_ctime = " << first_info.st_ctime << endl;
    cerr << "stat2.st_ctime = " << second_info.st_ctime << endl;
#endif

    const bool newer_mtime = first_info.st_mtime > second_info.st_mtime;
    if (newer_mtime) {
#ifdef CMP_DEBUG
        cerr << "mtime changed" << endl;
#endif
        return true;
    }

    const bool newer_ctime = first_info.st_ctime > second_info.st_ctime;
#ifdef CMP_DEBUG
    if (newer_ctime) {
        cerr << "ctime changed" << endl;
    }
#endif
    return newer_ctime;
}

//------------------------------------------------------------------------------
// Function:
//	bool dat_file_has_chaned(Options* options)
//
// Description:
//	Decide whether the output file has to be rebuilt: either because
//	compilation is forced, or because check_dates() reports one of the
//	input files as changed with respect to the output file.
//
// Return value:
//	true if the output file has to be rebuilt, false otherwise.
//------------------------------------------------------------------------------

bool dat_file_has_chaned(Options* options)
{
    if (options->force_compilation()) {
        return true;
    }

    string target = options->get_output_filename();
    off_t input_count = options->get_nr_input_files();

    // Stop at the first input file that check_dates() flags.
    off_t idx = 0;
    while (idx < input_count) {
        if (check_dates(options->get_input_filename(idx), target)) {
            return true;
        }
        ++idx;
    }

    return false;
}


//------------------------------------------------------------------------------
// main
//
// Description:
//	Process the lif file; stop if there is none. If neither the dat files
//	nor the lif data have changed with respect to the existing output
//	file, stop as well. Otherwise parse the lexicon files and write the
//	trie followed by the rule data tables to the output file.
//
// Exit status:
//	0	success, or nothing to do
//	2	a lexicon file could not be parsed
//	4	the output file could not be opened or written
//	(write_lexemes() terminates the program itself on a write error)
//------------------------------------------------------------------------------

int main(int argc, char* argv[]) {
    options = new Options(argc, argv);

    if (!process_lif()) return 0;

    if (!dat_file_has_chaned(options)) {
        write_log("dat file has not changed, checking lif file");

        LEXICON* old_lexicon = read_old_lexicon(options->get_output_filename().c_str());

        if (!lif_file_has_changed(old_lexicon)) {
            write_log("lif file has not changed, stopping");

            // old_lexicon is non-NULL here: lif_file_has_changed() returns
            // true for a NULL lexicon.
            lexicon_free(old_lexicon);

            return 0;
        }

        // NOTE(review): old_lexicon is not released on this path; the
        // lexicon_free() call that used to be here was commented out and the
        // reason is not visible in this file (old_lexicon may be NULL here).
        // Confirm before re-enabling:
//        lexicon_free(old_lexicon);
    } else {
        write_log("dat file has changed, rewriting blf file");
    }

    if (!process_lexica()) {
        return 2;
    }

    ofstream ofs(options->get_output_filename().c_str(), ios::out|ios::bin|ios::trunc);
    if (!ofs) {
        cerr << "Error: cannot open file `" << options->get_output_filename() << "'" << endl;
        return 4;
    }

    unsigned trie_sz;
    trie_sz = calculate_offsets();
    write_lexemes(ofs, trie_sz);

    lex_entry_list.generate_table();

#ifdef DEBUG
#if 0
    rule_table.print(cerr, 0);
    lex_entry_list_idx.dump();
    lex_entry_list.dump();
    lex_nonterm_list.dump();
    lex_affixname_list.dump();
#endif
#endif

    // Write all tables to file:
    write_rule_data(ofs);
    ofs.close();

    // A failed write or close leaves the stream in a failed state; do not
    // report success for an incomplete output file.
    if (!ofs) {
        cerr << "Error: cannot write file `" << options->get_output_filename() << "'" << endl;
        return 4;
    }

    // Finally show some stats:
    if (options->verbose()) {
        show_stats(clog);
        log_memusage();
    }

    return 0;
}
