/*
   File: agfl-lexgen.c
   Defines main program for agfl-lexgen

   Copyright 2009-2010 Radboud University of Nijmegen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"   
*/

/* System includes */
#include <stdio.h>
#include <stdlib.h>

/* Libabase includes */
#include <abase_error.h>
#include <abase_meminfo.h>
#include <abase_fileutil.h>

/* Local includes */
#include "options.h"
#include "globals.h"
#include "lexgen_lexer.h"
#include "dyn_array.h"
#include "lif_parser.h"
#include "affix_values.h"
#include "entries.h"
#include "nonterminals.h"
#include "datfct_parser.h"
#include "trp_parser.h"

/*
   Write the info file named by info_fname.
   The sections are emitted in this order: affix values, nonterminals,
   terminals, calls, entries.  If the file cannot be opened, nothing
   is written and the function simply returns.
*/
static void generate_info_output (void)
{ FILE *info = abs_fopen (info_fname, "w");
  if (info == NULL) return;

  generate_affix_values_info (info);
  generate_nonterminals_info (info);
  generate_terminals_info (info);
  generate_calls_info (info);
  generate_entries_info (info);
  fclose (info);
}

/*
   Write the dump file named by dump_fname: first the vocabulary
   entries, then the fact tables.  If the file cannot be opened,
   nothing is written and the function simply returns.
*/
static void generate_dump_output (void)
{ FILE *dump = abs_fopen (dump_fname, "w");
  if (dump == NULL) return;

  dump_vocabulary_entries (dump);
  dump_fact_tables (dump);
  fclose (dump);
}

/*
   Save the binary lexicon to lexicon_fname.
   The leading sections (version, the three name arrays, the hybrid
   parsing flag, affix values, nonterminals, terminals) are read back
   in this same order by verify_new_lif_file, so the order of the
   save calls must not be changed independently of that function.

   Returns 0 when the file was written, -1 when it could not be opened.
*/
static int write_lexicon_file ()
{ BinFile out = abs_bin_fopen (lexicon_fname, "w");
  if (!out) return -1;

  /* Header: identifies the file and the inputs it was built from */
  abs_bin_save_version (out, "binary lexicon");
  bin_save_text_array (out, lexicon_names);
  bin_save_text_array (out, fact_table_names);
  bin_save_text_array (out, triples_database_names);
  abs_bin_save_int (out, hybrid_parsing);

  /* Lexicon interface */
  bin_save_affix_values (out);
  bin_save_nonterminals (out);
  bin_save_terminals (out);

  /* Lexicon contents */
  bin_save_calls (out);
  bin_save_entries (out);
  bin_save_vocabularies (out);
  bin_save_fact_tables (out);

  abs_bin_save_eof (out);
  abs_bin_fclose (out);
  return 0;
}

/*
   Compare the header of an already opened binary lexicon with the
   current lexicon interface.  The checks are done in the order in
   which write_lexicon_file saved the data: version, lexicon names,
   fact table names, triples database names, hybrid parsing flag,
   affix values, nonterminals, terminals.

   Returns 1 as soon as one of the checks reports a difference,
   0 when all of them pass.
*/
static int verify_new_lif_file (BinFile old_blx)
{ int stored_hybrid;
  int version_status;

  version_status = abs_bin_verify_version (old_blx, "binary lexicon", 1);
  if (version_status != 0) return (1);

  /* The names of all input files must be the same as last time */
  if (bin_cmp_text_array (old_blx, lexicon_names) ||
      bin_cmp_text_array (old_blx, fact_table_names) ||
      bin_cmp_text_array (old_blx, triples_database_names))
    return (1);

  /* The old lexicon must have been built with the same hybrid setting */
  abs_bin_load_int (old_blx, &stored_hybrid);
  if (stored_hybrid != hybrid_parsing) return (1);

  /* Finally compare the interface tables; || keeps the original order */
  if (has_new_lif_affix_values (old_blx) ||
      has_new_lif_nonterminals (old_blx) ||
      has_new_lif_terminals (old_blx))
    return (1);
  return (0);
}

/*
   Decide whether the source file "<name>.<suffix of kind>" makes the
   binary lexicon out of date.

   name: file name without suffix
   kind: file kind, used to select the suffix via suffix_from_file_kind

   Returns nonzero when the binary lexicon must be regenerated: either
   its modification time could not be determined, or it is not newer
   than the source file.  Returns 0 when the binary lexicon is strictly
   newer.  Calls abs_fatal when the source path does not fit in the
   path buffer or when the age of the source file cannot be determined.
*/
static int verify_dat_file_age (char *name, file_kind kind)
{ char path[MAXPATHLEN + 1];
  time_t blx_age, file_age;
  int len;

  /* We should not be able to fail on this one, having opened it recently */
  if (!abs_file_mtime (lexicon_fname, &blx_age)) return (1);

  /* Build the source path; never write past the end of the buffer */
  len = snprintf (path, sizeof (path), "%s.%s", name, suffix_from_file_kind (kind));
  if (len < 0 || (size_t) len >= sizeof (path))
    abs_fatal ("file name '%s.%s' is too long", name, suffix_from_file_kind (kind));

  /* If the source file does not exist anymore, die */
  if (!abs_file_mtime (path, &file_age))
    abs_fatal ("could not determine age of file '%s'", path);

  /* Recompile unless blx is strictly newer than dat (equal ages recompile) */
  return (blx_age <= file_age);
}

/*
   Decide whether the binary lexicon has to be (re)generated.

   Returns 1 when generation is forced, when no usable old binary
   lexicon exists, when its stored interface differs from the current
   one, or when one of the source files (lexica, fact tables and, with
   hybrid parsing, triple databases and the relators file) is not older
   than the binary lexicon.  Returns 0 when the old binary lexicon can
   be kept.  With verbose set, the reason is reported.
*/
static int recompilation_necessary ()
{ BinFile previous;
  int changed, i;

  /* If forced, do it */
  if (lexgen_forced) return (1);

  /* Check if the old binary lexicon matches with the lif */
  if (!abs_file_exists (lexicon_fname)) return (1);
  previous = abs_bin_fopen (lexicon_fname, "r");
  if (previous == NULL) return (1);
  changed = verify_new_lif_file (previous);
  abs_bin_fclose (previous);
  if (changed) return (1);
  if (verbose) abs_message ("    lexicon interface did not change");

  /* Check if the old binary lexicon is younger than the source lexicon files */
  for (i = 0; i < lexicon_names -> size; i++)
    { changed = verify_dat_file_age (lexicon_names -> array[i], lexicon);
      if (!changed) continue;
      if (verbose)
        abs_message ("    lexicon %s changed since last binary lexicon generation",
                     lexicon_names -> array[i]);
      return (1);
    }

  /* Check if the old binary lexicon is younger than the source fact files */
  for (i = 0; i < fact_table_names -> size; i++)
    { changed = verify_dat_file_age (fact_table_names -> array[i], fact);
      if (!changed) continue;
      if (verbose)
        abs_message ("    fact table %s changed since last binary lexicon generation",
                     fact_table_names -> array[i]);
      return (1);
    }

  /* Check if the old binary lexicon is younger than the triple files */
  if (hybrid_parsing)
    { for (i = 0; i < triples_database_names -> size; i++)
        { changed = verify_dat_file_age (triples_database_names -> array[i], triple);
          if (!changed) continue;
          if (verbose)
            abs_message ("    triple db %s changed since last binary lexicon generation",
                         triples_database_names -> array[i]);
          return (1);
        }

      /* The relators file is only checked when it is present */
      if (abs_file_ext_exists(RELATORS_DAT_FILE, suffix_from_file_kind(relators)))
        { changed = verify_dat_file_age (RELATORS_DAT_FILE, relators);
          if (changed)
            { if (verbose)
                abs_message ("    %s file changed since last binary lexicon generation",
                             RELATORS_DAT_FILE);
              return (1);
            }
        }
    }

  if (verbose) abs_message ("    no changes in lexicon, fact table or triple files");
  return (0);
}

/*
   Report the sizes of the collected tables through abs_message:
   affixes per kind, terminals and nonterminals, calls, critical text
   parameters, entries and the number of omitted triples.
*/
static void report_statistics ()
{ /* Affixes, split by kind */
  abs_message ("    collected %d INT affixes, %d TEXT affixes, %d set affixes",
               nr_of_int_affixes (),
               nr_of_text_affixes (),
               nr_of_set_affixes ());

  /* Grammar symbols and their calls */
  abs_message ("    collected %d grammar terminals, %d lexicon/fact nonterminals",
               nr_of_terminals (),
               nr_of_nonterminals ());
  abs_message ("    collected %d different calls of lexicon/fact nonterminals",
               nr_of_calls ());

  /* The reported count is one less than the size of the vector */
  abs_message ("    collected %d critical text parameters",
               crit_text_vector -> size - 1);

  /* Lexicon contents */
  abs_message ("    collected %d entries, %d feature entries",
               nr_of_entry_lists (),
               nr_of_entries ());
  abs_message ("    omitted %d uninteresting triples",
               nr_uninteresting_triples ());
}

int main (int argc, char **argv)
{ parse_command_line (argc, argv);
  init_affix_values ();
  init_nonterminals ();

  parse_lexicon_interface ();
  calculate_affix_values_closure ();
  if (!recompilation_necessary ())
    return (0);

  init_entries ();
  parse_datfct_files ();
  if (hybrid_parsing) parse_triples ();

  if (generate_info) generate_info_output ();
  if (generate_dump) generate_dump_output ();
  write_lexicon_file ();
  if (verbose)
    { report_statistics ();
      abs_report_meminfo ();
    };
  return (0);
}
