/*
   File: erun_options.c
   Defines the parsing of the command line to run eag3 grammars

   Copyright 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: erun_options.c,v 1.7 2012/12/27 17:59:54 marcs Exp $"
*/

/* standard includes */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_error.h>
#include <dcg_string.h>

/* libebase includes */
#include <ebase_version.h>
#include <ebase_input.h>

/* libeagrts includes */
#include <erts_handle.h>
#include <erts_handle_impl.h>

/* local includes */
#include "erun_options.h"

/* Private options which might overrule the (grammar) defaults.
   They are reset by init_options () and copied into the handle by
   overrule_grammar_options (). The integer overrides use -1 and the
   transduction uses tr_undefined to mean 'not given on the command line'. */
static int no_hybrid_parsing;		/* Nonzero (-N): clear the hybrid parsing flag of the handle */
static int my_input_mode;		/* Input mode chosen with -p, or -1 */
static int my_partial_parse;		/* 1 for -pp, 0 for -fp, or -1 */
static int my_tree_stack_size;		/* Tree stack size given with -ts, or -1 */
static int my_cont_stack_size;		/* Continuation stack size given with -qs, or -1 */
static trans_kind my_transduction;	/* Transduction chosen with -t0/-ta/-tc/-tt/-t, or tr_undefined */
static int my_nr_of_parses;		/* Maximum number of parses given with -P, or -1 */

/* Options for debugging purposes */
static int show_version;		/* Set by -V: show version before processing */
static int show_options;		/* Set by -so: try_show_options () reports the settings */

/* Bring every private option variable back to its 'not specified' state:
   flags are cleared, integer overrides become -1 and the transduction
   becomes tr_undefined. */
static void init_options ()
{
  /* Plain on/off flags */
  show_options = 0;
  show_version = 0;
  no_hybrid_parsing = 0;

  /* Integer overrides: -1 means that the option was not given */
  my_nr_of_parses = my_cont_stack_size = my_tree_stack_size = -1;
  my_partial_parse = my_input_mode = -1;

  /* No transduction selected yet */
  my_transduction = tr_undefined;
}

/* Log the command line synopsis, one dcg_wlog call per line, and
   finish by calling dcg_exit (4). */
static void print_usage ()
{
  static char *usage_lines[] =
    {
      "usage: eag3-run [flags] grammarname [flags][input filename][flags]",
      "where grammarname is the base name of the grammar (with an optional path prefix)",
      "and flags may be any of the following:",
      "   -h:  provide this help",
      "   -d:  enable debugging",
      "   -V:  show version",
      "   -N:  disable hybrid parsing",
      "   -S:  use \\001 as document separator (LCS convention)",
      "   -B:  only accept parses with best penalty level",
      "   -P number     : provide this maximum number of parses",
      "   -p line       : treat input in |line| mode",
      "   -p paragraph  : treat input in |paragraph| mode",
      "   -p document   : treat input in |document| mode",
      "   -i input_fname: use input_fname in $POS processing",
      "   -o file_name  : write output to file",
      "   -ts value     : use value as tree stack size",
      "   -qs value     : use value as continuation stack size",
      "   -t0: do not transduce",
      "   -ta: use top level affixes as transduction (current default)",
      "   -tc: use number of parses as transduction",
      "   -tt: use parse tree as transduction",
      "   -t:  transduce as specified",
      "   -pp: allow partial parsing",
      "   -fp: enforce full parsing",
      "   -so: show options",
      "   -st: show trellis"
    };
  size_t nr_lines = sizeof (usage_lines) / sizeof (usage_lines[0]);
  size_t lnr;

  /* None of the lines contains a conversion, so logging them through
     "%s" yields the same text as using them as the format itself */
  for (lnr = 0; lnr < nr_lines; lnr++)
    dcg_wlog ("%s", usage_lines[lnr]);
  dcg_exit (4);
}

/* Report a command line error through dcg_error and follow it with
   the usage text; print_usage () finishes by calling dcg_exit (4).
   syn_error: the description appended to "error on command line: ". */
static void syntax_error (char *syn_error)
{
  dcg_error (0, "error on command line: %s", syn_error);
  print_usage ();
}

static void scan_options (EagrtsHandle hnd, char *arg, int *idx, int argc, char **argv)
{ if (streq (arg, "h")) print_usage ();
  else if (streq (arg, "V")) show_version = 1;
  else if (streq (arg, "N")) no_hybrid_parsing = 1;
  else if (streq (arg, "S")) hnd -> lcsdoc_sync_option = 1;
  else if (streq (arg, "d")) debug = 1;
  else if (streq (arg, "i"))
    { *idx += 1;
      if (*idx >= argc)
	syntax_error ("missing $POS input file name");
      else if (hnd -> input_pos_fname != NULL)
	{ dcg_wlog ("previous -i option or argument overridden");
	  detach_string (&hnd -> input_pos_fname);
	};
      hnd -> input_pos_fname = new_string (argv[*idx]);
    }
  else if (streq (arg, "o"))
    { *idx += 1;
      if (*idx >= argc)
	syntax_error ("missing output file name");
      else if (hnd -> output_fname != NULL)
	{ dcg_wlog ("previous -i option or argument overridden");
	  detach_string (&hnd -> output_fname);
	};
      hnd -> output_fname = new_string (argv[*idx]);
    }
  else if (streq (arg, "p"))
    { int new_mode = -1;
      *idx += 1;
      if (*idx < argc)
	{ if (streq (argv[*idx], "line")) new_mode = LineInputMode;
	  else if (streq (argv[*idx], "paragraph")) new_mode = ParagraphInputMode;
	  else if (streq (argv[*idx], "document")) new_mode = DocumentInputMode;
	  else syntax_error ("erroneous input mode");
	}
      else syntax_error ("missing input mode");
      if (my_input_mode != -1)
	dcg_wlog ("previous -p option overridden");
      my_input_mode = new_mode;
    }
  else if (streq (arg, "ts"))
    { int new_value = -1;
      *idx += 1;
      if (*idx < argc)
	new_value = (int) strtol (argv[*idx], (char **) NULL, 0);
      else syntax_error ("missing tree stack size");
      if (new_value <= 0)
	syntax_error ("illegal tree stack size");
      if (my_tree_stack_size != -1)
	dcg_wlog ("previous -ts option overridden");
      my_tree_stack_size = new_value;
    }
  else if (streq (arg, "qs"))
    { int new_value = -1;
      *idx += 1;
      if (*idx < argc)
	new_value = (int) strtol (argv[*idx], (char **) NULL, 0);
      else syntax_error ("missing continuation stack size");
      if (new_value <= 0)
	syntax_error ("illegal continuation stack size");
      if (my_cont_stack_size != -1)
	dcg_wlog ("previous -qs option overridden");
      my_cont_stack_size = new_value;
    }
  else if (streq (arg, "B")) hnd -> best_parses = 1;
  else if (streq (arg, "P"))
    { int new_value = -1;
      *idx += 1;
      if (*idx < argc)
        new_value = (int) strtol (argv[*idx], (char **) NULL, 0);
      else syntax_error ("missing number of parses");
      if (new_value <= 0)
        syntax_error ("illegal number of parses");
      if (my_nr_of_parses != -1)
        dcg_wlog ("previous -P option overridden");
      my_nr_of_parses = new_value;
    }
  else if (streq (arg, "t0"))
    { if (my_transduction != tr_undefined)
	dcg_wlog ("previous kind of transduction overridden");
      my_transduction = tr_none;
    }
  else if (streq (arg, "ta"))
    { if (my_transduction != tr_undefined)
	dcg_wlog ("previous kind of transduction overridden");
      my_transduction = tr_affix;
    }
  else if (streq (arg, "tc"))
    { if (my_transduction != tr_undefined)
	dcg_wlog ("previous kind of transduction overridden");
      my_transduction = tr_count;
    }
  else if (streq (arg, "tt"))
    { if (my_transduction != tr_undefined)
	dcg_wlog ("previous kind of transduction overridden");
      my_transduction = tr_tree;
    }
  else if (streq (arg, "t"))
    { if (my_transduction != tr_undefined)
	dcg_wlog ("previous kind of transduction overridden");
      my_transduction = tr_user;
    }
  else if (streq (arg, "pp"))
    { if (my_partial_parse != -1)
	dcg_wlog ("previous -pp/-fp overridden");
      my_partial_parse = 1;
    }
  else if (streq (arg, "fp"))
    { if (my_partial_parse != -1)
	dcg_wlog ("previous -pp/-fp overridden");
      my_partial_parse = 0;
    }
  else if (streq (arg, "so")) show_options = 1;
  else if (streq (arg, "st")) hnd -> show_trellis = 1;
  else syntax_error ("Unrecognizable option");
}

/* Split the grammar argument into a directory part and a base name.
   hnd:         receives dir_name and grammar_name as fresh strings
   source_name: the grammar argument as given on the command line
   Without a separator the directory becomes "."; a single leading
   separator gives "/"; otherwise the directory is everything before the
   last '/' or DIR_SEP. A directory part longer than MAXPATHLEN or an
   empty base name is reported through syntax_error (). */
static void try_split_source_name (EagrtsHandle hnd, char *source_name)
{ char *sptr, *last_slash;

  /* Locate the last slash or DIR_SEP in the line */
  last_slash = NULL;
  for (sptr = source_name; (*sptr); sptr++)
    if ((*sptr == '/') || (*sptr == DIR_SEP)) last_slash = sptr;

  /* Determine the directory name and cut it from the source name */
  if (last_slash == NULL) hnd -> dir_name = new_string (".");
  else if (last_slash == source_name)
    { hnd -> dir_name = new_string ("/");
      source_name++;
    }
  else
    { size_t delta = (size_t) (last_slash - source_name);
      char buf[MAXPATHLEN + 1];

      /* The prefix comes straight from the command line: refuse it
	 when it does not fit in the local buffer */
      if (delta > (size_t) MAXPATHLEN)
	{ syntax_error ("Directory part of grammar name too long");
	  return;
	};
      memcpy (buf, source_name, delta);
      buf[delta] = '\0';
      hnd -> dir_name = new_string (buf);
      source_name = last_slash + 1;
    };

  /* We have a last component on the line, so this must be the base name */
  if (!*source_name)
    { syntax_error ("Empty grammar name");
      return;
    };
  hnd -> grammar_name = new_string (source_name);
}

/* Walk over the command line and store the result in the handle and in
   the private option variables.
   Words starting with '-' are handed to scan_options (), which may
   advance the index past an option value. The first other word is the
   grammar name, the second one the input file name (also used as $POS
   file name); a third one is an error, as is a missing grammar name. */
void parse_command_line (EagrtsHandle hnd, int argc, char **argv)
{
  int nr_positional = 0;
  int ix = 1;

  init_options ();
  while (ix < argc)
    {
      char *word = argv[ix];
      if (word[0] == '-')
	scan_options (hnd, word + 1, &ix, argc, argv);
      else
	switch (nr_positional)
	  {
	    case 0:
	      /* Grammar name, possibly with a path prefix */
	      try_split_source_name (hnd, word);
	      nr_positional++;
	      break;

	    case 1:
	      /* Input file name: it replaces an earlier -i value */
	      if (hnd -> input_pos_fname != NULL)
		{
		  dcg_wlog ("previous -i option overridden by argument");
		  detach_string (&hnd -> input_pos_fname);
		}
	      hnd -> input_fname = new_string (word);
	      hnd -> input_pos_fname = new_string (word);
	      nr_positional++;
	      break;

	    default:
	      syntax_error ("Too many arguments");
	      break;
	  }
      ix++;
    }

  /* Exit without grammar */
  if (nr_positional == 0)
    syntax_error ("No grammar name given");
}

/* Copy the options that were given on the command line into the handle,
   leaving every setting without an override untouched. Giving a maximum
   number of parses (-P) while best_parses is set in the handle is
   reported through syntax_error (). */
void overrule_grammar_options (EagrtsHandle hnd)
{
  int parses_given = (my_nr_of_parses != -1);

  if (no_hybrid_parsing)
    hnd -> hybrid_parsing = 0;
  if (my_partial_parse != -1)
    hnd -> partial_parse = my_partial_parse;
  if (my_input_mode != -1)
    hnd -> input_mode = my_input_mode;
  if (my_tree_stack_size != -1)
    hnd -> tree_stack_size = my_tree_stack_size;
  if (my_cont_stack_size != -1)
    hnd -> cont_stack_size = my_cont_stack_size;

  /* A parse limit and best parses exclude each other */
  if (parses_given && hnd -> best_parses)
    syntax_error ("-B and -P can not be specified both");
  else if (parses_given)
    hnd -> max_parses = my_nr_of_parses;

  if (my_transduction != tr_undefined)
    hnd -> transduce_option = my_transduction;
}

/* When -so was given, log the settings stored in the handle: the files
   involved, the parsing flags, the stack sizes and the kind of
   transduction. Without -so nothing is logged. An input mode or
   transduction option outside the known values is passed to
   dcg_bad_tag (). */
void try_show_options (EagrtsHandle hnd)
{ if (!show_options) return;

  /* Files */
  dcg_wlog ("Directory name is '%s'", hnd -> dir_name);
  dcg_wlog ("Grammar name is '%s'", hnd -> grammar_name);
  if (hnd -> input_fname == NULL) dcg_wlog ("Input is read from stdin");
  else dcg_wlog ("Input is read from '%s'", hnd -> input_fname);
  if (hnd -> output_fname == NULL) dcg_wlog ("Output is written to stdout");
  else dcg_wlog ("Output is written to '%s'", hnd -> output_fname);

  /* The $POS name may be set by -i while the input itself comes from
     stdin, so it is the $POS name that decides what to report */
  if (hnd -> input_pos_fname == NULL) dcg_wlog ("$POS will not generate file names");
  else dcg_wlog ("$POS will use '%s' to generate file names", hnd -> input_pos_fname);

  /* Flags */
  if (hnd -> hybrid_parsing) dcg_wlog ("Hybrid parsing active");
  else dcg_wlog ("No hybrid parsing");
  if (hnd -> partial_parse) dcg_wlog ("Partial parsing is ok");
  else dcg_wlog ("No partial parsing");
  switch (hnd -> input_mode)
    { case LineInputMode:	dcg_wlog ("Input operates in |line mode|"); break;
      case ParagraphInputMode:	dcg_wlog ("Input operates in |paragraph mode|"); break;
      case DocumentInputMode:	dcg_wlog ("Input operates in |document mode|"); break;
      default: dcg_bad_tag (hnd -> input_mode, "try_show_options");
    };
  if (hnd -> lcsdoc_sync_option) dcg_wlog ("LCS document separators are recognized");
  else dcg_wlog ("LCS document separation is inactive");

  /* Other information */
  dcg_wlog ("Tree stack size is %d", hnd -> tree_stack_size);
  dcg_wlog ("Continuation stack size is %d", hnd -> cont_stack_size);
  if (hnd -> show_trellis) dcg_wlog ("Dumping of the trellis is enabled");
  if (hnd -> best_parses) dcg_wlog ("Only accept best parses");
  if (hnd -> max_parses != INT_MAX) dcg_wlog ("Only accept %d parses", hnd -> max_parses);
  switch (hnd -> transduce_option)
    { case tr_none:	dcg_wlog ("No transduction output will be provided"); break;
      case tr_affix:	dcg_wlog ("The top level affix values are used as output");
			break;
      case tr_count:	dcg_wlog ("The number of correct parses is produced as output"); break;
      case tr_tree:	dcg_wlog ("The parse tree will be provided as output"); break;
      case tr_user:	dcg_wlog ("The user defined transduction will be output"); break;
      default: dcg_bad_tag (hnd -> transduce_option, "try_show_options");
    };
}
