/*
   File: erts_trans.c
   Defines the second pass, i.e. transduction, for the EAG3 runtime system

   Copyright 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: erts_trans.c,v 1.6 2013/01/11 14:52:05 marcs Exp $"
*/

/* global includes */
#include <stdio.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>
#include <dcg_plist.h>
#include <dcg_string.h>
#include <dcg_dstring.h>

/* libebase includes */
#include <ebase_version.h>
#include <ebase_ds.h>
#include <ebase_input.h>
#include <ebase_affix_value_utils.h>
#include <ebase_utils.h>
#include <ebase_lexicon.h>
#include <ebase_lexicon_impl.h>

/* local includes */
#include "erts_tree.h"
#include "erts_trellis.h"
#include "erts_handle.h"
#include "erts_tree_impl.h"
#include "erts_trellis_impl.h"
#include "erts_handle_impl.h"
#include "erts_propagate.h"
#include "erts_predefs.h"
#include "erts_trans.h"

/*
   All parse results are stored in a linked list which is kept
   sorted in increasing order of penalty. When a parse result
   is achieved that recognized a longer input, all previous
   parse results are discarded.

   In the case that a maximum number of parses is specified,
   parse results may be kicked out. 

   As soon as a parse result becomes available, a check is made
   before starting transduction whether it makes sense to add
   the parse result.
*/
/* One node of the linked list of parse results kept in the handle */
struct parse_result_rec 
{ Penalty penalty;	/* Penalty accrued in the parse */
  int length;		/* Length of the parsed input: offset of next_state minus offset of the start state */
  State next_state;	/* Trellis state reached by the parse, i.e. where to resume parsing */
  dstring data;		/* Transduction result, filled in during the second pass */
  ParseResult next;	/* Next parse result, or parse_result_nil at the end of the list */
};

/*
   House keeping
*/
/*
   Release the single parse result referred to by *old: its transduction
   buffer is freed and the record is handed to dcg_detach through the
   given reference. A NULL reference or an empty slot is ignored.
*/
static void destroy_parse_result (ParseResult *old)
{ if ((old != NULL) && (*old != parse_result_nil))
    { ParseResult victim = *old;
      dcg_free_dstring (&(victim -> data));
      dcg_detach ((void **) old);
    };
}

/*
   Release every record of the result list that starts at 'old'.
   The successor is picked up before a record is released.
*/
static void destroy_parse_results (ParseResult old)
{ ParseResult cursor;
  ParseResult victim;
  for (cursor = old; cursor != parse_result_nil; )
    { victim = cursor;
      cursor = cursor -> next;
      dcg_free_dstring (&(victim -> data));
      dcg_detach ((void **) &victim);
    };
}

/*
   Throw away all parse results collected in the handle and reset the
   bookkeeping: empty list, zero results, no upper bound on the penalty.
*/
void erts_discard_parse_results (EagrtsHandle hnd)
{ ParseResult stale = hnd -> all_results;
  hnd -> all_results = parse_result_nil;
  hnd -> nr_of_results = 0;
  hnd -> max_penalty = INT_MAX;
  destroy_parse_results (stale);
}

/*
   Determine if we want to enter the second pass for the current parse.
   The parse is compared with the head of the result list:
   - an empty list accepts any parse;
   - a shorter parse is refused, a longer one is accepted;
   - at equal length a lower penalty than the head is accepted;
   - when only the best parses are wanted, an equal penalty is accepted too;
   - otherwise the parse is accepted while there is room on the list, or
     when its penalty is below the maximum penalty recorded for the list.
   Returns 1 if the parse is acceptable, 0 if not.
*/
static int current_parse_is_acceptable (EagrtsHandle hnd)
{ ParseResult head = hnd -> all_results;
  Penalty new_penalty = hnd -> curr_penalty;
  int new_length = hnd -> trellis -> curr_state -> offset - hnd -> start_state -> offset;

  /* Nothing to compare against */
  if (head == parse_result_nil) return (1);

  /* A difference in length decides immediately */
  if (new_length != head -> length) return (new_length > head -> length);

  /* Equal length: the penalty decides */
  if (new_penalty < head -> penalty) return (1);
  if (hnd -> best_parses) return (new_penalty == head -> penalty);

  /* Limited number of parses: room left, or better than the worst one kept */
  return ((hnd -> nr_of_results < hnd -> max_parses) ||
	  (new_penalty < hnd -> max_penalty));
}

/*
   The following code will initialize a new parse result record and
   insert it at the right location in the all_results list.
*/
#define INIT_RESULT_LENGTH 1024
static void add_current_parse (EagrtsHandle hnd)
{ ParseResult my_result = (ParseResult) dcg_malloc (sizeof (struct parse_result_rec));
  ParseResult *insert_ptr;
  int last_max;
  my_result -> penalty = hnd -> curr_penalty;
  my_result -> next_state = hnd -> trellis -> curr_state;
  my_result -> length = my_result -> next_state -> offset - hnd -> start_state -> offset;
  my_result -> data = dcg_init_dstring (INIT_RESULT_LENGTH);
  my_result -> next = parse_result_nil;
  hnd -> curr_result = my_result;

  /* If we recognized a longer input than previously, all_results can be discarded */
  if ((hnd -> all_results == parse_result_nil) ||
      (hnd -> all_results -> length < my_result -> length))
    { destroy_parse_results (hnd -> all_results);
      hnd -> all_results = my_result;
      hnd -> nr_of_results = 1;
      hnd -> max_penalty = my_result -> penalty;
      return;
    };

#ifdef DEBUG
  /* The new parse must have equal length as the ones already existing */
  if (hnd -> all_results -> length != my_result -> length)
    dcg_internal_error ("add_current_parse");
#endif

  /* Check for best parses */
  if (hnd -> best_parses)
    { if (hnd -> all_results -> penalty > my_result -> penalty)
        { destroy_parse_results (hnd -> all_results);
          hnd -> all_results = my_result;
          hnd -> nr_of_results = 1;
          hnd -> max_penalty = my_result -> penalty;
          return;
	}

#ifdef DEBUG
      /* The new parse must have equal penalty as the ones already existing */
      if (hnd -> all_results -> penalty != my_result -> penalty)
        dcg_internal_error ("add_current_parse");
#endif
      /* Insert at head: for proper history it should be the tail */
      my_result -> next = hnd -> all_results;
      hnd -> all_results = my_result;
      return;
    };
      
  /* We have to insert the new parse at the right location in the list */
  insert_ptr = &hnd -> all_results;
  while ((*insert_ptr != parse_result_nil) &&
         ((*insert_ptr) -> penalty <= my_result -> penalty))
    insert_ptr = &(*insert_ptr) -> next;

  /* Found location to insert */
  my_result -> next = *insert_ptr;
  *insert_ptr = my_result;

  /* Check the number of parses */
  hnd -> nr_of_results++;
  if (hnd -> nr_of_results <= hnd -> max_parses) return;

#ifdef DEBUG
  /* There must be one left on the list */
  if ((*insert_ptr) -> next == parse_result_nil)
    dcg_internal_error ("add_current_parse");
#endif

  /* We still have to remove the last parse with the highest penalty */
  last_max = (*insert_ptr) -> penalty;
  while ((*insert_ptr) -> next != parse_result_nil)
    { last_max = (*insert_ptr) -> penalty;
      insert_ptr = &(*insert_ptr) -> next;
    };
  
  /* Final destruction */
  destroy_parse_result (insert_ptr);	/* Will also write the end of list */
  hnd -> max_penalty = last_max;
  hnd -> nr_of_results--;
}

/*
   Outputting of affix positions, trees, etc.
*/
/* Start a new line in ds and append 'indent' blanks to it */
static void output_indent (dstring ds, int indent)
{ int todo = indent;
  dcg_append_dstring_c (ds, '\n');
  while (todo > 0)
    { dcg_append_dstring_c (ds, ' ');
      todo--;
    };
}

/*
   Append one character of a quoted string to ds in escaped form:
   - form feed, newline, carriage return and tab become \f, \n, \r and \t;
   - the double quote and the backslash are preceded by a backslash, so
     that the escapes written here cannot be confused with literal text;
   - every other byte outside the printable ASCII range 32..126 (this
     includes DEL and all bytes with the high bit set) becomes \xNN;
   - all remaining characters are appended unchanged.
*/
static void output_character (dstring ds, char ch)
{ /* Go through unsigned char: char may be signed on this platform */
  unsigned int ich = (unsigned int) (unsigned char) ch;
  switch (ch)
    { case '\f': dcg_append_dstring (ds, "\\f"); break;
      case '\n': dcg_append_dstring (ds, "\\n"); break;
      case '\r': dcg_append_dstring (ds, "\\r"); break;
      case '\t': dcg_append_dstring (ds, "\\t"); break;
      case '"': dcg_append_dstring (ds, "\\\""); break;
      case '\\': dcg_append_dstring (ds, "\\\\"); break;
      default:
        if ((ich < 32) || (ich >= 127))
          dcg_sprintfa_dstring (ds, "\\x%02x", ich);
        else dcg_append_dstring_c (ds, ch);
    };
}

/*
   Append the null terminated text to ds between double quotes,
   passing every character through output_character.
*/
static void output_string (dstring ds, char *text)
{ char *cursor = text;
  dcg_append_dstring_c (ds, '"');
  while (*cursor != '\0')
    { output_character (ds, *cursor);
      cursor++;
    };
  dcg_append_dstring_c (ds, '"');
}

/*
   Append a blank followed by the quoted text of a transition to ds.
   The text runs from trans -> from up to (not including) trans -> to
   and every character is passed through output_character.
*/
static void output_transition_string (dstring ds, Transition trans)
{ char *cursor;
  dcg_append_dstring (ds, " \"");
  cursor = trans -> from;
  while (cursor != trans -> to)
    { output_character (ds, *cursor);
      cursor++;
    };
  dcg_append_dstring_c (ds, '"');
}

/*
   Append a lattice value to ds as the names of elements of its domain,
   separated by '|'. The elements of the domain are visited in order on a
   private copy of the value: an element is named when
   ebs_lattice_value_is_subset accepts it against what remains of the copy,
   after which the copy is replaced by the result of
   ebs_diff_lattice_values. The visit ends early when that call returns 0
   (presumably: nothing is left to name -- to be confirmed in libebase).
*/
static void output_lattice (dstring ds, affix_value value, rt_domain dom)
{ affix_value remainder = rdup_affix_value (value);
  rt_element_list members = dom -> elts;
  int emitted = 0;
  int idx = 0;
  while (idx < members -> size)
    { rt_element member = members -> array[idx];
      idx++;
      if (ebs_lattice_value_is_subset (member -> value, remainder))
	{ affix_value reduced;
	  if (emitted) dcg_append_dstring_c (ds, '|');
	  emitted = 1;
	  dcg_append_dstring (ds, member -> name);
	  if (!ebs_diff_lattice_values (remainder, member -> value, &reduced)) break;
	  detach_affix_value (&remainder);
	  remainder = reduced;
	};
    };
  detach_affix_value (&remainder);
}

/*
   Append a textual form of an affix value to ds.
     ds:          the dstring to append to
     value:       the value to output; affix_value_nil is written as "(null)"
     domains:     the runtime domains, indexed by the domain number stored
                  in a lattice value
     trans_affix: when nonzero a text value is appended as is, otherwise it
                  is written as a quoted, escaped string
   A null value is written as "(Null)", integers with %d, reals with %g,
   lattices as element names (see output_lattice) and a composed value as
   <marker: part,part,...> with each part written recursively.
   An unknown tag is reported through dcg_bad_tag under this function's name.
*/
static void output_affix_value (dstring ds, affix_value value, rt_domain_list domains,
				int trans_affix)
{ if (value == affix_value_nil)
    { dcg_append_dstring (ds, "(null)");
      return;
    };
  switch (value -> tag)
    { case TAGNull_value: dcg_append_dstring (ds, "(Null)"); break;
      case TAGText_value:
	if (trans_affix) dcg_append_dstring (ds, value -> Text_value.text);
	else output_string (ds, value -> Text_value.text);
	break;
      case TAGInt_value:  dcg_sprintfa_dstring (ds, "%d", value -> Int_value.ival); break;
      case TAGReal_value: dcg_sprintfa_dstring (ds, "%g", value -> Real_value.rval); break;
      case TAGSmall_lattice:
	{ rt_domain rdom = domains -> array[value -> Small_lattice.dom];
	  output_lattice (ds, value, rdom);
	}; break;
      case TAGLarge_lattice:
	{ rt_domain rdom = domains -> array[value -> Large_lattice.dom];
	  output_lattice (ds, value, rdom);
	}; break;
      case TAGComposed_value:
	{ int ix;
	  affix_value_list parts = value -> Composed_value.parts;
	  dcg_sprintfa_dstring (ds, "<%d: ", value -> Composed_value.marker);
	  for (ix = 0; ix < parts -> size; ix++)
	    { if (ix) dcg_append_dstring_c (ds, ',');
	      output_affix_value (ds, parts -> array[ix], domains, trans_affix);
	    };
	  dcg_append_dstring_c (ds, '>');
	}; break;
      /* Report under the name of this function so the diagnostic points here */
      default: dcg_bad_tag (value -> tag, "output_affix_value");
    };
}

/*
   Append a description of a trellis transition to the data of the
   current parse result. Error, terminal and end of text transitions are
   written as a fixed marker or as the quoted origin of the terminal;
   word, match, skip, other and any transitions are written as a keyword
   (with the origin of the regexp for match and skip) followed by the
   quoted input text covered by the transition.
*/
static void output_transition (EagrtsHandle hnd, Transition trans)
{ dstring out = hnd -> curr_result -> data;
  Lexicon lexicon = hnd -> lexicon;
  int idx = trans -> nr;
  switch (trans -> class)
    { /* Transitions written without the covered input text */
      case TransError:
	dcg_append_dstring (out, "ERROR");
	return;
      case TransTerminal:
	dcg_sprintfa_dstring (out, "\"%s\"", lexicon -> rt_terminals -> array[idx] -> origin);
	return;
      case TransEndOfText:
	dcg_append_dstring (out, "<EOT>");
	return;

      /* Transitions written as keyword plus covered input text */
      case TransWord:
	dcg_append_dstring (out, "$WORD");
	break;
      case TransMatch:
	dcg_sprintfa_dstring (out, "$MATCH (\"%s\")",
			      lexicon -> rt_regexp_nfas -> array[idx] -> origin);
	break;
      case TransSkip:
	dcg_sprintfa_dstring (out, "$SKIP (\"%s\")",
			      lexicon -> rt_regexp_nfas -> array[idx] -> origin);
	break;
      case TransOther:
	dcg_append_dstring (out, "$OTHER");
	break;
      case TransAny:
	dcg_append_dstring (out, "$ANY");
	break;

      default:
	dcg_bad_tag (trans -> class, "output_transition");
	return;
    };
  output_transition_string (out, trans);
}

/*
   Calculate the affix value of a position (upper side) and append it to
   the data of the current parse result; trans_affix is passed on to
   output_affix_value. The calculated value is detached afterwards.
*/
static void output_position (EagrtsHandle hnd, Position pos, int trans_affix)
{ affix_value computed = erts_calc_affix_value (pos, upper_side);
  dstring out = hnd -> curr_result -> data;
  rt_domain_list domains = hnd -> lexicon -> rt_domains;
  output_affix_value (out, computed, domains, trans_affix);
  detach_affix_value (&computed);
}

/*
   Append a nonterminal with its affix positions to the data of the
   current parse result. The name_chars list of the nonterminal is walked:
   a nonzero entry takes the next name part, a zero entry takes the next
   position. Consecutive positions are grouped as " (p1, p2)" and name
   parts are separated by a blank. Note that nr_pos is not consulted:
   the number of positions taken follows from name_chars alone.
*/
static void output_rt_nont (EagrtsHandle hnd, rt_nont rt, Position *positions, int nr_pos)
{ dstring out = hnd -> curr_result -> data;
  string_list parts = rt -> name_parts;
  int_list layout = rt -> name_chars;
  int next_pos = 0;
  int next_part = 0;
  int args_open = 0;
  int ix;
  for (ix = 0; ix < layout -> size; ix++)
    { if (!layout -> array[ix])
	{ /* A position: open the argument list or continue it */
	  dcg_append_dstring (out, (args_open) ? ", " : " (");
	  output_position (hnd, positions[next_pos], 0);
	  next_pos++;
	  args_open = 1;
	  continue;
	};

      /* A name part: close a pending argument list or separate from the previous part */
      if (args_open) dcg_append_dstring (out, ") ");
      else if (next_part != 0) dcg_append_dstring_c (out, ' ');
      dcg_append_dstring (out, parts -> array[next_part]);
      next_part++;
      args_open = 0;
    };
  if (args_open) dcg_append_dstring_c (out, ')');
}

/*
   Append a quasi node to the data of the current parse result: the name
   belonging to quasi_nr, followed by the first position between
   parentheses when the node has any positions.
*/
static void output_quasi_node (EagrtsHandle hnd, int quasi_nr, Position *positions, int nr_pos)
{ dstring out = hnd -> curr_result -> data;
  dcg_append_dstring (out, name_from_quasi (quasi_nr));
  if (nr_pos == 0) return;
  dcg_append_dstring (out, " (");
  output_position (hnd, positions[0], 0);
  dcg_append_dstring_c (out, ')');
}

/*
   Append a parse tree to the data of the current parse result, one node
   per line; every node starts on a new line indented by 'indent' blanks
   and the sons of a node are indented two blanks further.
   A nonterminal node is written with its positions, followed by its sons;
   a lexicon nonterminal node is finally followed by the quoted input text
   of its transition. The other node kinds are written as a single item.
*/
static void output_tree (EagrtsHandle hnd, int indent, Tree tree)
{ dstring out = hnd -> curr_result -> data;
  output_indent (out, indent);
  if (tree == tree_nil)
    { dcg_append_dstring (out, "<null tree>");
      return;
    };
  switch (tree -> kind)
    { case confrontation_node:
	dcg_append_dstring (out, "{}");
	break;
      case leaf_node:
	output_transition (hnd, tree -> trans);
	break;
      case penalty_node:
	dcg_sprintfa_dstring (out, "$PENALTY (%d)", tree -> number);
	break;
      case quasi_node:
	output_quasi_node (hnd, tree -> number, tree -> positions, tree -> nr_pos);
	break;
      case normal_node:
      case simple_node:
      case lex_nont_node:
      case anonymous_node:
      case predicate_node:
	{ rt_nont nont = hnd -> rt_nonts -> array[tree -> number];
	  int son = 0;
	  output_rt_nont (hnd, nont, tree -> positions, tree -> nr_pos);
	  while (son < tree -> nr_sons)
	    { output_tree (hnd, indent + 2, tree -> sons[son]);
	      son++;
	    };
	  if (tree -> kind == lex_nont_node)
	    output_transition_string (out, tree -> trans);
	}; break;
      default: dcg_bad_tag (tree -> kind, "output_tree");
    }
}

/*
   Output the top level affixes as possible transduction: the positions
   of the first son of the top tree are appended one after the other to
   the data of the current parse result, text values as is.
*/
static void output_top_level_affixes (EagrtsHandle hnd)
{ Tree top = top_tree (hnd);
  Tree start_node = top -> sons[0];
  Position *pos = start_node -> positions;
  int remaining = start_node -> nr_pos;
  while (remaining > 0)
    { output_position (hnd, *pos, 1);
      pos++;
      remaining--;
    };
}

/*
   Output the parse tree as possible transduction: the first son of the
   top tree is written at indentation 0, followed by a final newline.
*/
static void output_parse_tree (EagrtsHandle hnd)
{ Tree top = top_tree (hnd);
  output_tree (hnd, 0, top -> sons[0]);
  dcg_append_dstring_c (hnd -> curr_result -> data, '\n');
}

/*
   If we reach this point, the first pass reached its final continuation.
   Nothing is done when no transduction is wanted. Otherwise the accrued
   penalty and parsed input length are checked first to see whether it
   makes sense to start transduction; if so, a parse result is added for
   the current parse and filled according to the transduce option:
   - tr_affix: the top level affixes are written;
   - tr_count: the result is only counted, no data is written;
   - tr_tree:  the parse tree is written;
   - tr_user:  not available, reported through dcg_abort.
*/
void erts_pass2 (EagrtsHandle hnd)
{ if (hnd -> transduce_option == tr_none) return;
  if (!current_parse_is_acceptable (hnd)) return;
  add_current_parse (hnd);

  switch (hnd -> transduce_option)
    { case tr_affix:	output_top_level_affixes (hnd); break;
      case tr_count:	break;
      case tr_tree:	output_parse_tree (hnd); break;
      case tr_user:
	/* dcg_abort presumably does not return; the break makes sure that
	   tr_user can never be reported as a bad tag as well */
	dcg_abort ("erts_pass2", "Transduction is not yet available");
	break;
      default: dcg_bad_tag (hnd -> transduce_option, "erts_pass2");
    };
}

/*
   Output the results to the output file of the handle: for tr_count
   only the number of results is reported, otherwise the data of every
   parse result is printed in list order.
*/
void erts_output_parse_results (EagrtsHandle hnd)
{ ParseResult cursor;
  if (hnd -> transduce_option == tr_count)
    { fprintf (hnd -> output_file, "%d parses found\n", hnd -> nr_of_results);
      return;
    };
  cursor = hnd -> all_results;
  while (cursor != parse_result_nil)
    { dcg_fprint_dstring (hnd -> output_file, cursor -> data);
      cursor = cursor -> next;
    };
}

/*
   Pick the final state from the first of the parse results;
   state_nil is returned when no results have been recorded.
*/
State erts_get_final_parse_state (EagrtsHandle hnd)
{ if (hnd -> nr_of_results)
    return (hnd -> all_results -> next_state);
  return (state_nil);
}
