/*
   File: erts_leaf_parser.c
   Defines the functions to parse terminals, lexicon nonterminals,
   regular expressions, and the special parse functions for both the
   first and second level of the parse. 

   Copyright 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: erts_leaf_parser.c,v 1.13 2013/03/13 15:12:00 marcs Exp $"
*/

/* global includes */
#include <stdio.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>

/* libebase includes */
#include <ebase_version.h>
#include <ebase_lexicon.h>
#include <ebase_lexicon_impl.h>

/* local includes */
#include "erts_handle.h"
#include "erts_handle_impl.h"
#include "erts_tree.h"
#include "erts_tree_impl.h"
#include "erts_trellis.h"
#include "erts_trellis_impl.h"
#include "erts_cont.h"
#include "erts_trace.h"
#include "erts_leaf_parser.h"

/*
   Match leaf transitions iterates over the transitions from a state
   for a matching transition class and (terminal) number.

   When finding matching transitions in the trellis, it will build
   a corresponding leaf node. Optionally, it may also create an affix
   node with a text value by copying the transition data. It then
   creates a single affix position carrying the affix node in the
   lower side with the leaf node. It then pushes the tree node on
   the tree stack and calls its continuation. Upon returning, every
   action must be undone in the reverse order.
*/
static void match_transition (EagrtsHandle hnd, State state, short class, int term_nr, int nr_pos)
{
  Transition cur;

  /* Walk the linked list of transitions leaving this state */
  for (cur = state -> trans; cur != transition_nil; cur = cur -> next)
    {
      /* Only transitions of the requested class and number qualify */
      if (cur -> class != class) continue;
      if (cur -> nr != term_nr) continue;

      /*
         Build the leaf node for this transition, bracketed by trace
         calls when tracing is switched on, and then mark the
         transition as used. The list is only advanced afterwards.
      */
      if (hnd -> tracing)
        erts_trace_enter_transition (hnd, cur);
      erts_make_leaf_node (hnd, cur, nr_pos);
      if (hnd -> tracing)
        erts_trace_leave_transition (hnd, cur);
      cur -> flags |= TRANS_USED;
    }
}

/*
   Parse terminal has the following calling convention

   cont_push_int (hnd, nr_pos);		 		nr of positions (0 or 1)
   cont_push_int (hnd, term_nr);			terminal number
   cont_push_continuation (hnd, erts_parse_terminal);	to get called
*/
void erts_parse_terminal (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  State origin = trellis -> curr_state;
  /* Arguments come off the continuation stack: terminal number first */
  int terminal = cont_pop_int (hnd);
  int positions = cont_pop_int (hnd);
  /* Stays equal to origin unless an epsilon transition is taken below */
  State successor = origin;

  /*
     When the current state is in lexical state E, more suffixes may
     follow, or an epsilon transition leads to state W, after which
     only prefixes (or entire words) may be seen. In state I only
     further prefixes or infixes are expected. The scanner for lexicon
     entries takes this into account.
  */
  erts_scan_phases (trellis, origin, 1);
  match_transition (hnd, origin, TransTerminal, terminal, positions);

  /* The epsilon transition is only attempted from lexical state E */
  if (origin -> lex_state == LEX_STATE_E)
    successor = erts_handle_epsilon_transition (trellis, origin);

  /* A genuinely different follow state gets its own scan and match */
  if (successor != origin)
    {
      erts_scan_phases (trellis, successor, 1);
      match_transition (hnd, successor, TransTerminal, terminal, positions);
    }

  /* Restore the continuation stack: arguments back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_int (hnd, terminal);
  cont_push_continuation (hnd, erts_parse_terminal);
}

/*
   Parse word has the following calling convention

   cont_push_int (nrps);				nr of positions (0 or 1)
   cont_push_continuation (erts_parse_word);		to get called
*/
void erts_parse_word (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  /* The epsilon transition is taken first, before the argument is popped */
  State target = erts_handle_epsilon_transition (trellis, trellis -> curr_state);
  int positions = cont_pop_int (hnd);

  /* Look for $word transitions leaving the target state */
  erts_scan_phases (trellis, target, 2);
  match_transition (hnd, target, TransWord, 0, positions);

  /* Restore the continuation stack: argument back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_continuation (hnd, erts_parse_word);
}

/*
   Parse match has the following calling convention

   cont_push_int (nrps);				nr of positions (0 or 1)
   cont_push_int (regexp_nr);				regular expression number
   cont_push_continuation (erts_parse_match);		to get called
*/
void erts_parse_match (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  State origin = trellis -> curr_state;
  /* Arguments come off the continuation stack: regexp number first */
  int regexp = cont_pop_int (hnd);
  int positions = cont_pop_int (hnd);
  /* Stays equal to origin unless an epsilon transition is taken below */
  State successor = origin;

  /*
     When the current state is in lexical state E, more suffixes may
     follow, or an epsilon transition leads to state W, after which
     only prefixes (or entire words) may be seen. In state I only
     further prefixes or infixes are expected. Note: regexp transitions
     other than a full word transition should be rare.
  */
  erts_scan_phases (trellis, origin, 3);
  match_transition (hnd, origin, TransMatch, regexp, positions);

  /* The epsilon transition is only attempted from lexical state E */
  if (origin -> lex_state == LEX_STATE_E)
    successor = erts_handle_epsilon_transition (trellis, origin);

  /* A genuinely different follow state gets its own scan and match */
  if (successor != origin)
    {
      erts_scan_phases (trellis, successor, 3);
      match_transition (hnd, successor, TransMatch, regexp, positions);
    }

  /* Restore the continuation stack: arguments back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_int (hnd, regexp);
  cont_push_continuation (hnd, erts_parse_match);
}

/*
   Parse skip has the following calling convention

   cont_push_int (nrps);				nr of positions (0 or 1)
   cont_push_int (regexp_nr);				regular expression number
   cont_push_continuation (erts_parse_skip);		to get called
*/
void erts_parse_skip (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  State origin = trellis -> curr_state;
  /* Arguments come off the continuation stack: regexp number first */
  int regexp = cont_pop_int (hnd);
  int positions = cont_pop_int (hnd);
  /* Stays equal to origin unless an epsilon transition is taken below */
  State successor = origin;

  /*
     When the current state is in lexical state E, more suffixes may
     follow, or an epsilon transition leads to state W, after which
     only prefixes (or entire words) may be seen. In state I only
     further prefixes or infixes are expected. Note: regexp transitions
     other than a full word transition should be rare.
  */
  erts_scan_phases (trellis, origin, 4);
  match_transition (hnd, origin, TransSkip, regexp, positions);

  /* The epsilon transition is only attempted from lexical state E */
  if (origin -> lex_state == LEX_STATE_E)
    successor = erts_handle_epsilon_transition (trellis, origin);

  /* A genuinely different follow state gets its own scan and match */
  if (successor != origin)
    {
      erts_scan_phases (trellis, successor, 4);
      match_transition (hnd, successor, TransSkip, regexp, positions);
    }

  /* Restore the continuation stack: arguments back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_int (hnd, regexp);
  cont_push_continuation (hnd, erts_parse_skip);
}

/*
   Parse other has the following calling convention

   cont_push_int (nrps);				nr of positions (0 or 1)
   cont_push_continuation (erts_parse_other);		to get called
*/
void erts_parse_other (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  /* The epsilon transition is taken first, before the argument is popped */
  State target = erts_handle_epsilon_transition (trellis, trellis -> curr_state);
  int positions = cont_pop_int (hnd);

  /* Look for $other transitions leaving the target state */
  erts_scan_phases (trellis, target, 5);
  match_transition (hnd, target, TransOther, 0, positions);

  /* Restore the continuation stack: argument back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_continuation (hnd, erts_parse_other);
}

/*
   Parse any has the following calling convention

   cont_push_int (nrps);				nr of positions (0 or 1)
   cont_push_continuation (erts_parse_any);		to get called
*/
void erts_parse_any (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  /* The epsilon transition is taken first, before the argument is popped */
  State target = erts_handle_epsilon_transition (trellis, trellis -> curr_state);
  int positions = cont_pop_int (hnd);

  /*
     Look for $any transitions: run the regular scan phases first,
     then the dedicated $any scan, and finally match what was found.
  */
  erts_scan_phases (trellis, target, 5);
  erts_scan_any_transition (trellis, target);
  match_transition (hnd, target, TransAny, 0, positions);

  /* Restore the continuation stack: argument back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_continuation (hnd, erts_parse_any);
}

/*
   Match lex nonterminal transitions iterates over the transitions from a state
   for a matching transition class and (lexicon nonterminal) number.

   When finding matching transitions in the trellis, it will build a corresponding
   lexicon nonterminal node together with the positions corresponding to the
   signature of this lexicon nonterminal call. At the lower side of these positions
   the affix values corresponding with the call will be attached.
   It then pushes the tree node on the tree stack and calls its continuation.
   Upon returning, every action must be undone in the reverse order.
*/
static void match_lex_nont_transition (EagrtsHandle hnd, State state, int lex_nont_nr)
{
  Transition cur;

  /* Walk the linked list of transitions leaving this state */
  for (cur = state -> trans; cur != transition_nil; cur = cur -> next)
    {
      /* Only lexicon nonterminal transitions with the right number qualify */
      if (cur -> class != TransLexNont) continue;
      if (cur -> nr != lex_nont_nr) continue;

      /*
         Build the lexicon nonterminal node for this transition,
         bracketed by trace calls when tracing is switched on, and
         then mark the transition as used. The list is only advanced
         afterwards.
      */
      if (hnd -> tracing)
        erts_trace_enter_transition (hnd, cur);
      erts_make_lex_nont_node (hnd, cur);
      if (hnd -> tracing)
        erts_trace_leave_transition (hnd, cur);
      cur -> flags |= TRANS_USED;
    }
}

/*
   Parse lexicon nonterminal has the following calling convention

   cont_push_int (lex_nont_nr);				The lexicon nonterminal number
   cont_push_continuation (erts_parse_lex_nont);	to get called
*/
void erts_parse_lex_nont (EagrtsHandle hnd)
{
  Trellis trellis = hnd -> trellis;
  State origin = trellis -> curr_state;
  /* The only argument on the continuation stack is the nonterminal number */
  int nonterminal = cont_pop_int (hnd);
  /* Stays equal to origin unless an epsilon transition is taken below */
  State successor = origin;

  /*
     When the current state is in lexical state E, more suffixes may
     follow, or an epsilon transition leads to state W, after which
     only prefixes (or entire words) may be seen. In state I only
     further prefixes or infixes are expected. Note: lexicon
     nonterminal transitions other than a full word transition are
     rare (about 20 suffixes and 30 (chemical) prefixes in the npx
     grammar). For agglutinative languages (Japanese, Klingon),
     however, this may be a more common phenomenon.
  */
  erts_scan_phases (trellis, origin, 1);
  match_lex_nont_transition (hnd, origin, nonterminal);

  /* The epsilon transition is only attempted from lexical state E */
  if (origin -> lex_state == LEX_STATE_E)
    successor = erts_handle_epsilon_transition (trellis, origin);

  /* A genuinely different follow state gets its own scan and match */
  if (successor != origin)
    {
      erts_scan_phases (trellis, successor, 1);
      match_lex_nont_transition (hnd, successor, nonterminal);
    }

  /* Restore the continuation stack: argument back, then ourselves */
  cont_push_int (hnd, nonterminal);
  cont_push_continuation (hnd, erts_parse_lex_nont);
}

/*
   Parse end of text has the following calling convention

   cont_push_int (0);					nr of positions (0)
   cont_push_continuation (erts_parse_eot);		to get called
*/
void erts_parse_eot (EagrtsHandle hnd)
{
  /* The argument is popped before the trellis is consulted */
  int positions = cont_pop_int (hnd);
  Trellis trellis = hnd -> trellis;
  Transition eos = erts_scan_eos_transition (trellis, trellis -> curr_state);

  /* No transition found by the scan: nothing to build, only restore the stack */
  if (eos == transition_nil)
    goto restore;

  /*
     Build the leaf node, bracketed by trace calls when tracing is
     switched on. The leaf is always created with zero positions;
     the popped count is only saved and restored.
  */
  if (hnd -> tracing)
    erts_trace_enter_transition (hnd, eos);
  erts_make_leaf_node (hnd, eos, 0);
  if (hnd -> tracing)
    erts_trace_leave_transition (hnd, eos);
  eos -> flags |= TRANS_USED;

restore:
  /* Restore the continuation stack: argument back, then ourselves */
  cont_push_int (hnd, positions);
  cont_push_continuation (hnd, erts_parse_eot);
}
