/*
   File: ebase_runtime_utils.c
   Runtime utilities

   Copyright (C) 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: ebase_utils.c,v 1.11 2012/12/27 17:59:49 marcs Exp $
*/

/* standard includes */
#include <stdio.h>
#include <string.h>

/* libdcg includes */
#include <dcg.h>
#include <dcg_error.h>
#include <dcg_alloc.h>
#include <dcg_string.h>
#include <dcg_dstring.h>

/* libeagbase includes */
#include "ebase_ds.h"
#include "ebase_affix_value_utils.h"
#include "ebase_utils.h"
#include "ebase_lexicon_impl.h"

/*
   Simple convertors
*/
/*
   Give the printable spelling of an operator tag.
   Different tags may share one spelling: every multiplication variant
   yields "*", every addition variant "+", and so on.
   An unknown tag is handed to dcg_bad_tag; should that call return,
   the empty string is delivered.

   NOTE(review): modulo is spelled with two percent signs ("%%") and
   divides with one ("%"); confirm that the doubled percent is the
   intended spelling and not a leftover printf escape.
*/
char *ebs_string_from_operator (operator op)
{
  char *spelling = "";

  switch (op) {
  case times:
  case int_times_int:
  case real_times_real:
  case int_times_text:
    spelling = "*";
    break;

  case modulo:
    spelling = "%%";
    break;

  case divides:
    spelling = "%";
    break;

  case plus:
  case int_plus_int:
  case real_plus_real:
  case text_plus_text:
    spelling = "+";
    break;

  case minus:
  case int_minus_int:
  case real_minus_real:
    spelling = "-";
    break;

  case a_union:
  case bitwise_or:
    spelling = "|";
    break;

  case a_part:
  case bitwise_and:
    spelling = "&";
    break;

  case shift_left:
    spelling = "<<";
    break;

  case shift_right:
    spelling = ">>";
    break;

  case bitwise_xor:
    spelling = "^";
    break;

  case bitwise_not:
    spelling = "~";
    break;

  default:
    dcg_bad_tag (op, "string_from_operator");
    break;
  }
  return (spelling);
}

/*
   Give a human readable description of a propagation kind.
   An unknown kind is handed to dcg_bad_tag; should that call return,
   string_nil is delivered.
*/
char *ebs_string_from_propagation_kind (propagation_kind kind)
{
  char *description = string_nil;

  switch (kind) {
  case p_bidirectional:
    description = "bidirectional";
    break;
  case p_left_to_right:
    description = "left to right";
    break;
  case p_lower_to_upper:
    description = "lower to upper";
    break;
  case p_equal:
    description = "equality";
    break;
  case p_unequal:
    description = "unequality";
    break;
  case p_restrict:
    description = "restricting";
    break;
  default:
    dcg_bad_tag (kind, "string_from_propagation_kind");
    break;
  }
  return (description);
}

/*
   Utilities for non UTF8 (single byte) conversions
*/
/*
   Build a freshly allocated, NUL terminated string holding one byte
   per entry of char_encs. The buffer is obtained from dcg_malloc and
   is returned to the caller.
   An entry of 256 or more is reported through dcg_abort and
   contributes no byte to the result.
*/
string ebs_convert_non_utf8_string (int_list char_encs)
{
  /* One byte per encoding plus the terminator */
  char *result = (char *) dcg_malloc (char_encs -> size + 1);
  int written = 0;
  int pos = 0;

  while (pos < char_encs -> size) {
    int code = char_encs -> array[pos];

    if (code >= 256)
      dcg_abort ("ebs_convert_non_utf8_string", "too large encoding");
    else {
      result[written] = (char) code;
      written++;
    }
    pos++;
  }
  result[written] = '\0';
  return (result);
}

/*
   Utilities for UTF8 conversions
*/
/*
   Encode a single character encoding as a UTF-8 byte sequence and
   return it as a string created by new_string.

     below 0x80       1 byte
     below 0x800      2 bytes
     below 0x10000    3 bytes
     below 0x200000   4 bytes

   Anything from 0x200000 upwards is reported through dcg_abort.
*/
string ebs_convert_utf8_char (int char_enc)
{
  char bytes[8];		/* at most 4 bytes and a terminator are stored */
  int len = 0;

  if (char_enc >= 0x200000)
    dcg_abort ("ebs_convert_utf8_char", "too large utf-8 encoding");
  else if (char_enc >= 0x10000) {
    /* lead byte 11110xxx followed by three continuation bytes */
    bytes[len++] = (char)(0xF0 | ((char_enc >> 16) & 0x7));
    bytes[len++] = (char)(0x80 | ((char_enc >> 12) & 0x3F));
    bytes[len++] = (char)(0x80 | ((char_enc >> 6) & 0x3F));
    bytes[len++] = (char)(0x80 | (char_enc & 0x3F));
  } else if (char_enc >= 0x0800) {
    /* lead byte 1110xxxx followed by two continuation bytes */
    bytes[len++] = (char)(0xE0 | ((char_enc >> 12) & 0x0F));
    bytes[len++] = (char)(0x80 | ((char_enc >> 6) & 0x3F));
    bytes[len++] = (char)(0x80 | (char_enc & 0x3F));
  } else if (char_enc >= 128) {
    /* lead byte 110xxxxx followed by one continuation byte */
    bytes[len++] = (char)(0xC0 | ((char_enc >> 6) & 0x1F));
    bytes[len++] = (char)(0x80 | (char_enc & 0x3F));
  } else {
    /* everything below 128 is stored as a single byte */
    bytes[len++] = (char) char_enc;
  }

  bytes[len] = '\0';
  return (new_string (bytes));
}

/*
   Convert every entry of char_encs with ebs_convert_utf8_char and
   collect the resulting strings, in order, in a new string_list.
*/
string_list ebs_convert_utf8_chars (int_list char_encs)
{
  string_list result = init_string_list (char_encs -> size);
  int pos = 0;

  while (pos < char_encs -> size) {
    string encoded = ebs_convert_utf8_char (char_encs -> array[pos]);

    app_string_list (result, encoded);
    pos++;
  }
  return (result);
}

/*
   Encode a list of character encodings as one UTF-8 string.
   The bytes are first gathered in a scratch buffer of 4 bytes per
   encoding plus a terminator; the result is a copy made by new_string,
   after which the scratch buffer is released through dcg_detach.
   An encoding of 0x200000 or more is reported through dcg_abort and
   contributes no bytes.
*/
string ebs_convert_utf8_string (int_list char_encs)
{
  char *scratch = (char *) dcg_malloc (char_encs -> size * 4 + 1);
  char *out = scratch;
  string result;
  int pos = 0;

  while (pos < char_encs -> size) {
    int code = char_encs -> array[pos];

    if (code >= 0x200000)
      dcg_abort ("ebs_convert_utf8_string", "too large utf-8 encoding");
    else if (code >= 0x10000) {
      /* four byte sequence */
      out[0] = (char)(0xF0 | ((code >> 16) & 0x7));
      out[1] = (char)(0x80 | ((code >> 12) & 0x3F));
      out[2] = (char)(0x80 | ((code >> 6) & 0x3F));
      out[3] = (char)(0x80 | (code & 0x3F));
      out += 4;
    } else if (code >= 0x0800) {
      /* three byte sequence */
      out[0] = (char)(0xE0 | ((code >> 12) & 0x0F));
      out[1] = (char)(0x80 | ((code >> 6) & 0x3F));
      out[2] = (char)(0x80 | (code & 0x3F));
      out += 3;
    } else if (code >= 128) {
      /* two byte sequence */
      out[0] = (char)(0xC0 | ((code >> 6) & 0x1F));
      out[1] = (char)(0x80 | (code & 0x3F));
      out += 2;
    } else {
      /* single byte */
      out[0] = (char) code;
      out += 1;
    }
    pos++;
  }

  *out = '\0';
  result = new_string (scratch);
  dcg_detach ((void **) &scratch);
  return (result);
}

/*
   MS: Note that we may need to take care of special characters in text values
*/
/*
   Write one character of a text value to 'info' in escaped form:

     form feed, newline, carriage return, tab   \f \n \r \t
     double quote                               \"
     backslash                                  \\
     any other byte below 32 or from 127 up     \xHH (two hex digits)
     everything else                            the character itself

   The backslash itself is escaped so that the output stays unambiguous
   (a literal backslash followed by 'n' no longer looks like a newline),
   and DEL (127) is written in hex like the other control characters.
*/
static void ebs_dump_char (FILE *info, char ch)
{
  /* Go through unsigned char so that bytes from 0x80 up have the same
     value whether plain char is signed or unsigned */
  unsigned int code = (unsigned char) ch;

  switch (ch) {
  case '\f': fputs ("\\f", info); break;
  case '\n': fputs ("\\n", info); break;
  case '\r': fputs ("\\r", info); break;
  case '\t': fputs ("\\t", info); break;
  case '"':  fputs ("\\\"", info); break;
  case '\\': fputs ("\\\\", info); break;
  default:
    if ((code < 32) || (code >= 127))
      fprintf (info, "\\x%02x", code);
    else
      fputc (ch, info);
    break;
  }
}

/*
   Write a NUL terminated text to 'info' between double quotes.
   Each character is written through ebs_dump_char.
*/
void ebs_dump_text (FILE *info, char *text)
{
  char *cursor = text;

  fputc ('"', info);
  while (*cursor) {
    ebs_dump_char (info, *cursor);
    cursor++;
  }
  fputc ('"', info);
}

/*
   Write the characters in the range [from, to) to 'info' between
   double quotes; 'to' itself is not written.
   Each character is written through ebs_dump_char, and a NUL byte
   inside the range does not end the output.
*/
void ebs_dump_text_from_to (FILE *info, char *from, char *to)
{
  char *cursor = from;

  fputc ('"', info);
  while (cursor < to) {
    ebs_dump_char (info, *cursor);
    cursor++;
  }
  fputc ('"', info);
}

/*
   Write a lattice value as "{name|name|...}" using the element names
   of its domain.
   A private duplicate of the value is worked on: the domain elements
   are visited in order, and each element whose value passes
   ebs_lattice_value_is_subset against what is left is named, after
   which ebs_diff_lattice_values yields the new remainder. The walk
   ends early as soon as that difference call reports failure.
*/
static void ebs_dump_lattice (FILE *info, affix_value value, rt_domain dom)
{
  affix_value rest = rdup_affix_value (value);
  rt_element_list members = dom -> elts;
  int printed = 0;
  int nr;

  fputs ("{", info);
  for (nr = 0; nr < members -> size; nr++) {
    rt_element member = members -> array[nr];
    affix_value remainder;

    if (ebs_lattice_value_is_subset (member -> value, rest)) {
      /* separate all names but the first */
      if (printed) fputs ("|", info);
      printed = 1;
      fprintf (info, "%s", member -> name);

      if (!ebs_diff_lattice_values (rest, member -> value, &remainder))
        break;

      /* continue with what remains after removing this element */
      detach_affix_value (&rest);
      rest = remainder;
    }
  }
  fputs ("}", info);
  detach_affix_value (&rest);
}

/*
   Write an affix value to 'info'.

   A nil value is written as "(affix_value_nil)". Otherwise a rule
   number other than -1 is written first as "[R<nr>]", followed by
   a rendering that depends on the tag:
     null       "(Null)"
     text       quoted text (ebs_dump_text)
     int/real   the number ("%d" / "%g")
     lattice    the element names (ebs_dump_lattice), using the entry of
                'domains' selected by the value's dom field
     composed   "[Comp <marker>: part,part,... ]", parts dumped recursively
   Any other tag is reported through dcg_bad_tag.
*/
void ebs_dump_affix_value (FILE *info, affix_value value, rt_domain_list domains)
{
  if (value == affix_value_nil) {
    fputs ("(affix_value_nil)", info);
    return;
  }

  if (value -> rule_nr != -1)
    fprintf (info, "[R%d]", value -> rule_nr);

  switch (value -> tag) {
  case TAGNull_value:
    fputs ("(Null)", info);
    break;

  case TAGText_value:
    ebs_dump_text (info, value -> Text_value.text);
    break;

  case TAGInt_value:
    fprintf (info, "%d", value -> Int_value.ival);
    break;

  case TAGReal_value:
    fprintf (info, "%g", value -> Real_value.rval);
    break;

  case TAGSmall_lattice:
    ebs_dump_lattice (info, value, domains -> array[value -> Small_lattice.dom]);
    break;

  case TAGLarge_lattice:
    ebs_dump_lattice (info, value, domains -> array[value -> Large_lattice.dom]);
    break;

  case TAGComposed_value: {
    affix_value_list members = value -> Composed_value.parts;
    int nr = 0;

    fprintf (info, "[Comp %d: ", value -> Composed_value.marker);
    while (nr < members -> size) {
      if (nr > 0) fputs (",", info);
      ebs_dump_affix_value (info, members -> array[nr], domains);
      nr++;
    }
    fputs (" ]", info);
    break;
  }

  default:
    dcg_bad_tag (value -> tag, "ebs_dump_value");
    break;
  }
}

/*
   Write a domain to 'info': a header line with the given index and the
   domain width, then one line per element with its position and name.
   When 'wval' is non zero the element value is added to each line
   through pp_affix_value.
*/
void ebs_dump_domain (FILE *info, int idx, rt_domain dom, int wval)
{
  rt_element_list members = dom -> elts;
  int nr = 0;

  fprintf (info, "Domain %d (width = %d):\n", idx, dom -> width);
  while (nr < members -> size) {
    rt_element member = members -> array[nr];

    fprintf (info, "  Element %d: %s", nr, member -> name);
    if (wval) {
      fputs (", value = ", info);
      pp_affix_value (info, member -> value);
    }
    fputs ("\n", info);
    nr++;
  }
}

/*
   Write every alternative on a line of its own, indented by two blanks.
   The elements of an alternative are each followed by one blank:
   a marker is written as its text, a reference as its number.
   Any other element tag is reported through dcg_bad_tag.
*/
static void ebs_dump_alts (FILE *info, rt_alt_list alts)
{
  int alt_nr;

  for (alt_nr = 0; alt_nr < alts -> size; alt_nr++) {
    rt_elem_list members = alts -> array[alt_nr] -> elems;
    int elem_nr = 0;

    fputs ("  ", info);
    while (elem_nr < members -> size) {
      rt_elem member = members -> array[elem_nr];

      switch (member -> tag) {
      case TAGMarker:
        fprintf (info, "%s", member -> Marker.txt);
        break;
      case TAGRef:
        fprintf (info, "%d", member -> Ref.rnr);
        break;
      default:
        dcg_bad_tag (member -> tag, "ebs_dump_alts");
        break;
      }
      fputs (" ", info);
      elem_nr++;
    }
    fputs ("\n", info);
  }
}

/*
   Note that the runtime types are always saved with a value.
   This may be the undefined value though
*/
/*
   Write a runtime type to 'info' as "Type nr <rule_nr> <name>: "
   followed by a description that depends on the tag of the type.
   For a tree type the alternatives follow on subsequent lines.
   'wval' controls whether values are written through pp_affix_value.
   An unknown tag is reported through dcg_bad_tag.

   NOTE(review): for integer, real and text types with a non null value
   the phrase ", with constant value " is written even when 'wval' is 0,
   so nothing follows it in that case; confirm that this is intended.
*/
void ebs_dump_type (FILE *info, rt_type rt, int wval)
{
  affix_value val = rt -> value;

  fprintf (info, "Type nr %d %s: ", rt -> rule_nr, rt -> name);
  switch (rt -> tag) {
  case TAGSynonym_type:
    fprintf (info, "synonym of %d\n", rt -> Synonym_type.snr);
    break;

  case TAGAny_type:
    fputs ("any (polymorfic acceptor)\n", info);
    break;

  case TAGInt_type:
    fputs ("integer", info);
    if (val -> tag != TAGNull_value) {
      fputs (", with constant value ", info);
      if (wval) pp_affix_value (info, val);
    }
    fputs ("\n", info);
    break;

  case TAGReal_type:
    fputs ("real", info);
    if (val -> tag != TAGNull_value) {
      fputs (", with constant value ", info);
      if (wval) pp_affix_value (info, val);
    }
    fputs ("\n", info);
    break;

  case TAGText_type:
    fputs ("text", info);
    if (val -> tag == TAGNull_value) {
      /* no constant value: mention the substructure unless lexgen is set */
      if (!rt -> lexgen)
        fputs (", with meta defined substructure", info);
    } else {
      fputs (", with constant value ", info);
      if (wval) pp_affix_value (info, val);
    }
    fputs ("\n", info);
    break;

  case TAGLattice_type:
    fprintf (info, "lattice over domain %d", rt -> Lattice_type.dom);
    if (wval) {
      fputs (", with value ", info);
      pp_affix_value (info, val);
    }
    fputs ("\n", info);
    break;

  case TAGTree_type:
    fputs ("tree", info);
    if (rt -> lexgen) fputs (", enumerable", info);
    fputs ("\n", info);
    ebs_dump_alts (info, rt -> Tree_type.alts);
    break;

  default:
    dcg_bad_tag (rt -> tag, "ebs_dump_type");
    break;
  }
}

/*
   Write a lexicon nonterminal to 'info' as "Rule <rule_nr>: " followed
   by its name parts and formal arguments.

   lt -> name_chars drives the layout: a non zero entry consumes the next
   name part, a zero entry consumes the next formal argument. Successive
   formals are grouped as " (a, b)". A formal whose entry in lt -> crits
   is set is written as ">TEXT", any other as the number found in
   lt -> formals. A fact number of 0 or more is appended as ", fact <nr>".
*/
void ebs_dump_lex_nont (FILE *info, lex_nont lt)
{
  int_list layout = lt -> name_chars;
  int part_nr = 0;
  int formal_nr = 0;
  int args_open = 0;
  int pos;

  fprintf (info, "Rule %d: ", lt -> rule_nr);
  for (pos = 0; pos < layout -> size; pos++) {
    if (!layout -> array[pos]) {
      /* formal argument: open a new group or continue the current one */
      fputs (args_open ? ", " : " (", info);
      if (lt -> crits -> array[formal_nr])
        fputs (">TEXT", info);
      else
        fprintf (info, "%d", lt -> formals -> array[formal_nr]);
      args_open = 1;
      formal_nr++;
    } else {
      /* name part: close a pending argument group first */
      if (args_open) fputs (") ", info);
      else if (part_nr) fputs (" ", info);
      fprintf (info, "%s", lt -> name_parts -> array[part_nr]);
      args_open = 0;
      part_nr++;
    }
  }

  if (args_open) fputs (")", info);
  if (lt -> fact_nr >= 0)
    fprintf (info, ", fact %d", lt -> fact_nr);
}

/*
   Write the lexicon call with number 'call_id' to 'info'.

   A call_id outside lex -> rt_lex_calls is reported through
   dcg_internal_error. The first entry of the call selects the
   nonterminal; when it is not below the number of lexicon nonterminals
   the call is written as "Terminal <nr>" plus a newline, where <nr> is
   the entry minus that number.

   Otherwise the name parts and arguments of the nonterminal are written,
   driven by its name_chars: a non zero entry consumes the next name
   part, a zero entry the next argument. Successive arguments are grouped
   as " (a, b)". An argument whose entry in crits is set is written as
   ">TEXT"; any other is looked up in lex -> rt_values through the
   matching entry of the call (offset by one) and dumped with
   ebs_dump_affix_value.
*/
void ebs_dump_call (FILE *info, Lexicon lex, int call_id)
{
  int_list call, layout;
  lex_nont nont;
  int nont_nr, pos;
  int part_nr = 0;
  int arg_nr = 0;
  int args_open = 0;

  if ((call_id < 0) || (call_id >= lex -> rt_lex_calls -> size))
    dcg_internal_error ("ebs_dump_call");

  call = lex -> rt_lex_calls -> array[call_id];
  nont_nr = call -> array[0];
  if (nont_nr >= lex -> rt_lex_nonts -> size) {
    /* numbers past the nonterminals denote terminals */
    fprintf (info, "Terminal %d\n", nont_nr - lex -> rt_lex_nonts -> size);
    return;
  }

  nont = lex -> rt_lex_nonts -> array[nont_nr];
  layout = nont -> name_chars;
  for (pos = 0; pos < layout -> size; pos++) {
    if (!layout -> array[pos]) {
      /* argument: open a new group or continue the current one */
      fputs (args_open ? ", " : " (", info);
      if (nont -> crits -> array[arg_nr])
        fputs (">TEXT", info);
      else {
        affix_value actual = lex -> rt_values -> array[call -> array[arg_nr + 1]];

        ebs_dump_affix_value (info, actual, lex -> rt_domains);
      }
      args_open = 1;
      arg_nr++;
    } else {
      /* name part: close a pending argument group first */
      if (args_open) fputs (") ", info);
      else if (part_nr) fputs (" ", info);
      fprintf (info, "%s", nont -> name_parts -> array[part_nr]);
      args_open = 0;
      part_nr++;
    }
  }

  if (args_open) fputs (")", info);
}
