/*
   File: arts_loader.c
   Loads the abstract machine program from object file into datastructures
   Implements the interface between generator and runtime system

   Copyright 2006 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: arts_loader.c,v 1.18 2009/02/25 09:59:12 olafs Exp $"
*/

/* If we have a config.h, include it */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

/* standard includes */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* libabase includes */
#include <abase_error.h>
#include <abase_memalloc.h>
#include <abase_fileutil.h>

/* gen/as includes */
#include <opcode.h>

/* local includes */
#include "arts_ds.h"
#include "arts_loader.h"

/*
   Skip an optional leading "#!" line of the object file
   The first two characters are read; when they are '#' and '!' the rest
   of that line, up to and including the newline, is consumed. Otherwise
   the file position is moved back over the two characters just read.
   In both cases the checksum of the file is reset afterwards.
*/
static void maybe_skip_shebang_line (BinFile object)
{ char first, second;
  abs_bin_load_char (object, &first);
  abs_bin_load_char (object, &second);

  if ((first != '#') || (second != '!'))
    { /* No shebang: step back over the two characters */
      abs_bin_seek (object, -2, SEEK_CUR);
    }
  else
    { /* Shebang: read on until the end of the line */
      char ch;
      do abs_bin_load_char (object, &ch);
      while (ch != '\n');
    }
  abs_bin_reset_checksum (object);
}

#define OBJECT_SUFFIX "aob"
#define OBJECT_KIND "object"

/*
   Open the object file of the current grammar
   The file name is "<grammar_name>.aob", built from arts_ifd.grammar_name.
   After opening, an optional shebang line is skipped and the version
   header is verified against the kind "object".
   Returns the opened file; the temporary file name is freed before return.
*/
static BinFile open_object_code (void)
{ char *object_fname = abs_new_fmtd_string ("arts_load_agfl_object_code", "%s.%s",
                                            arts_ifd.grammar_name, OBJECT_SUFFIX);
  BinFile object = abs_bin_fopen (object_fname, "r");
  maybe_skip_shebang_line (object);
  abs_bin_verify_version (object, OBJECT_KIND);
  abs_free (object_fname, "arts_load_agfl_object_code");
  return (object);
}

/*
   Read one segment size from the object file
   A negative size cannot describe a segment and would turn into a huge
   allocation request once converted to size_t, so it is reported through
   abs_bug (assumed not to return -- TODO confirm).
*/
static size_t load_segment_size (BinFile object)
{ int isize;
  abs_bin_load_int (object, &isize);
  if (isize < 0)
    abs_bug ("load_sizes_and_allocate_segments", "negative segment size in binary");
  return ((size_t) isize);
}

/*
   Read the four segment sizes from the object file, in the order
   code, interface, data and text, and allocate the code, data and
   text segments with the sizes found.
   The sizes and the allocated segments are stored in arts_ifd.
*/
static void load_sizes_and_allocate_segments (BinFile object)
{ int unused_interface_size;

  /* Code segment */
  arts_ifd.code_size = load_segment_size (object);
  arts_ifd.code = abs_malloc (arts_ifd.code_size, "load_sizes_and_allocate_segments");

  /* Interface segment: the size is read to stay in step with the file, but not kept */
  abs_bin_load_int (object, &unused_interface_size);

  /* Data segment */
  arts_ifd.data_size = load_segment_size (object);
  arts_ifd.data = abs_malloc (arts_ifd.data_size, "load_sizes_and_allocate_segments");

  /* Text segment */
  arts_ifd.rotext_size = load_segment_size (object);
  arts_ifd.rotext = abs_malloc (arts_ifd.rotext_size, "load_sizes_and_allocate_segments");
}

/*
   Load an absolute integer
   The integer is read as a tuple (char segment, int value). The segment
   must be 0; any other segment is reported through abs_bug.
   The value read is stored in *data and the counter ilc is advanced by 8.
*/
static int ilc;
static void load_absolute_int (BinFile object, int *data)
{ char segment;
  int loaded;
  abs_bin_load_char (object, &segment);
  if (segment != 0)
    abs_bug ("load_absolute_int", "absolute integer expected in binary");
  abs_bin_load_int (object, &loaded);
  *data = loaded;
  ilc += 8;
}

/*
   Load and relocate address
   An operand/word is saved as a tuple (char segment, int value).
   Segment 1 relocates the value against the code segment, 3 against the
   data segment and 4 against the text segment. Segment 0 denotes an
   absolute value, which must be 0 and yields a NULL address.
   Any other segment is reported through abs_bug.
   The resulting address is stored in *data.
*/
static void load_address (BinFile object, DATA **data)
{ char *base = NULL;
  char ch;
  int offset;
  abs_bin_load_char (object, &ch);
  switch (ch)
    { case 0: break;
      case 1: base = (char *) arts_ifd.code; break;
      case 3: base = (char *) arts_ifd.data; break;
      case 4: base = (char *) arts_ifd.rotext; break;
      default:
        abs_bug ("load_address", "address of data or text expected in binary");
        break;
    }
  abs_bin_load_int (object, &offset);
  if ((ch == 0) && (offset != 0))
    abs_bug ("load_address", "Found non zero absolute address");

  /* Relocate only against a real segment: arithmetic on a NULL base is undefined */
  if (base == NULL) *data = NULL;
  else *data = (DATA *) (base + offset);
}

/*
   Load and relocate word
   The word is read as a tuple (char segment, int value) and immediately
   written at index *lc of the data segment (data != 0) or of the code
   segment (data == 0); *lc is then advanced by one word.
   Segments 1, 3 and 4 turn the value into an address within the code,
   data and text segment respectively. For every other segment the value
   is stored as is: -1 and -2 fill the long field, any other value only
   sets the int field of an otherwise zeroed word.
*/
static void load_agfl_word (BinFile object, int data, int *lc)
{ char segment;
  int value;
  DATA word;

  abs_bin_load_char (object, &segment);
  abs_bin_load_int (object, &value);

  word.lval = 0;
  if (segment == 1) word.str = ((char *) arts_ifd.code) + value;
  else if (segment == 3) word.str = ((char *) arts_ifd.data) + value;
  else if (segment == 4) word.str = arts_ifd.rotext + value;
  else if (value == -1) word.lval = -1L;        /* Sign extend for -1 */
  else if (value == -2) word.lval = -2L;        /* Sign extend for -2 */
  else word.ival = value;                       /* but no one else */

  /* Note: array access scales */
  if (data) arts_ifd.data[*lc] = word;
  else arts_ifd.code[*lc] = word;
  *lc += 1;
}

/*
   Loading of instructions
   An AGFL instruction is saved in the AOB with the following format:

   char opcode;
   int  nr_opnds;
   foreach operand
      { char segment;
        int value;
      }

   The instruction is written at index *lc of the code segment: first a
   header word holding the opcode in its low 8 bits and the number of
   operands from bit 16 upward, then one word per operand. *lc is advanced
   past the whole instruction.
   An operand count that is negative, too large to be packed into the
   header word, or that would make the instruction run past the end of the
   code segment is reported through abs_bug (assumed not to return --
   TODO confirm).
*/
static void load_agfl_instruction (BinFile object, int *lc)
{ char ch;
  int opc, nr_of_opnds, ix;
  size_t words_needed;
  abs_bin_load_char (object, &ch);
  opc = ((int) ch) & 0xff;
  abs_bin_load_int (object, &nr_of_opnds);

  /* The count is shifted left by 16 below; keep that within a 32 bit int */
  if ((nr_of_opnds < 0) || (nr_of_opnds > 0x7fff))
    abs_bug ("load_agfl_instruction", "operand count out of range in binary");

  /* Header word plus operands must fit in what was allocated for the code */
  words_needed = (size_t) *lc + 1 + (size_t) nr_of_opnds;
  if (words_needed > arts_ifd.code_size / sizeof (DATA))
    abs_bug ("load_agfl_instruction", "instruction exceeds code segment in binary");

  /* Note: array access scales */
  arts_ifd.code[*lc].lval = 0L;
  arts_ifd.code[*lc].ival = ((nr_of_opnds << 16) | opc);
  *lc = *lc + 1;
  for (ix = 0; ix < nr_of_opnds; ix++)
     load_agfl_word (object, 0, lc);
}

/*
   Load instructions until the code segment is filled
   The location counter counts words; it is advanced by the instruction
   loader itself, by one header word plus the operands of the instruction.
*/
static void load_agfl_instructions (BinFile object)
{ int lc;
  for (lc = 0; lc * sizeof (DATA) < arts_ifd.code_size; )
    load_agfl_instruction (object, &lc);
}

/*
   The interface section
   Loads, in the fixed order in which they appear in the object file,
   the numeric values, the options, the text options and the table
   addresses into the corresponding fields of arts_ifd.
   The log option is read but its value is not kept.
*/
#define LOAD_IFD_INT(field)  load_absolute_int (object, &arts_ifd.field)
#define LOAD_IFD_ADDR(field) load_address (object, (DATA **) &arts_ifd.field)
void load_interface_section (BinFile object)
{ int ignored_log_option;

  /* numeric interface */
  LOAD_IFD_INT (gra_version);
  LOAD_IFD_INT (nr_lexicon_nonterminals);
  LOAD_IFD_INT (nr_terminals);
  LOAD_IFD_INT (nr_match_regexps);
  LOAD_IFD_INT (nr_skip_regexps);
  LOAD_IFD_INT (nr_syntax_nonterminals);
  LOAD_IFD_INT (nr_neg_memos);
  LOAD_IFD_INT (nr_choices);
  LOAD_IFD_INT (nr_positions);
  LOAD_IFD_INT (eos_terminal);
  LOAD_IFD_INT (lexicon_used);

  /* option interface */
  LOAD_IFD_INT (neg_memo_option);
  LOAD_IFD_INT (directors_option);
  load_absolute_int (object, &ignored_log_option);      /* log option */
  LOAD_IFD_INT (counters_option);
  LOAD_IFD_INT (profile_option);
  LOAD_IFD_INT (trace_option);
  LOAD_IFD_INT (generate_option);
  LOAD_IFD_INT (segment_mode);

  /* text options */
  LOAD_IFD_ADDR (alphabet_fname);
  LOAD_IFD_ADDR (white_space_chars);
  LOAD_IFD_ADDR (word_terminator_chars);

  /* table interface */
  LOAD_IFD_ADDR (affix_domains);
  LOAD_IFD_ADDR (affix_weights);
  LOAD_IFD_ADDR (nont_domains);
  LOAD_IFD_ADDR (term_memo_dir);
  LOAD_IFD_ADDR (match_memo_dir);
  LOAD_IFD_ADDR (skip_memo_dir);
  LOAD_IFD_ADDR (lex_memo_dir);
  LOAD_IFD_ADDR (lrec_rules_table);
  LOAD_IFD_ADDR (lex_nont_nrs_table);
  LOAD_IFD_ADDR (affix_names);
  LOAD_IFD_ADDR (term_names);
  LOAD_IFD_ADDR (nonterm_names);
  LOAD_IFD_ADDR (match_regexp_names);
  LOAD_IFD_ADDR (skip_regexp_names);
  LOAD_IFD_ADDR (alternatives_profile_table);
  LOAD_IFD_ADDR (term_posmemo_dir);
  LOAD_IFD_ADDR (match_posmemo_dir);
  LOAD_IFD_ADDR (skip_posmemo_dir);
  LOAD_IFD_ADDR (lex_posmemo_dir);
  LOAD_IFD_ADDR (other_posmemo_dir);
}
#undef LOAD_IFD_INT
#undef LOAD_IFD_ADDR

/*
   Load words until the data segment is filled
   Every word is relocated and stored by load_agfl_word, which also
   advances the word index.
*/
static void load_data_segment (BinFile object)
{ int lc;
  for (lc = 0; lc * sizeof (DATA) < arts_ifd.data_size; )
    load_agfl_word (object, 1, &lc);
}

/*
   Load the text segment
   The segment is copied character by character from the object file;
   arts_ifd.rotext_size gives the number of characters to read.
*/
static void load_rotext_segment (BinFile object)
{ size_t ix;
  /* The index has the type of the size it is compared with */
  for (ix = 0; ix < arts_ifd.rotext_size; ix++)
    { char ch;
      abs_bin_load_char (object, &ch);
      arts_ifd.rotext[ix] = ch;
    }
}

/*
   Load the object code of the current grammar
   The object file is opened and its parts are read in the order in which
   they are stored: the segment sizes, the instructions, the interface
   section, the data segment and the text segment. After the last part
   the end of the file is verified and the file is closed.
*/
void arts_load_agfl_object_code (void)
{ /* Parse the object file */
  BinFile object = open_object_code ();
  load_sizes_and_allocate_segments (object);
  load_agfl_instructions (object);
  load_interface_section (object);
  load_data_segment (object);
  load_rotext_segment (object);
  abs_bin_verify_eof (object);
  abs_bin_fclose (object);
}

/*
   Link one instruction of the code segment
   The header word at index *lc yields the opcode (low 8 bits) and the
   number of operands (bits 16 and up); *lc is advanced past the
   instruction. For the match, match_re, skip_re and match_other opcodes
   the first operand is replaced by its ENCODE_TERM, ENCODE_MATCH,
   ENCODE_SKIP or ENCODE_OTHER form. All other instructions are left as
   they are.
*/
static void try_link_instruction (int *lc)
{ CODE *instr = &arts_ifd.code[*lc];
  int opc = (instr -> ival) & 0xff;
  int nr_of_opnds = (instr -> ival) >> 16;
  int encoded;

  /* Step over the header word and the operands */
  *lc += (nr_of_opnds + 1);

  /* For those operands that match with the trellis, encode the class */
  switch ((opcode) opc)
    { case opc_match:
        encoded = ENCODE_TERM (instr[1].ival);
        break;
      case opc_match_re:
        encoded = ENCODE_MATCH (instr[1].ival);
        break;
      case opc_skip_re:
        encoded = ENCODE_SKIP (instr[1].ival);
        break;
      case opc_match_other:
        encoded = ENCODE_OTHER (instr[1].ival);
        break;
      default:
        /* Nothing to encode for this opcode */
        return;
    }
  instr[1].ival = encoded;
}

/*
   Link the loaded code
   Walks over all instructions in the code segment and lets
   try_link_instruction encode the operand of those that need it.
   The location counter counts words and is advanced per instruction
   by try_link_instruction.
*/
void arts_link_lexicon (void)
{ int lc = 0;
  while (lc * sizeof (CODE) < arts_ifd.code_size)
    try_link_instruction (&lc);
}
