/*
   File: parser.c
   Implements the parser of the Agfl assembler
  
   Copyright 2006 Radboud University of Nijmegen
 
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
 
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Library General Public License for more details.
 
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   CVS ID: "$Id: parser.c,v 1.8 2006/12/02 12:10:19 marcs Exp $"
*/

/* System includes */
#include <stdio.h>
#include <string.h>

/* Libabase includes */
#include <abase_error.h>
#include <abase_fileutil.h>
#include <abase_memalloc.h>

/* Local includes */
#include "options.h"
#include "lexer.h"
#include "parser.h"
#include "opcode.h"
#include "acoder.h"
#include "symbol_table.h"

/* Exported variables */
/* Pass indicator: set to 0 by prepare_pass_one and to 1 by prepare_pass_two */
int pass_two;
/*
   Error indicator for the running pass: cleared by prepare_pass_one and
   prepare_pass_two, set to 1 whenever the parser reports an error.
*/
int had_errors;

/*
   Token matching primitives: is_token and should_be_token.

   had_syntax_error is the per line syntax error flag: parse_line clears
   it before parsing a line and the parsing routines set it to 1 as soon
   as they report a syntax error.
*/
static int had_syntax_error;

/*
   Conditionally accept a token.
   Returns 1 and calls read_token when the current token equals tk;
   returns 0 and leaves the current token alone otherwise.
*/
static int is_token (token tk)
{
	if (curr_token != tk)
		return (0);

	read_token ();
	return (1);
}

/*
   Demand a token.
   When the current token equals tk it is consumed with read_token.
   Otherwise an error mentioning err (the text of the expected token) and
   the current line and column is reported and the syntax error flag is
   raised; no token is consumed in that case.
*/
static void should_be_token (token tk, char *err)
{
	if (curr_token != tk) {
		abs_error ("'%s' expected in line %d, column %d", err, curr_line, curr_column);
		had_syntax_error = 1;
		return;
	}

	read_token ();
}

/*
   In assembler the end of line provides the synchronization point
   in case of errors.
*/
static void recover_to_end_of_line ()
	{ while (curr_token != EOLN) read_token ();
	};

/*
   Demand the end of the line.
   If the current token is not EOLN an error with the current line and
   column is reported and the rest of the line is skipped through
   recover_to_end_of_line. In both cases one more token is read
   afterwards to move on to the next line.
*/
static void should_be_end_of_line ()
{
	int at_end_of_line = (curr_token == EOLN);

	if (!at_end_of_line) {
		abs_error ("End of line expected in line %d, column %d", curr_line, curr_column);
		recover_to_end_of_line ();
	}

	/* Step over the end of line token */
	read_token ();
}

/*
   A label is a symbol located in column 1 of a line, followed by a colon.
   In pass one we enter it into the symbol table with its segment and value
   If already present in pass one, it was entered before and hence multiply defined
   In pass two we check it with the value in the symbol table for consistency
*/
static void may_be_label_definition ()
	{ if ((curr_token == SYMBOL) && (curr_column == 1))
	    { /* We have a label in the first column, pick up current location */
	      symbol label_def;
	      segment curr_seg;
	      int curr_value;
	      get_current_location (&curr_seg, &curr_value);
	      if (pass_two)
		{ /* Check if the symbol exists, bad if so */
		  if (!lookup_symbol (curr_sval, &label_def))
		    abs_bug ("may_be_label_definition", "Undefined label %s in pass 2 at line %d",
			     curr_sval, curr_line);
		  else
		    { /* Get symbol info and compare it */
		      segment lab_seg;
		      int lab_value;
		      get_symbol_info (label_def, &lab_seg, &lab_value);
		      if ((lab_seg != curr_seg) || (lab_value != curr_value))
			abs_bug ("may_be_label_definition",
				 "Label %s changed value in pass2 at line %d",
				 curr_sval, curr_line);
		    }
		}
	      else if (!enter_symbol (curr_sval, &label_def))
		{ /* label had been entered earlier in pass one */
		  abs_error ("Multiple definition of label %s in line %d", curr_sval, curr_line);
		  had_errors = 1;
		}
	      else update_symbol_info (label_def, curr_seg, curr_value);

	      /* Eat label and colon */
	      abs_free (curr_sval, "may_be_label_definition");
	      read_token ();
	      should_be_token (COLON, ":");     
	    }; 
	};

/*
   Include the opcode type and the opcode table: names, number of operands
   and acode generator
*/
#include "opcode.tab"
/*
   Recognize an opcode.
   Returns 0, consuming nothing, when the current token is not a SYMBOL.
   Otherwise the symbol is looked up by name in all_opcodes and consumed:
   on a match *opc receives the table index; on a mismatch an error is
   reported, the syntax error flag is raised and *opc becomes opc_nop.
   In both of these cases the symbol text is released and 1 is returned.
*/
static int is_opcode (opcode *opc)
{
	int idx;

	if (curr_token != SYMBOL)
		return (0);

	/* Search the opcode table for the symbol text */
	for (idx = 0; idx < nr_of_opcodes; idx++) {
		if (strcmp (curr_sval, all_opcodes[idx].opcode_name) == 0)
			break;
	}

	if (idx < nr_of_opcodes)
		*opc = idx;
	else {
		/* No opcode recognized, recover by delivering nop */
		abs_error ("Illegal opcode %s in line %d, column %d",
			   curr_sval, curr_line, curr_column);
		had_syntax_error = 1;
		*opc = opc_nop;
	}

	/* Eat the symbol away, whether it was recognized or not */
	abs_free (curr_sval, "is_opcode");
	read_token ();
	return (1);
}

/*
   Parse operands
   An operand has a segment and a value

   is_operand recognizes one operand and stores it in *opnd:
     SYMBOL         segment and value are taken from the symbol table; an
                    unknown symbol gives error_segment and value 0 and is
                    reported as an error in pass two only
     NUMBER         abs_segment with curr_uval converted to int
     SIGNED_NUMBER  abs_segment with curr_nval
     STRING         rotext_segment; svalue takes over the string curr_sval
   The recognized token kind is stored in opnd -> tk and the token is
   consumed. Returns 1 when an operand was recognized; for any other
   token 0 is returned and neither *opnd nor the input is touched.
*/
static int is_operand (operand *opnd)
{
	switch (curr_token) {
	case SYMBOL: {
		symbol sy;

		if (lookup_symbol (curr_sval, &sy))
			get_symbol_info (sy, &opnd -> seg, &opnd -> value);
		else {
			/* Only pass two reports a symbol that is not in the table */
			if (pass_two) {
				abs_error ("Undefined symbol %s in line %d, column %d",
					   curr_sval, curr_line, curr_column);
				had_errors = 1;
			}
			opnd -> seg = error_segment;
			opnd -> value = 0;
		}

		/*
		   The operand keeps no reference to the symbol text, so release
		   it here like the other consumers of a SYMBOL token do.
		   Without this every symbolic operand leaked its name.
		*/
		abs_free (curr_sval, "is_operand");
		break;
	}
	case NUMBER:
		opnd -> seg = abs_segment;
		opnd -> value = (int) curr_uval;
		break;
	case SIGNED_NUMBER:
		opnd -> seg = abs_segment;
		opnd -> value = curr_nval;
		break;
	case STRING:
		/* The string is handed over to the operand, so it is not freed here */
		opnd -> seg = rotext_segment;
		opnd -> svalue = curr_sval;
		break;
	default:
		return (0);
	}

	opnd -> tk = curr_token;
	read_token ();
	return (1);
}

/*
   Demand an operand.
   When is_operand does not recognize one, an error with the current line
   and column is reported, *opnd is turned into an error operand
   (error_segment, value 0) and the syntax error flag is raised.
*/
static void should_be_operand (operand *opnd)
{
	if (!is_operand (opnd)) {
		abs_error ("operand expected in line %d, column %d", curr_line, curr_column);
		opnd -> seg = error_segment;
		opnd -> value = 0;
		had_syntax_error = 1;
	}
}

/*
   Operand parsing

   Parse the comma separated operands of opcode opc (their number comes
   from all_opcodes) into an instruction record and pass that record to
   code_instruction. Parsing stops at the first syntax error, which may
   already have been raised before this routine is entered.

   The record is cleared up front and every operand that was never parsed
   is marked as an error operand (error_segment, value 0), so that
   code_instruction, which is called with the full operand count, never
   sees uninitialized operands.
*/
static void parse_instruction_operands (opcode opc)
{
	int nr = all_opcodes[opc].nr_opnds;
	instr ins;
	int ix;

	/* Fill the instruction structure */
	memset (&ins, 0, sizeof (ins));
	ins.opc = opc;
	ins.nr_opnds = nr;
	for (ix = 0; !had_syntax_error && (ix < nr); ix++) {
		should_be_operand (&ins.opnds[ix]);
		if (ix < nr - 1)
			should_be_token (COMMA, ",");
	}

	/* Operands that were not reached because of a syntax error */
	for (; ix < nr; ix++) {
		ins.opnds[ix].seg = error_segment;
		ins.opnds[ix].value = 0;
	}

	/* Try and code the instruction */
	code_instruction (&ins);
}

/*
   Recognition of pseudo opcodes

   Every pseudo opcode has a parser routine without arguments that
   handles what follows the pseudo opcode on the line.
*/

/* Parser of the nop pseudo opcode: parses nothing and codes nothing */
static void psopc_nop_parser ()
{
}

/* .addr: demand one operand and hand it to code_word */
static void psopc_addr_parser ()
{
	operand addr_opnd;

	should_be_operand (&addr_opnd);
	code_word (&addr_opnd);
}

/*
   .asciz: demand a string operand and hand it to code_string.
   If the current token is not a STRING an error is reported, an empty
   instruction is coded, the rest of the line is skipped and had_errors
   is set.
*/
static void psopc_asciz_parser ()
{
	operand str_opnd;

	if (curr_token == STRING) {
		should_be_operand (&str_opnd);
		code_string (&str_opnd);
		return;
	}

	abs_error ("string expected in line %d, column %d", curr_line, curr_column);
	code_empty_instruction ();
	recover_to_end_of_line ();
	had_errors = 1;
}

/*
   .byte: demand one operand.
   NOTE(review): the operand is parsed but nothing is coded for it, unlike
   the other pseudo opcode parsers - confirm whether this is intended.
*/
static void psopc_byte_parser ()
{
	operand byte_opnd;

	should_be_operand (&byte_opnd);
}

/* .code: make code_segment the current segment, then code an empty instruction */
static void psopc_code_parser ()
{
	set_current_segment (code_segment);
	code_empty_instruction ();
}

/* .data: make data_segment the current segment, then code an empty instruction */
static void psopc_data_parser ()
{
	set_current_segment (data_segment);
	code_empty_instruction ();
}

/* .int: demand one operand and hand it to code_word */
static void psopc_int_parser ()
{
	operand int_opnd;

	should_be_operand (&int_opnd);
	code_word (&int_opnd);
}

/* .interface: make interface_segment the current segment, then code an empty instruction */
static void psopc_interface_parser ()
{
	set_current_segment (interface_segment);
	code_empty_instruction ();
}

/* .rotext: make rotext_segment the current segment, then code an empty instruction */
static void psopc_rotext_parser ()
{
	set_current_segment (rotext_segment);
	code_empty_instruction ();
}

/*
   .version: whatever follows on the line is skipped unparsed, then an
   empty instruction is coded
*/
static void psopc_version_parser ()
{
	recover_to_end_of_line ();
	code_empty_instruction ();
}

/*
   Introduce pseudo opcode type and table of pseudo opcode names
   To add: freeing of operand datastructures

   The enumerators are used as index into the table all_pseudo_ops, so
   their order must be the order of the table entries. psopc_nop is the
   value delivered when no valid pseudo opcode could be recognized.
*/
typedef enum {
	psopc_nop = 0,
	psopc_addr,
	psopc_asciz,
	psopc_byte,
	psopc_code,
	psopc_data,
	psopc_int,
	psopc_interface,
	psopc_rotext,
	psopc_version
} pseudo_opcode;

/* One entry of the pseudo opcode table */
struct psopc_tab_rec {
	/* Name of the pseudo opcode as written after the period */
	char *name;

	/* Routine parsing what follows the pseudo opcode on the line */
	void (*psopc_parser) ();
};

/*
   The pseudo opcode table. is_pseudo_opcode delivers the index of the
   matching entry as pseudo_opcode value, hence the entries must be kept
   in the order of the pseudo_opcode enumerators. Entry 0 (empty name,
   psopc_nop_parser) is the one selected after an error.
*/
static struct psopc_tab_rec all_pseudo_ops[] = {
	{ "",		psopc_nop_parser },
	{ "addr",	psopc_addr_parser },
	{ "asciz",	psopc_asciz_parser },
	{ "byte",	psopc_byte_parser },
	{ "code",	psopc_code_parser },
	{ "data",	psopc_data_parser },
	{ "int",	psopc_int_parser },
	{ "interface",	psopc_interface_parser },
	{ "rotext",	psopc_rotext_parser },
	{ "version",	psopc_version_parser }
};

/* Number of entries in all_pseudo_ops */
#define nr_of_pseudo_ops (sizeof (all_pseudo_ops) / sizeof (all_pseudo_ops[0]))
	  
/*
   Recognize a pseudo opcode: a period followed by a symbol.
   Returns 0 when the current token is not a PERIOD. Otherwise the period
   is consumed and 1 is returned with *opc set to
     - the index in all_pseudo_ops of the entry named by the symbol, or
     - psopc_nop, after reporting an error and raising the syntax error
       flag, when no symbol follows the period or when the symbol is not
       the name of a pseudo opcode.
   A symbol following the period is released and consumed; any other
   token following the period is left in place.
*/
static int is_pseudo_opcode (pseudo_opcode *opc)
{
	int idx;

	if (!is_token (PERIOD))
		return (0);

	/* A symbol must follow the period */
	if (curr_token != SYMBOL) {
		abs_error ("Missing pseudo opcode in line %d, column %d",
			   curr_line, curr_column);
		had_syntax_error = 1;
		*opc = psopc_nop;
		return (1);
	}

	/* Search the pseudo opcode table for the symbol text */
	for (idx = 0; idx < nr_of_pseudo_ops; idx++) {
		if (strcmp (curr_sval, all_pseudo_ops[idx].name) == 0)
			break;
	}

	if (idx < nr_of_pseudo_ops)
		*opc = idx;
	else {
		/* No pseudo opcode recognized, recover by delivering nop */
		abs_error ("Illegal pseudo opcode %s in line %d, column %d",
			   curr_sval, curr_line, curr_column);
		had_syntax_error = 1;
		*opc = psopc_nop;
	}

	/* Eat the symbol away, whether it was recognized or not */
	abs_free (curr_sval, "is_pseudo_opcode");
	read_token ();
	return (1);
}

/*
   Handle what follows pseudo opcode opc on the line.
   When no syntax error is pending the parser routine registered for opc
   in all_pseudo_ops is called; with a pending syntax error the rest of
   the line is skipped instead.
*/
static void parse_pseudo_instruction_operands (pseudo_opcode opc)
{
	if (!had_syntax_error) {
		all_pseudo_ops[opc].psopc_parser ();
		return;
	}

	recover_to_end_of_line ();
}

/*
   A line of assembler consists of an optional label, an optional opcode
   or pseudo opcode and its operands.

   parse_line clears the syntax error flag, parses one such line and
   codes an empty instruction when the line carries neither an opcode nor
   a pseudo opcode. A syntax error detected on the line makes the parser
   skip to the end of the line and sets had_errors. Finally the end of
   the line is demanded.
*/
static void parse_line ()
{
	opcode real_opc;
	pseudo_opcode pseudo_opc;

	had_syntax_error = 0;
	may_be_label_definition ();

	if (is_opcode (&real_opc))
		parse_instruction_operands (real_opc);
	else if (is_pseudo_opcode (&pseudo_opc))
		parse_pseudo_instruction_operands (pseudo_opc);
	else
		code_empty_instruction ();

	if (had_syntax_error) {
		recover_to_end_of_line ();
		had_errors = 1;
	}

	should_be_end_of_line ();
}

/* Exported code */

/*
   Prepare pass one: open the file named assembler_fname for reading,
   initialize the lexer on it, clear had_errors and pass_two and
   initialize the acoder. With verbose set a progress message is given.
   NOTE(review): the result of abs_fopen is not checked here; presumably
   abs_fopen deals with a failing open itself - confirm.
*/
void prepare_pass_one ()
{
	FILE *input = abs_fopen (assembler_fname, "r");

	init_lexer (input, assembler_fname);
	pass_two = 0;
	had_errors = 0;
	init_acoder ();

	if (verbose) {
		abs_message ("  AGFL Assembler pass one...");
	}
}

/*
   Finish pass one: when the pass found errors a message is given and
   abs_exit is called; without errors nothing happens.
   NOTE(review): abs_exit is called with 0 although errors were found -
   confirm that this is the intended exit code.
*/
void finish_pass_one ()
{
	if (had_errors) {
		abs_message ("  Pass one found errors, aborting assembly...");
		abs_exit (0);
	}
}

/*
   Prepare pass two: clear had_errors, set pass_two, reinitialize the
   lexer and initialize the acoder again. With verbose set a progress
   message is given.
*/
void prepare_pass_two ()
{
	pass_two = 1;
	had_errors = 0;
	reinit_lexer ();
	init_acoder ();

	if (verbose) {
		abs_message ("  AGFL Assembler pass two...");
	}
}

/*
   Finish pass two: finish the acoder and, when generate_symbol_table is
   set, dump the symbol table.
*/
void finish_pass_two ()
{
	finish_acoder ();

	if (!generate_symbol_table)
		return;
	dump_symbol_table ();
}

/* Parse the input line by line until the current token is EOFSYMBOL */
void parse_input ()
{
	for (;;) {
		if (curr_token == EOFSYMBOL)
			break;
		parse_line ();
	}
}

