/*
   File: erts_io.c
   Defines the basic transput routines for the EAG3 runtime system.

   Copyright 2012 Marc Seutter

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id: erts_io.c,v 1.5 2012/12/27 17:59:52 marcs Exp $"
*/

/* global includes */
#include <stdio.h>
#include <stdlib.h>

#ifndef WIN32
#include <unistd.h>
#endif

/* libdcg includes */
#include <dcg.h>
#include <dcg_alloc.h>
#include <dcg_error.h>
#include <dcg_fileutil.h>

/* libebase includes */
#include <ebase_version.h>
#include <ebase_input.h>
#include <ebase_lexicon.h>
#include <ebase_lexicon_impl.h>

/* local includes */
#include "erts_handle.h"
#include "erts_handle_impl.h"
#include "erts_io.h"

/*
   Initialization and finalization
*/
/*
   Prepare the transput state of the handle: select the input and output
   streams, decide whether prompting makes sense and allocate the initial
   input buffer. The initial buffer size depends on the input mode.
*/
void erts_init_io (EagrtsHandle hnd)
{
  /* Input comes from the named file when given, else from stdin */
  if (hnd -> input_fname != NULL)
    { hnd -> input_file = dcg_fopen (hnd -> input_fname, "r");
    }
  else
    { hnd -> input_file = stdin;
    };

  /* Output goes to the named file when given, else to stdout */
  if (hnd -> output_fname != NULL)
    { hnd -> output_file = dcg_fopen (hnd -> output_fname, "w");
    }
  else
    { hnd -> output_file = stdout;
    };

  /* Prompting (on stderr) only makes sense if both ends are terminals */
  if (isatty (fileno (hnd -> input_file)) && isatty (fileno (stderr)))
    hnd -> interactive = 1;
  else
    hnd -> interactive = 0;

  /* Pick the initial buffer size: larger chunks get a larger buffer */
  switch (hnd -> input_mode)
    { case LineInputMode:
        hnd -> input_size = 256;
        break;
      case ParagraphInputMode:
        hnd -> input_size = 2048;
        break;
      case DocumentInputMode:
        hnd -> input_size = 8192;
        break;
      default:
        dcg_bad_tag (hnd -> input_mode, "erts_init_io");
    };

  /* The buffer grows on demand while reading input */
  hnd -> input_buffer = (char *) dcg_malloc (hnd -> input_size);
}

/*
   Close the input and output streams of the handle.
   Note that these streams are closed unconditionally, whatever they
   refer to; the results of fclose are not inspected.
*/
void erts_finish_io (EagrtsHandle hnd)
{
  FILE *in_stream = hnd -> input_file;
  FILE *out_stream = hnd -> output_file;

  fclose (in_stream);
  fclose (out_stream);
}

/*
   Read from input and append to dynamic string.
   When in utf8_processing mode, read the correct number of bytes in one call
   and update line number and column number accordingly. Make sure that we
   can always add a null byte.
*/
/*
   Store one byte at the current write position of the input buffer and
   advance the write position. The buffer is enlarged (by a factor of 4)
   as soon as only two free bytes remain, so that after every call there
   is still room to write a terminating null byte.
*/
static void append_input_char (EagrtsHandle hnd, char ch)
{
  int must_grow =
    (hnd -> input_bufptr - hnd -> input_buffer == hnd -> input_size - 2);

  if (must_grow)
    { /* The reallocation may move the buffer: remember the write offset */
      int write_offset = hnd -> input_size - 2;

      hnd -> input_size = hnd -> input_size * 4;
      dcg_realloc ((void **) &hnd -> input_buffer, hnd -> input_size);
      hnd -> input_bufptr = &hnd -> input_buffer[write_offset];
    };

  /* Store the byte and step to the next free position */
  *hnd -> input_bufptr++ = ch;
}

/*
   Read one character from the input file and append it to the input buffer.

   When the lexicon requests UTF8 processing and the first byte is not plain
   ASCII, the continuation bytes belonging to the character are read and
   appended in the same call, so that a multi byte character counts as a
   single column.

   Returns the first byte read (as delivered by fgetc), or EOF at end of
   input. Line and column number of the handle are updated: a newline
   advances the line number and resets the column to 1, any other
   character advances the column by one.

   Aborts through dcg_abort on a malformed UTF8 lead byte, a malformed
   continuation byte, or EOF in the middle of a multi byte character.
*/
static int read_input_char (EagrtsHandle hnd)
{ int inch = fgetc (hnd -> input_file);
  int nr_cbytes = 0;

  /* EOF: done */
  if (inch == EOF) return (inch);
  append_input_char (hnd, (char)(unsigned char)(unsigned int) inch);

  /* Update input position */
  if (inch == '\n')
    { hnd -> linenr++;
      hnd -> colnr = 1;
      return (inch);
    }
  hnd -> colnr++;

  /* Check if we are processing UTF8 */
  if (!hnd -> lexicon -> utf8_processing || (inch < 0x80))
    return (inch);

  /*
     Determine the required number of UTF8 continuation bytes from the
     lead byte. Stray continuation bytes (10xxxxxx) and lead bytes
     announcing more than 4 bytes are rejected.
  */
  if ((inch & 0xE0) == 0xC0) nr_cbytes = 1;
  else if ((inch & 0xF0) == 0xE0) nr_cbytes = 2;
  else if ((inch & 0xF8) == 0xF0) nr_cbytes = 3;
  else dcg_abort ("read_input_char", "Input contains bad UTF8 character %02x", inch);

  /* Read the continuation bytes, while checking UTF8 syntax */
  while (nr_cbytes > 0)
    { int inch2 = fgetc (hnd -> input_file);
      if (inch2 == EOF)
        dcg_abort ("read_input_char", "Encountered EOF while expecting UTF8 continuation byte");

      /* It is the byte just read that must have the form 10xxxxxx */
      if ((inch2 & 0xC0) != 0x80)
        dcg_abort ("read_input_char", "Encountered bad UTF8 continuation byte %02x", inch2);
      append_input_char (hnd, (char)(unsigned char)(unsigned int) inch2);

      /* One continuation byte less to go */
      nr_cbytes--;
    };
  return (inch);
}

/*
   Read the next chunk of input into the input buffer of the handle.
   What constitutes a chunk depends on the input mode:
   - LineInputMode: everything up to and including the next newline;
   - ParagraphInputMode: everything up to a newline that follows the
     previous newline directly or with only white space in between;
   - DocumentInputMode: everything up to end of input.
   In every mode a chunk also ends at end of input and, when the
   lcsdoc_sync_option is set, at a \001 document separator.

   On return the buffer is null terminated and input_length holds the
   number of bytes before the terminator. Outside document mode a single
   trailing newline is chopped off. The line and column number at which
   the chunk started are saved in input_linenr and input_colnr.

   Returns 0 when there is nothing (more) to parse: end of input was seen
   on an earlier call, or, in interactive line mode, an empty or white
   space only line was entered. Returns 1 otherwise.
*/
int erts_read_input (EagrtsHandle hnd)
{ /*
     Offset in the input buffer of the previous newline (-1: none seen).
     This must be an offset and not a pointer, since the buffer may be
     reallocated (and moved) while the chunk is being read.
  */
  long prev_newline_off = -1;
  int seen_end_chunk = 0;

  /* If we have seen end of file, we're done parsing */
  if (hnd -> seen_eof) return (0);

  /* Copy input position */
  hnd -> input_bufptr = hnd -> input_buffer;
  hnd -> input_linenr = hnd -> linenr;
  hnd -> input_colnr = hnd -> colnr;

  /* Prompt if input from terminal */
  if (hnd -> interactive)
    { fprintf (stderr, ">> ");
      fflush (stderr);
    };

  /* Get input line/paragraph/document */
  while (!seen_end_chunk && !feof (hnd -> input_file))
    { int inch = read_input_char (hnd);
      switch (inch)
        { case EOF:
            seen_end_chunk = 1;
            break;
          case 0:
            dcg_abort ("erts_read_input", "Input contains null bytes");
            break;
          case 1:
            /* LCS document separator */
            if (hnd -> lcsdoc_sync_option)
              { hnd -> seen_doc_sync = 1;
                seen_end_chunk = 1;
              };
            break;
          case '\n':
            switch (hnd -> input_mode)
              { case LineInputMode:
                  { char *ptr;
                    int wsp;

                    /* Handle special case of null input */
                    if (hnd -> interactive &&
                        (hnd -> input_bufptr == hnd -> input_buffer + 1))
                      return (0);

                    /*
                       Otherwise, check if there is only white space between
                       the begin of the buffer and this \n: note that this
                       only succeeds if \n is white space
                    */
                    *hnd -> input_bufptr = '\0';	/* Mark end of check */
                    ptr = hnd -> input_buffer;
                    wsp = ebs_is_white_spaces (hnd -> lexicon, &ptr);
                    if (hnd -> interactive && wsp && (ptr == hnd -> input_bufptr))
                      /* Only white space between begin and buffer end */
                      return (0);

                    /* Something to parse */
                    seen_end_chunk = 1;
                  };
                  break;
                case ParagraphInputMode:
                  { /* The newline just read is the last byte in the buffer */
                    long newline_off =
                      (long) (hnd -> input_bufptr - hnd -> input_buffer) - 1;

                    if (prev_newline_off >= 0)
                      { if (prev_newline_off == newline_off - 1)
                          /* Special case of two sequential newlines */
                          seen_end_chunk = 1;
                        else
                          { /*
                               Otherwise, check if there is only white space
                               between the two \n's: note that this only
                               succeeds if \n is white space
                            */
                            char *ptr = hnd -> input_buffer + prev_newline_off;
                            int wsp;

                            *hnd -> input_bufptr = '\0';	/* Mark end of check */
                            wsp = ebs_is_white_spaces (hnd -> lexicon, &ptr);
                            if (wsp && (ptr == hnd -> input_bufptr))
                              seen_end_chunk = 1;
                          };
                      };
                    prev_newline_off = newline_off;
                  };
                  break;
                case DocumentInputMode:
                  /* Newlines do not end a document */
                  break;
                default:
                  dcg_bad_tag (hnd -> input_mode, "erts_read_input");
              };
            break;
          default:
            break;
        };
    };

  /*
     Remember end of file. A read error is treated like end of file:
     otherwise every subsequent call would deliver an empty chunk again.
  */
  if (feof (hnd -> input_file) || ferror (hnd -> input_file))
    hnd -> seen_eof = 1;

  /* If we are not in document mode, chop trailing newline */
  if (hnd -> input_mode != DocumentInputMode)
    { if ((hnd -> input_bufptr != hnd -> input_buffer) &&
          (*(hnd -> input_bufptr - 1) == '\n'))
        hnd -> input_bufptr--;
    };
  hnd -> input_length = hnd -> input_bufptr - hnd -> input_buffer;
  append_input_char (hnd, '\0');

  /* Chop (return) and newline after document separator */
  if (hnd -> seen_doc_sync)
    { int inch = fgetc (hnd -> input_file);
      if (inch == '\r') inch = fgetc (hnd -> input_file);

      /* Anything but the newline is pushed back; EOF cannot be pushed back */
      if ((inch != '\n') && (inch != EOF))
        ungetc (inch, hnd -> input_file);
    };

  /* Doneski */
  return (1);
}

/*
   If a document separator is pending (seen_doc_sync is set), write a
   \001 followed by a newline to the output file, flush the output and
   clear the pending mark. Otherwise do nothing.
*/
void erts_try_output_doc_sync (EagrtsHandle hnd)
{
  /* Nothing pending: nothing to emit */
  if (!hnd -> seen_doc_sync) return;

  fputs ("\001\n", hnd -> output_file);
  fflush (hnd -> output_file);
  hnd -> seen_doc_sync = 0;
}
