/*
   File: abase_fileutil.c
   Defines routines to handle files and filenames, and save and load
   binary data (lexica, compiled grammars, etc. ) in a machine and
   OS independent way.

   The format of the binary IO is taken from the ELF standard for the
   primitive types char, int and string. Furthermore we assume that
   structured data is saved recursively, field by field. Binary files
   contain an AGFL specific header, version and checksum.

   Copyright 2009 Radboud University of Nijmegen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

   CVS ID: "$Id$"
*/

/* include config.h if autoconfigured */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */

/* global includes */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <sys/stat.h>

/* local includes */
#include "abase_porting.h"
#include "abase_error.h"
#include "abase_version.h"
#include "abase_memalloc.h"
#include "abase_pool_alloc.h"
#include "abase_fileutil.h"

#ifndef HAVE_GETC_UNLOCKED
#define getc_unlocked	getc
#endif
#ifndef HAVE_PUTC_UNLOCKED
#define putc_unlocked	putc
#endif

/* Compatibility - implement missing function */

#if !HAVE_VSNPRINTF
/*
 * Fallback for platforms without vsnprintf(3): format with vsprintf(3)
 * and check afterwards whether the buffer was overrun. By the time the
 * overrun is detected it has already happened, so the only reaction
 * left is to report the offending format string and abort(3).
 *
 * Unlike a real vsnprintf this never truncates: `str' must have room
 * for the complete output including the terminating NUL.
 *
 * Returns the number of characters written (excluding the NUL), or the
 * negative value returned by vsprintf when formatting failed.
 */
int vsnprintf(char * __restrict str, size_t size, const char * __restrict format, va_list ap)
{
    int res = vsprintf(str, format, ap);
    if (res < 0)
	return res;		/* output error, not an overflow: hand it to the caller */
    /* res does not count the NUL, so res == size is already one byte too many */
    if ((size_t) res >= size) {
	fprintf(stderr, "vsnprintf buffer overflow: format=%s\n", format);
	abort();
    }
    return res;
}

#endif /* !HAVE_VSNPRINTF */

/* File opening */
/*
   Open `path' with the stdio mode string `mode' and return the stream.
   A failed fopen is reported through abs_fatal; should that call
   return, the result is NULL.
*/
FILE *abs_fopen (char *path, char *mode)
	{ FILE *stream = fopen (path, mode);
	  if (stream != NULL) return (stream);
	  abs_fatal ("could not open file '%s' for mode '%s'", path, mode);
	  return (NULL);
	}

/* primitive routines to check the existence of a path */
/*
   Report whether `path' can be stat'ed.
   Returns 1 when the stat call succeeds and 0 when it fails,
   whatever the reason for the failure.
*/
int abs_file_exists (char *path)
	{
#ifdef WIN32
	  struct _stat info;
	  int failed = _stat (path, &info);
#else
	  struct stat info;
	  int failed = stat (path, &info);
#endif
	  return ((failed) ? 0 : 1);
	}

/*
   Report whether the file "<path>.<ext>" exists.
   The combined name is built with abs_new_fmtd_string, probed with
   abs_file_exists and released again before returning that result.
*/
int abs_file_ext_exists (char *path, char *ext)
	{ int found;
	  char *full_name = abs_new_fmtd_string ("abs_file_ext_exists", "%s.%s", path, ext);
	  found = abs_file_exists (full_name);
	  abs_free (full_name, "abs_file_ext_exists");
	  return (found);
	}

/*
   Report whether `path' names a regular file.
   Returns 1 for a regular file, 0 for anything else and 0 when the
   path cannot be stat'ed.

   The file type is a multi-bit field inside st_mode, so it has to be
   compared as a whole: merely testing the S_IFREG bit would also accept
   other types whose code happens to contain that bit (e.g. sockets).
*/
int abs_is_normal_file (char *path)
        {
#ifdef WIN32
          struct _stat sbuf;
          if (_stat (path, &sbuf)) return (0);
          return ((sbuf.st_mode & _S_IFMT) == _S_IFREG);
#else
          struct stat sbuf;
          if (stat (path, &sbuf)) return (0);
          return (S_ISREG (sbuf.st_mode) ? 1 : 0);
#endif
        }

/*
   Report whether `path' names a directory.
   Returns 1 for a directory, 0 for anything else and 0 when the path
   cannot be stat'ed.

   The file type is a multi-bit field inside st_mode, so it has to be
   compared as a whole: merely testing the S_IFDIR bit would also accept
   other types whose code contains that bit (e.g. block devices).
*/
int abs_is_directory (char *path)
        {
#ifdef WIN32
          struct _stat sbuf;
          if (_stat (path, &sbuf)) return (0);
          return ((sbuf.st_mode & _S_IFMT) == _S_IFDIR);
#else
          struct stat sbuf;
          if (stat (path, &sbuf)) return (0);
          return (S_ISDIR (sbuf.st_mode) ? 1 : 0);
#endif
        }

/*
   Fetch the modification time of `path'.
   On success the st_mtime value reported by stat is stored in *mtime
   and 1 is returned. When stat fails, 0 is returned and *mtime is
   left untouched.
*/
int abs_file_mtime (char *path, time_t *mtime)
	{
#ifdef WIN32
	  struct _stat info;
	  int status = _stat (path, &info);
#else
	  struct stat info;
	  int status = stat (path, &info);
#endif
	  if (status != 0) return (0);
	  *mtime = info.st_mtime;
	  return (1);
	}

/*
 * Consume a leading UTF-8 Byte Order Mark (the bytes EF BB BF, i.e.
 * U+FEFF encoded in UTF-8) from `file', if one is present.
 *
 * When the stream does not start with a complete BOM, every byte that
 * was read is handed back with ungetc, last byte first. The C library
 * only guarantees a single byte of pushback, so a stream that starts
 * with a proper prefix of the BOM (EF, or EF BB) may not be restored
 * completely. Seeking back is no alternative because it does not work
 * on pipes.
 */
void abs_file_skip_bom(FILE *file)
	{
	    int first, second, third;

	    first = getc(file);
	    if (first != 0xEF)
	    { ungetc(first, file);
	      return;
	    }

	    second = getc(file);
	    if (second != 0xBB)
	    { ungetc(second, file);
	      ungetc(first, file);
	      return;
	    }

	    third = getc(file);
	    if (third == 0xBF) return;		/* complete BOM swallowed */

	    ungetc(third, file);
	    ungetc(second, file);
	    ungetc(first, file);
	}

/* path construction */
/*
   Join `directory' and `fname' with the platform separator DIRSEP.
   The result comes from abs_new_fmtd_string; presumably it is freshly
   allocated and must be released with abs_free by the caller
   (abs_file_ext_exists treats such strings that way).
*/
char *abs_construct_path (char *directory, char *fname)
	{ char *joined;
	  joined = abs_new_fmtd_string ("abs_construct_path", "%s%c%s", directory, DIRSEP, fname);
	  return (joined);
	}

/* Binary file type definition and initialisation */
/*
   State kept for one open binary file. Records are created by
   abs_bin_fopen and released by abs_bin_fclose.
*/
struct bin_file_rec
	{ FILE *file;		/* stdio stream, opened by abs_bin_fopen in "rb" or "wb" mode */
	  char *path;		/* path pointer as given to abs_bin_fopen (not copied); used in diagnostics */
	  int writing;		/* 1 when opened for writing, 0 when opened for reading */
	  u_int32 checksum;	/* sum modulo 256 of the bytes passed through save_char/load_char */
	};
	  
/*
   Open a binary file and wrap it in a freshly allocated BinFile.

   `mode' must be "r" or "rb" (reading) or "w" or "wb" (writing); the
   stream itself is always opened in binary mode. Any other mode is
   reported through abs_abort.

   Returns NULL, after an abs_message, when the file cannot be opened.
   The `path' pointer is stored as is, not copied, so it must remain
   valid for as long as the BinFile is used. The checksum starts at 0.
*/
BinFile abs_bin_fopen (char *path, char *mode)
	{ int for_writing = 0;
	  char *stdio_mode = NULL;
	  FILE *stream;
	  BinFile handle;

	  if ((strcmp (mode, "rb") == 0) || (strcmp (mode, "r") == 0))
	  { for_writing = 0;
	    stdio_mode = "rb";
	  }
	  else if ((strcmp (mode, "wb") == 0) || (strcmp (mode, "w") == 0))
	  { for_writing = 1;
	    stdio_mode = "wb";
	  }
	  else
	  { abs_abort ("abs_bin_fopen",
			 "Illegal mode '%s' to open binary file '%s'", mode, path);
	  }

	  stream = fopen (path, stdio_mode);
	  if (stream == NULL)
	  { abs_message ("Could not open binary file '%s' with mode '%s'", path, mode);
	    return NULL;
	  }

	  handle = (BinFile) abs_malloc (sizeof (struct bin_file_rec), "abs_bin_fopen");
	  handle -> checksum = 0;
	  handle -> writing = for_writing;
	  handle -> path = path;
	  handle -> file = stream;
	  return (handle);
	}

/*
   Close the stream of `bf' and release the BinFile record itself.
   A null `bf' or a record without a stream is reported through
   abs_abort. The result of fclose is not inspected.
*/
void abs_bin_fclose (BinFile bf)
	{ if (!bf)
	     abs_abort ("abs_bin_fclose", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_fclose", "Binary file not open");
	  fclose (bf -> file);
	  abs_free ((void *) bf, "abs_bin_fclose");
	}

/*
   Restart the running checksum of `bf' at 0.
   A null `bf' or a record without a stream is reported through abs_abort.
*/
void abs_bin_reset_checksum (BinFile bf)
	{ if (!bf)
	     abs_abort ("abs_bin_reset_checksum", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_reset_checksum", "Binary file not open");
	  bf -> checksum = 0;
	}

/*
   Return the current position in the stream of `bf'.
   Uses ftell on WIN32 and ftello elsewhere; the value of either call
   is passed on unchanged, including its error value.
   A null `bf' or a record without a stream is reported through abs_abort.
*/
off_t abs_bin_ftell (BinFile bf)
	{ off_t position;
	  if (!bf)
	     abs_abort ("abs_bin_tell", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_tell", "Binary file not open");
#ifdef WIN32
	  position = (off_t) ftell (bf -> file);
#else
	  position = ftello (bf -> file);
#endif
	  return (position);
	}

/*
   Give access to the stdio stream behind `bf'.
   A null `bf' is reported through abs_abort; the stream itself is
   returned as stored, without checking that it is non-null.
*/
FILE *abs_bin_file (BinFile bf)
	{ if (!bf)
	     abs_abort ("abs_bin_file", "Called with null pointer");
	  return (bf -> file);
	}

/*
   Reposition the stream of `bf'; `offset' and `whence' have the same
   meaning as for fseek. Returns the result of the underlying seek call.
   A null `bf' or a record without a stream is reported through abs_abort.

   Outside WIN32 the seek is done with fseeko, the counterpart of the
   ftello used by abs_bin_ftell, so that an off_t wider than long is
   not truncated. On WIN32 plain fseek is used, which takes a long.
*/
int abs_bin_seek (BinFile bf, off_t offset, int whence)
	{ if (bf == NULL)	  abs_abort ("abs_bin_seek", "Called with null pointer");
	  if (bf -> file == NULL) abs_abort ("abs_bin_seek", "Binary file not open");
#ifdef WIN32
	  return (fseek (bf -> file, (long) offset, whence));
#else
	  return (fseeko (bf -> file, offset, whence));
#endif
	}

/*
   Read up to `nbytes' raw bytes from `bf' into `buf' and return the
   number of bytes fread delivered. The running checksum is not
   updated and the read/write direction of `bf' is not checked.
   A null `bf' or a record without a stream is reported through abs_abort.
*/
size_t abs_bin_read (BinFile bf, void *buf, size_t nbytes)
	{ size_t delivered;
	  if (!bf)
	     abs_abort ("abs_bin_read", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_read", "Binary file not open");
	  delivered = fread (buf, 1, nbytes, bf -> file);
	  return (delivered);
	}

/*
   Write `nbytes' raw bytes from `buf' to `bf' and return the number
   of bytes fwrite accepted. The running checksum is not updated and
   the read/write direction of `bf' is not checked.
   A null `bf' or a record without a stream is reported through abs_abort.
*/
size_t abs_bin_write (BinFile bf, void *buf, size_t nbytes)
	{ size_t accepted;
	  if (!bf)
	     abs_abort ("abs_bin_write", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_write", "Binary file not open");
	  accepted = fwrite (buf, 1, nbytes, bf -> file);
	  return (accepted);
	}

/* Saving routines */
/*
   Write the closing checksum byte of a binary file.
   The byte is chosen so that the sum of all checksummed bytes plus
   this one equals 0xff modulo 256, which is the condition tested by
   abs_bin_verify_eof. The byte is written directly and does not
   change the running checksum of `bf'.
   A null `bf', a missing stream or a file opened for reading is
   reported through abs_abort.
*/
void abs_bin_save_eof (BinFile bf)
	{ int trailer;
	  if (!bf)
	     abs_abort ("abs_bin_save_eof", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_save_eof", "Binary file not open");
	  if (!(bf -> writing))
	     abs_abort ("abs_bin_save_eof", "Binary file not open for writing");
	  trailer = (int) ((0xff - bf -> checksum) & 0xff);
	  putc_unlocked (trailer, bf -> file);
	}

/*
   Write the single byte `x' to `bf' and add its unsigned value to the
   running checksum, which is kept modulo 256.
   A null `bf', a missing stream or a file opened for reading is
   reported through abs_abort.
*/
void abs_bin_save_char (BinFile bf, char x)
	{ u_int32 octet = (u_int32) ((unsigned char) x);
	  if (!bf)
	     abs_abort ("abs_bin_save_char", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_save_char", "Binary file not open");
	  if (!(bf -> writing))
	     abs_abort ("abs_bin_save_char", "Binary file not open for writing");
	  bf -> checksum = (bf -> checksum + octet) & 0xff;
	  putc_unlocked (x, bf -> file);
	}

/*
   Integers are saved in a variable-length format after the Dwarf 2
   standard (LEB128). The value is cut into chunks of 7 bits, least
   significant chunk first, and every chunk is saved as one byte. The
   top bit (0x80) of a byte tells the reader that another chunk follows.

   For the signed variants the last chunk also carries the sign in its
   bit 0x40: saving stops as soon as the remaining value is all zeros
   and that bit is clear, or all ones (-1) and that bit is set.
   This relies on >> keeping the sign of a negative value.
*/
void abs_bin_save_int (BinFile bf, int x)
	{ int rest = x;
	  int done = 0;
	  while (!done)
	     { int chunk = rest & 0x7f;			/* lowest 7 bits */
	       int sign_bit = chunk & 0x40;		/* top bit of this chunk */
	       rest >>= 7;				/* drop them, sign is kept */
	       if (((rest == 0) && !sign_bit) || ((rest == -1) && sign_bit))
	          done = 1;
	       else
	          chunk |= 0x80;			/* more chunks will follow */
	       abs_bin_save_char (bf, (char) chunk);
	     }
	}

/*
   Save a signed 64 bit value as a sequence of 7 bit chunks, least
   significant chunk first, with bit 0x80 marking "more chunks follow".
   Saving stops once the rest of the value is pure sign extension of
   bit 0x40 of the chunk just produced.
*/
void abs_bin_save_int64 (BinFile bf, int64 x)
	{ int64 rest = x;
	  int done = 0;
	  while (!done)
	     { int chunk = (int) (rest & int64_const (0x7f));	/* lowest 7 bits */
	       int sign_bit = chunk & 0x40;
	       rest >>= 7;					/* drop them, sign is kept */
	       if (((rest == 0) && !sign_bit) || ((rest == -1) && sign_bit))
	          done = 1;
	       else
	          chunk |= 0x80;				/* more chunks will follow */
	       abs_bin_save_char (bf, (char) chunk);
	     }
	}

/*
   Save an unsigned value as a sequence of 7 bit chunks, least
   significant chunk first. Every byte except the last has bit 0x80
   set; the last byte is the one after which no value bits remain.
   The value 0 is saved as a single zero byte.
*/
void abs_bin_save_u_int (BinFile bf, u_int x)
	{ u_int rest = x;
	  for (;;)
	     { int chunk = rest & 0x7f;			/* lowest 7 bits */
	       rest >>= 7;
	       if (rest == 0)
	          { abs_bin_save_char (bf, (char) chunk);
	            break;
	          }
	       abs_bin_save_char (bf, (char) (chunk | 0x80));
	     }
	}

/*
   Save an unsigned 64 bit value as a sequence of 7 bit chunks, least
   significant chunk first. Every byte except the last has bit 0x80
   set; the last byte is the one after which no value bits remain.
   The value 0 is saved as a single zero byte.
*/
void abs_bin_save_u_int64 (BinFile bf, u_int64 x)
	{ u_int64 rest = x;
	  for (;;)
	     { int chunk = (int) (rest & u_int64_const (0x7f));	/* lowest 7 bits */
	       rest >>= 7;
	       if (rest == 0)
	          { abs_bin_save_char (bf, (char) chunk);
	            break;
	          }
	       abs_bin_save_char (bf, (char) (chunk | 0x80));
	     }
	}

/*
   Save the NUL terminated string `x': first its length, saved with
   abs_bin_save_int, then its characters one by one. The terminating
   NUL itself is not saved.
*/
void abs_bin_save_string (BinFile bf, char *x)
	{ int length = (int) strlen (x);
	  int pos = 0;
	  abs_bin_save_int (bf, length);
	  while (pos < length)
	     { abs_bin_save_char (bf, x[pos]);
	       pos++;
	     }
	}

/*
   Save the file header: the three strings "AGFL", `kind' and
   `version', in that order. abs_bin_verify_version_nr reads them
   back in the same order.
*/
void abs_bin_save_version_nr (BinFile bf, char *kind, char *version)
	{ char *fields[3];
	  int ix;
	  fields[0] = "AGFL";
	  fields[1] = kind;
	  fields[2] = version;
	  for (ix = 0; ix < 3; ix++)
	     abs_bin_save_string (bf, fields[ix]);
	}

/*
   Save the file header for a file of the given `kind', stamped with
   AGFL_VERSION as its version string.
*/
void abs_bin_save_version (BinFile bf, char *kind)
	{ char *current_version = AGFL_VERSION;
	  abs_bin_save_version_nr (bf, kind, current_version);
	}

/* Loading routines */
/*
   Common precondition check of the loading routines: `bf' must be
   non-null, must have a stream and must not be opened for writing.
   Violations are reported through abs_abort, always under the name
   "abs_bin_load_char" regardless of which loader called.
*/
static void abs_bin_check_bf (BinFile bf)
	{ if (!bf)
	     abs_abort ("abs_bin_load_char", "Called with null pointer");
	  if (!(bf -> file))
	     abs_abort ("abs_bin_load_char", "Binary file not open");
	  if (bf -> writing != 0)
	     abs_abort ("abs_bin_load_char", "Binary file not open for reading");
	}

/*
   Read one byte from `bf' into *x and add it to the running checksum
   (kept modulo 256). Hitting end of file is reported through abs_abort.
   `bf' itself is not validated here; callers run abs_bin_check_bf first.
*/
static void abs_bin_load_char_unchecked (BinFile bf, char *x)
	{ int got = getc_unlocked (bf -> file);
	  if (got == EOF)
	     abs_abort ("abs_bin_load_char", "Binary file '%s' ends prematurely", bf -> path);
	  *x = (char) got;
	  bf -> checksum = (bf -> checksum + (u_int32) got) & 0xff;
	}

/*
   Load one byte from `bf' into *x, after checking that `bf' is a
   valid binary file opened for reading.
*/
void abs_bin_load_char (BinFile bf, char *x)
	{
	  abs_bin_check_bf (bf);			/* validate first */
	  abs_bin_load_char_unchecked (bf, x);		/* then read and checksum */
	}

/*
   Check the tail of a binary file: load the checksum byte written by
   abs_bin_save_eof, verify that the running checksum now equals 0xff
   and verify that nothing follows it in the file.
   Each failed check is reported with abs_message. Returns 0 when both
   checks pass and -1 otherwise.
*/
int abs_bin_verify_eof (BinFile bf)
	{ int status = 0;
	  int next;
	  char trailer;

	  abs_bin_load_char (bf, &trailer);	/* folds the checksum byte into the sum */
	  if ((bf -> checksum & 0xff) != 0xff)
	    { status = -1;
	      abs_message ("Binary file '%s' has an incorrect checksum", bf -> path);
	    }

	  next = getc_unlocked (bf -> file);
	  if (next != EOF)
	    { status = -1;
	      abs_message ("Binary file '%s' has trailing garbage", bf -> path);
	    }
	  return status;
	}

/*
   Load a signed int saved by abs_bin_save_int into *x.

   Chunks of 7 bits arrive least significant first; bit 0x80 of a byte
   announces another chunk and bit 0x40 of the last byte is the sign.
   A negative value is sign extended from the first bit position that
   was not covered by the chunks (at most bit 31).

   The value is assembled in an unsigned variable so that neither the
   chunk shifts nor the sign extension overflow a signed int. Chunks
   beyond 32 bits, which only occur in a malformed file, are read (and
   checksummed) but contribute no bits.
*/
void abs_bin_load_int (BinFile bf, int *x)
	{ u_int value = 0;
	  int shift = 0;
	  char bb;
	  abs_bin_check_bf (bf);
	  do
	     { abs_bin_load_char_unchecked (bf, &bb);
	       if (shift < 32) value |= (((u_int) (bb & 0x7f)) << shift);
	       shift += 7;
	     }
	  while (bb & 0x80);
	  if (shift > 31) shift = 31;
	  if (bb & 0x40) value |= (~((u_int) 0)) << shift;	/* set all bits from `shift' upward */
	  *x = (int) value;
	}

/*
   Load a signed 64 bit value saved by abs_bin_save_int64 into *x.

   Chunks of 7 bits arrive least significant first; bit 0x80 of a byte
   announces another chunk and bit 0x40 of the last byte is the sign.
   A negative value is sign extended from the first bit position that
   was not covered by the chunks (at most bit 63).

   The sign extension mask is built in 64 bit unsigned arithmetic. A
   mask built from a plain int 1 is only valid for shift counts below
   the width of int and would corrupt negative values that take more
   than four bytes. Chunks beyond 64 bits, which only occur in a
   malformed file, are read (and checksummed) but contribute no bits.
*/
void abs_bin_load_int64 (BinFile bf, int64 *x)
	{ u_int64 value = 0;
	  int shift = 0;
	  char bb;
	  abs_bin_check_bf (bf);
	  do
	     { abs_bin_load_char_unchecked (bf, &bb);
	       if (shift < 64) value |= (((u_int64) (bb & 0x7f)) << shift);
	       shift += 7;
	     }
	  while (bb & 0x80);
	  if (shift > 63) shift = 63;
	  if (bb & 0x40) value |= (~((u_int64) 0)) << shift;	/* set all bits from `shift' upward */
	  *x = (int64) value;
	}

/*
   Load an unsigned int saved by abs_bin_save_u_int into *x.

   Chunks of 7 bits arrive least significant first; bit 0x80 of a byte
   announces another chunk. The value is assembled in an unsigned
   variable, matching the type of the result. Chunks beyond 32 bits,
   which only occur in a malformed file, are read (and checksummed)
   but contribute no bits.
*/
void abs_bin_load_u_int (BinFile bf, u_int *x)
	{ u_int value = 0;
	  int shift = 0;
	  char bb;
	  abs_bin_check_bf (bf);
	  do
	     { abs_bin_load_char_unchecked (bf, &bb);
	       if (shift < 32) value |= (((u_int) (bb & 0x7f)) << shift);
	       shift += 7;
	     }
	  while (bb & 0x80);
	  *x = value;
	}

/*
   Load an unsigned 64 bit value saved by abs_bin_save_u_int64 into *x.

   Like the other loaders this first checks that `bf' is a valid
   binary file opened for reading. Chunks of 7 bits arrive least
   significant first; bit 0x80 of a byte announces another chunk.
   Chunks beyond 64 bits, which only occur in a malformed file, are
   read (and checksummed) but contribute no bits.
*/
void abs_bin_load_u_int64 (BinFile bf, u_int64 *x)
	{ u_int64 value = 0;
	  int shift = 0;
	  char bb;
	  abs_bin_check_bf (bf);
	  do
	     { abs_bin_load_char_unchecked (bf, &bb);
	       if (shift < 64) value |= (((u_int64) (bb & 0x7f)) << shift);
	       shift += 7;
	     }
	  while (bb & 0x80);
	  *x = value;
	}

/*
   Load a string saved by abs_bin_save_string.

   The length is read first, then that many characters. The result is
   stored in a buffer obtained from abs_malloc, NUL terminated, and
   handed back through *x; abs_bin_verify_version_nr releases such
   strings with abs_free.

   The length comes from the file and is therefore not trusted: a
   negative length or one of MAXSTRLEN or more is reported through
   abs_abort before anything is allocated.
*/
void abs_bin_load_string (BinFile bf, char **x)
	{ char *buffer;
	  int ix, size;
	  abs_bin_load_int (bf, &size);
	  if ((size < 0) || (size >= MAXSTRLEN))
	     abs_abort ("abs_bin_load_string",
			"Binary file '%s' contains a string of length %d", bf -> path, size);
	  *x = buffer = abs_malloc(size + 1, "abs_bin_load_string");
	  for (ix = 0; ix < size; ix++)
	     abs_bin_load_char_unchecked (bf, &buffer[ix]);
	  buffer[size] = '\0';
	}

/*
   Load a string saved by abs_bin_save_string into memory taken from
   the pool `p'.

   The length is read first, then that many characters. The result is
   stored in a buffer obtained from abs_pool_malloc_unaligned, NUL
   terminated, and handed back through *x.

   The length comes from the file and is therefore not trusted: a
   negative length or one of MAXSTRLEN or more is reported through
   abs_abort before anything is allocated.
*/
void abs_bin_load_pool_string (struct abs_pool *p, BinFile bf, char **x)
	{ char *buffer;
	  int ix, size;
	  abs_bin_load_int (bf, &size);
	  if ((size < 0) || (size >= MAXSTRLEN))
	     abs_abort ("abs_bin_load_pool_string",
			"Binary file '%s' contains a string of length %d", bf -> path, size);
	  *x = buffer = abs_pool_malloc_unaligned(p, size + 1, "abs_bin_load_string_pool");
	  for (ix = 0; ix < size; ix++)
	     abs_bin_load_char_unchecked (bf, &buffer[ix]);
	  buffer[size] = '\0';
	}

/*
   Verify the header of a binary file: three strings that must equal
   "AGFL", `kind' and `version', in that order.

   Checking stops at the first string that differs; the remaining
   header strings are then left unread. Unless `silent' is non-zero
   the mismatch is reported with abs_message.
   Returns 0 when all three strings match and -1 otherwise.
*/
int abs_bin_verify_version_nr (BinFile bf, char *kind, int silent, char *version)
	{ char *field;
	  int mismatch;

	  /* First string: the fixed marker */
	  abs_bin_load_string (bf, &field);
	  mismatch = (strcmp (field, "AGFL") != 0);
	  if (mismatch && !silent)
	     abs_message ("File '%s' does not appear to be an AGFL binary file", bf -> path);
	  abs_free (field, "abs_bin_verify_version");
	  if (mismatch) return (-1);

	  /* Second string: the kind of file */
	  abs_bin_load_string (bf, &field);
	  mismatch = (strcmp (field, kind) != 0);
	  if (mismatch && !silent)
	     abs_message ("File '%s' does not appear to be a %s file", bf -> path, kind);
	  abs_free (field, "abs_bin_verify_version");
	  if (mismatch) return (-1);

	  /* Third string: the version; the message needs it, so free it afterwards */
	  abs_bin_load_string (bf, &field);
	  mismatch = (strcmp (field, version) != 0);
	  if (mismatch && !silent)
	     abs_message ("File '%s' is generated by AGFL version %s\n"
			    "you should regenerate it with the current AGFL version %s",
			    bf -> path, field, version);
	  abs_free (field, "abs_bin_verify_version");
	  return (mismatch ? -1 : 0);
	}

/*
   Verify the header of a binary file of the given `kind' against
   AGFL_VERSION. Returns 0 on a match and -1 otherwise; see
   abs_bin_verify_version_nr for the role of `silent'.
*/
int abs_bin_verify_version (BinFile bf, char *kind, int silent)
	{ int status = abs_bin_verify_version_nr (bf, kind, silent, AGFL_VERSION);
	  return status;
	}
