/*
   File: lxcn_vocabulary.c
   Defines the functions to manipulate a vocabulary

   CVS ID: "$Id: lxcn_vocabulary.c,v 1.4 2006/07/26 10:35:36 marcs Exp $"
*/

/* include config.h if autoconfigured */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/* global includes */
#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* Libabase includes */
#include <abase_porting.h>
#include <abase_error.h>
#include <abase_memalloc.h>
#include <abase_fileutil.h>

/* Local includes */
#include "lxcn_vocabulary.h"

/*
   Node allocation
*/
/*
   Allocate a trie node and fill in all of its fields from the arguments.
   The pointers are stored as given; nothing is copied here.
*/
static VocTrie new_trie (VocIndexTree tails, VocInfo info, char *search_key, int rem_offset)
	{ VocTrie node;

	  node = (VocTrie) abs_malloc (sizeof (struct voc_trie_rec), "new_trie");
	  node -> search_key = search_key;
	  node -> rem_offset = rem_offset;
	  node -> info = info;
	  node -> tails = tails;
	  return (node);
	}

/*
   Allocate an index tree node for the character 'key' and fill in its
   children, its subtrie and its balance factor from the arguments.
*/
static VocIndexTree new_index_tree (VocIndexTree left, VocIndexTree right,
				    VocTrie sub_trie, unsigned char key, signed char balfac)
	{ VocIndexTree node;

	  node = (VocIndexTree) abs_malloc (sizeof (struct voc_index_tree_rec), "new_index_tree");
	  node -> key = key;
	  node -> balfac = balfac;
	  node -> sub_trie = sub_trie;
	  node -> left = left;
	  node -> right = right;
	  return (node);
	}

/*
   Leaf construction
*/
/*
   Create a trie leaf for 'key' whose unmatched remainder starts at 'offset'.
   The leaf gets its own copy of the key. Its info starts as voc_info_nil
   and is only filled in when an add_info procedure is supplied.
*/
static VocTrie make_trie_leaf (VocInfo info, AddVocInfoProc add_info, char *key, int offset)
	{ /* Copy the key first, then hang the copy in a fresh node without tails */
	  char *private_key = abs_new_string (key, "make_trie_leaf");
	  VocTrie leaf = new_trie (voc_index_tree_nil, voc_info_nil, private_key, offset);

	  /* Without an add_info procedure the leaf keeps voc_info_nil */
	  if (add_info == NULL) return (leaf);

	  add_info (&leaf -> info, info);
	  return (leaf);
	}

/*
   Lower the entry held by 'current' by one character: build a new trie leaf
   that shares current's key string and info, with its remainder starting one
   position further, and wrap it in a singleton index tree node keyed on the
   first character of current's remainder. 'current' itself is not modified.
*/
static VocIndexTree make_subtrie_leaf (VocTrie current)
	{ VocTrie lowered =
		(VocTrie) abs_malloc (sizeof (struct voc_trie_rec), "make_subtrie_leaf");
	  char *shared_key = current -> search_key;
	  int split_at = current -> rem_offset;

	  /* The key string is shared with 'current', not copied */
	  lowered -> search_key = shared_key;
	  lowered -> rem_offset = split_at + 1;
	  lowered -> info = current -> info;
	  lowered -> tails = voc_index_tree_nil;

	  return (new_index_tree (voc_index_tree_nil, voc_index_tree_nil, lowered,
				  (unsigned char) shared_key[split_at], 0));
	}

/*
   Create a singleton index tree node for the character search_key[offset].
   Its subtrie is a fresh trie leaf holding the rest of the key, i.e. the
   part starting at offset + 1.
*/
static VocIndexTree make_index_tree_leaf (VocInfo info, AddVocInfoProc add_info,
					  char *search_key, int offset)
	{ unsigned char branch_char = (unsigned char) search_key[offset];
	  VocTrie rest = make_trie_leaf (info, add_info, search_key, offset + 1);

	  /* A fresh node has no children and is perfectly balanced */
	  return (new_index_tree (voc_index_tree_nil, voc_index_tree_nil, rest, branch_char, 0));
	}

/*
   Trie insertion
*/
static void enter_into_trie (VocTrie *root, VocInfo info, AddVocInfoProc add_info,
			     char *search_key, int offset);
/*
   Enter search_key into the balanced binary index tree *root, using the
   character search_key[offset] as the key for this tree.

   root:	address of the link to the index tree; it is rewritten when the
		tree was empty or when a rotation changes the top of the tree
   info:	the info to attach to the entry
   add_info:	procedure used to attach the info (may be NULL)
   search_key:	the complete key that is being entered
   offset:	position in search_key of the character handled here

   If a node for the character already exists, the insertion continues in
   its subtrie with offset + 1. Otherwise a new node is inserted, the balance
   factors on the search path are adjusted and at most one single or double
   rotation restores the balance. A step to the left decrements a balance
   factor, a step to the right increments it.

   The only caller in this file (enter_into_trie) calls this function after
   it has checked that search_key[offset] is not the terminating '\0'.
*/
static void enter_into_index_tree (VocIndexTree *root, VocInfo info, AddVocInfoProc add_info,
				   char *search_key, int offset)
	{ VocIndexTree *fixation;		/* address of the link that points to last_unbal */
	  VocIndexTree *insert_ptr;		/* address of the link currently being inspected */
	  VocIndexTree last_unbal;		/* deepest node on the path with balfac != 0 (or *root) */
	  VocIndexTree below_unbal;		/* child of last_unbal on the insertion path */
	  VocIndexTree twobelow_unbal;		/* grandchild used as pivot of a double rotation */
	  VocIndexTree ptr, new_index_tree;	/* note: this local hides the function new_index_tree */
	  unsigned char key = search_key[offset];

	  /*
	     The following action can only be taken when our father node contains
	     a valid prefix of our search key and our search key is the first key
	     to be entered with this prefix.
	  */
	  if (*root == voc_index_tree_nil)
	    { *root = make_index_tree_leaf (info, add_info, search_key, offset);
	      return;
	    };

	  /*
	     Search in the tree, remembering the deepest node on the path that
	     has a nonzero balance factor together with the link that points to it
	  */
	  insert_ptr = root;
	  fixation = root;
	  last_unbal = *root;
	  while ((*insert_ptr) != voc_index_tree_nil)
	    { if ((*insert_ptr) -> balfac)
		{ fixation = insert_ptr;
		  last_unbal = *insert_ptr;
		};
	      if (key < (*insert_ptr) -> key)
	        insert_ptr = &((*insert_ptr) -> left);
	      else if (key > (*insert_ptr) -> key)
		insert_ptr = &((*insert_ptr) -> right);
	      else
		{ /* We found a subtrie for this key, continue with next offset */
		  enter_into_trie (&((*insert_ptr) -> sub_trie), info, add_info,
				   search_key, offset + 1);
		  return;
	        };
	    };

	  /*
	     We found a location to insert the new leaf and we remember it: the
	     pointer rotations for rebalancing may not leave *insert_ptr intact
	  */
	  new_index_tree = make_index_tree_leaf (info, add_info, search_key, offset);
	  *insert_ptr = new_index_tree;

	  /* adjust balance factors from last_unbal to the inserted node */
	  if (key < last_unbal -> key)
	    { below_unbal = last_unbal -> left;
	      last_unbal -> balfac--;
	    }
	  else
	    { below_unbal = last_unbal -> right;
	      last_unbal -> balfac++;
	    };

	  /*
	     All nodes strictly between last_unbal and the new node had a zero
	     balance factor; each now leans towards the side of the new node
	  */
	  ptr = below_unbal;
	  while (ptr != new_index_tree)
	    if (key < ptr -> key)
	      { ptr -> balfac--;
		ptr = ptr -> left;
	      }
	    else
	      { ptr -> balfac++;
		ptr = ptr -> right;
	      };

	  /* if tree not too much out of balance, done */
	  if ((-1 <= last_unbal -> balfac) && (last_unbal -> balfac <= 1))
	    return;

	  /*
	     if last_unbal has the same sign as below_unbal, it is easy:
	     a single rotation makes below_unbal the top of this subtree
	  */
	  if (((last_unbal -> balfac > 0) && (below_unbal -> balfac > 0)) ||
	      ((last_unbal -> balfac < 0) && (below_unbal -> balfac < 0)))
	    { if (last_unbal -> balfac > 0)
		{ last_unbal -> right = below_unbal -> left;
		  below_unbal -> left = last_unbal;
		}
	      else
		{ last_unbal -> left = below_unbal -> right;
		  below_unbal -> right = last_unbal;
		}
	      last_unbal -> balfac = 0;
	      below_unbal -> balfac = 0;
	      *fixation = below_unbal;
	      return;
	    };

	  /*
	     The difficult case: a double rotation makes twobelow_unbal the top
	     of this subtree. The new balance factors of last_unbal and below_unbal
	     are derived from the balance factor twobelow_unbal had before it is
	     reset to zero.
	  */
	  if (last_unbal -> balfac > 0)
	    { twobelow_unbal = below_unbal -> left;
	      last_unbal -> right = twobelow_unbal -> left;
	      below_unbal -> left = twobelow_unbal -> right;
	      twobelow_unbal -> left = last_unbal;
	      twobelow_unbal -> right = below_unbal;
	      last_unbal -> balfac = (twobelow_unbal -> balfac == 1)?-1:0;
	      below_unbal -> balfac = (twobelow_unbal -> balfac == -1)?1:0;
	    }
	  else
	    { twobelow_unbal = below_unbal -> right;
	      last_unbal -> left = twobelow_unbal -> right;
	      below_unbal -> right = twobelow_unbal -> left;
	      twobelow_unbal -> right = last_unbal;
	      twobelow_unbal -> left = below_unbal;
	      last_unbal -> balfac = (twobelow_unbal -> balfac == -1)?1:0;
	      below_unbal -> balfac = (twobelow_unbal -> balfac == 1)?-1:0;
	    };
	  twobelow_unbal -> balfac = 0;
	  *fixation = twobelow_unbal;
	}

/*
   Enter search_key into the trie *root; the characters before 'offset' have
   already been matched on the way down to this node.

   root:	address of the link to the trie node; only written when it is nil
   info:	the info to attach to the entry
   add_info:	procedure used to attach the info to the entry; when NULL the
		entry is created (or left as it is) without touching its info
   search_key:	the complete key that is being entered; the trie stores its
		own copy, the caller keeps ownership of this string
   offset:	position in search_key of the first unmatched character

   Entering a key that is already present only calls add_info on the
   existing entry.
*/
static void enter_into_trie (VocTrie *root, VocInfo info, AddVocInfoProc add_info,
			     char *search_key, int offset)
	{ VocTrie current;
	  char *curr_key;
	  char *curr_remainder;

	  /* If we point to nil, create a new leaf and be done */
	  if (*root == voc_trie_nil)
	    { *root = make_trie_leaf (info, add_info, search_key, offset);
	      return;
	    };

	  /*
	     Pick up the current trie node and its key. The remainder is only
	     computed when the node actually holds a key: a node without a key
	     has a NULL search_key and pointer arithmetic on NULL is undefined.
	  */
	  current = *root;
	  curr_key = current -> search_key;
	  if (curr_key != NULL)
	    { curr_remainder = curr_key + current -> rem_offset;

	      /*
		 If the current remainder is the same as our own remainder,
		 we have entered it before. Call add_info with the new info.
	      */
	      if (strcmp (curr_remainder, search_key + offset) == 0)
		{ if (add_info != NULL) add_info (&current -> info, info);
		  return;
		};

	      /*
		 The key for which we are searching and the key of this node
		 are now certain to be different. If the current remainder still
		 points to a string of length > 0, the current node is a leaf of
		 the trie. In this case we lower the leaf to a sub_trie and
		 continue the search.
	      */
	      if (*curr_remainder != '\0')
		{ /* Invalidate current entry; the lowered leaf takes over key and info */
		  current -> tails = make_subtrie_leaf (current);
		  current -> info = voc_info_nil;
		  current -> search_key = NULL;
		  current -> rem_offset = 0;
		};
	    };

	  /*
	     At this point there are three possibilities:
	     a) The current key has an empty remainder and is a prefix of our search key
		(Note that it cannot be identical; that case has been checked earlier)
	     b) The current key was lowered by the previous check.
	     c) The current node never held a key of its own.

	     Hence if we are at the end of our search key we may enter it. This is
	     the final action when we enter a prefix of an already entered key.
	     In case a) the search key is not exhausted, so the node's key is
	     never overwritten here.
	  */
	  if (search_key[offset] == '\0')
	    { current -> search_key = abs_new_string (search_key, "enter_into_trie");
	      current -> rem_offset = offset;
	      current -> info = voc_info_nil;
	      if (add_info != NULL) add_info (&current -> info, info);
	      return;
	    };

	  /*
	     At this point the current search pattern is not exhausted, not equal
	     to the remainder of the current node and a subtrie may exist.
	     We continue through the index_tree.
	  */
	  enter_into_index_tree (&current -> tails, info, add_info, search_key, offset);
	}

/*
   Lookup in vocabularies
   Note that this is the exact matching lookup.
*/
static int lookup_in_trie (VocTrie voc, char *search_key, int offset, VocInfo *info);
/*
   Look up the character search_key[offset] in the binary index tree 'vix'.
   When a node with that character exists, the lookup continues in its subtrie
   with offset + 1 and the result of that lookup is returned; otherwise the
   result is 0 and *info is left untouched by this function.
*/
static int lookup_in_index_tree (VocIndexTree vix, char *search_key, int offset, VocInfo *info)
{ unsigned char wanted = search_key[offset];
  VocIndexTree node;

  /* Walk down: smaller characters are found to the left, larger to the right */
  for (node = vix; node != voc_index_tree_nil;
       node = (wanted < node -> key) ? node -> left : node -> right)
    if (wanted == node -> key)
      return (lookup_in_trie (node -> sub_trie, search_key, offset + 1, info));

  /* Fell off the tree: no entry starts with this character */
  return (0);
}
   
/*
   Exact lookup of search_key (from 'offset' onwards) in the trie 'voc'.
   Returns 1 and stores the entry's info in *info when the key is present;
   returns 0 without writing *info otherwise.
*/
static int lookup_in_trie (VocTrie voc, char *search_key, int offset, VocInfo *info)
	{ char *node_key;
	  char *node_rest;

	  /* An empty vocabulary contains nothing */
	  if (voc == voc_trie_nil)
	    return (0);

	  node_key = voc -> search_key;
	  if (node_key != NULL)
	    { node_rest = node_key + voc -> rem_offset;

	      /* The remainder of this node equals the remainder we look for */
	      if (strcmp (node_rest, search_key + offset) == 0)
		{ *info = voc -> info;
		  return (1);
		};

	      /* A node with a nonempty remainder is a tail; nothing lies below it */
	      if (*node_rest != '\0')
		return (0);
	    };

	  /* Nothing left to search for, yet no entry ends here */
	  if (search_key[offset] == '\0')
	    return (0);

	  /* Continue with the next character in the index tree of this node */
	  return (lookup_in_index_tree (voc -> tails, search_key, offset, info));
	}

/*
   Dumping of vocabularies
*/
/*
   Print the indentation of a dump line: ind1 times a '>' followed by
   ind1 + ind2 blanks.
*/
static void dump_indent (int ind1, int ind2)
	{ int markers;
	  int blanks;

	  for (markers = ind1; markers > 0; markers--)
	     abs_printf (">");
	  for (blanks = ind1 + ind2; blanks > 0; blanks--)
	     abs_printf (" ");
	}

static void dump_trie (VocTrie l, int ind1, int ind2);
/*
   Dump an index tree in order: left subtree, the subtrie of the node,
   right subtree. Every step down the index tree increases ind2 by one.
*/
static void dump_index_tree (VocIndexTree l, int ind1, int ind2)
	{ VocIndexTree node = l;
	  int depth = ind2;

	  /* The left side is handled recursively, the right side by looping */
	  while (node != voc_index_tree_nil)
	    { dump_index_tree (node -> left, ind1, depth + 1);
	      dump_trie (node -> sub_trie, ind1, depth);
	      node = node -> right;
	      depth++;
	    };
	}

/*
   Dump a trie node: its key and remainder offset when it holds a key,
   followed by the index tree of its tails, one '>' level deeper.
   A nil trie is reported through abs_abort.
*/
static void dump_trie (VocTrie l, int ind1, int ind2)
	{ if (l == voc_trie_nil) abs_abort ("dump_trie", "nil trie");

	  /* Only nodes that hold a key produce a line of their own */
	  if (l -> search_key != NULL)
	     { dump_indent (ind1, ind2);
	       abs_message ("'%s' (rem %d)", l -> search_key, l -> rem_offset);
	     };

	  if (l -> tails == voc_index_tree_nil) return;
	  dump_index_tree (l -> tails, ind1 + 1, 0);
	}

/*
   Saving of vocabularies
*/
static void bin_save_trie (BinFile bf, SaveVocInfoProc sv, VocTrie l);
/*
   Save an index tree node in the order: tag, subtrie, key, balance factor,
   left subtree (if any), right subtree (if any).
   Bit 0 of the tag marks a left subtree, bit 1 a right subtree.
   A nil index tree or a nil subtrie is reported through abs_bug.
*/
static void bin_save_index_tree (BinFile bf, SaveVocInfoProc sv, VocIndexTree l)
	{ int has_left, has_right;
	  char tag;

	  if (l == voc_index_tree_nil) abs_bug ("bin_save_index_tree", "null index tree");
	  if (l -> sub_trie == voc_trie_nil) abs_bug ("bin_save_index_tree", "null subtrie");

	  has_left = (l -> left != voc_index_tree_nil);
	  has_right = (l -> right != voc_index_tree_nil);
	  tag = (char) ((has_left ? 1 : 0) | (has_right ? 2 : 0));

	  abs_bin_save_char (bf, tag);
	  bin_save_trie (bf, sv, l -> sub_trie);
	  abs_bin_save_char (bf, l -> key);
	  abs_bin_save_char (bf, l -> balfac);
	  if (has_left) bin_save_index_tree (bf, sv, l -> left);
	  if (has_right) bin_save_index_tree (bf, sv, l -> right);
	}

/*
   Save a trie node in the order: tag, then (when the node holds a key) the
   info, the key and the remainder offset, then (when present) the index
   tree of its tails. Bit 0 of the tag marks a key, bit 1 marks tails.
   A nil trie is saved as a tag of 0 only. The info is only written when a
   save procedure 'sv' is supplied.
*/
static void bin_save_trie (BinFile bf, SaveVocInfoProc sv, VocTrie l)
	{ int has_key = (l != voc_trie_nil) && (l -> search_key != NULL);
	  int has_tails = (l != voc_trie_nil) && (l -> tails != voc_index_tree_nil);
	  char tag = (char) ((has_key ? 1 : 0) | (has_tails ? 2 : 0));

	  abs_bin_save_char (bf, tag);
	  if (has_key)
	    { if (sv != NULL) sv (bf, l -> info);
	      abs_bin_save_string (bf, l -> search_key);
	      abs_bin_save_int (bf, l -> rem_offset);
	    };
	  if (has_tails) bin_save_index_tree (bf, sv, l -> tails);
	}

/*
   Loading of vocabularies
*/
static void bin_load_trie (BinFile bf, LoadVocInfoProc lv, VocTrie *l);
/*
   Load an index tree node in the order in which bin_save_index_tree wrote it:
   tag, subtrie, key, balance factor, left subtree, right subtree.
   A tag with bits other than 0 and 1 set, or a balance factor outside
   -1 .. 1, is reported through abs_abort. The new node is stored in *l.
*/
static void bin_load_index_tree (BinFile bf, LoadVocInfoProc lv, VocIndexTree *l)
	{ VocIndexTree left_child = voc_index_tree_nil;
	  VocIndexTree right_child = voc_index_tree_nil;
	  VocTrie node_trie;
	  signed char node_balfac;
	  unsigned char node_key;
	  char tag;

	  abs_bin_load_char (bf, &tag);
	  if (tag & 0xfc)
	    abs_abort ("bin_load_index_tree", "read erroneous index tree tag");

	  bin_load_trie (bf, lv, &node_trie);
	  abs_bin_load_char (bf, &node_key);
	  abs_bin_load_char (bf, &node_balfac);
	  if ((node_balfac < -1) || (node_balfac > 1))
	    abs_abort ("bin_load_index_tree", "read erroneous balance factor");

	  /* The children follow only when the tag announces them */
	  if ((tag & 1) != 0) bin_load_index_tree (bf, lv, &left_child);
	  if ((tag & 2) != 0) bin_load_index_tree (bf, lv, &right_child);
	  *l = new_index_tree (left_child, right_child, node_trie, node_key, node_balfac);
	}

/*
   Load a trie node in the order in which bin_save_trie wrote it: tag, then
   (bit 0) info, key and remainder offset, then (bit 1) the index tree of
   the tails. A node is always allocated and stored in *l, also for a tag
   of 0. A tag with other bits set is reported through abs_abort.
   The info is only read when a load procedure 'lv' is supplied.
*/
static void bin_load_trie (BinFile bf, LoadVocInfoProc lv, VocTrie *l)
	{ char *node_key = NULL;
	  int node_offset = 0;
	  VocInfo node_info = voc_info_nil;
	  VocIndexTree node_tails = voc_index_tree_nil;
	  char tag;

	  abs_bin_load_char (bf, &tag);
	  if (tag & 0xfc)
	    abs_abort ("bin_load_trie", "read erroneous trie tag");

	  if ((tag & 1) != 0)
	     { if (lv != NULL) lv (bf, &node_info);
	       abs_bin_load_string (bf, &node_key);
	       abs_bin_load_int (bf, &node_offset);
	     };
	  if ((tag & 2) != 0) bin_load_index_tree (bf, lv, &node_tails);

	  *l = new_trie (node_tails, node_info, node_key, node_offset);
	}

/*
   The actual vocabulary interface
*/
/*
   Enter search_key with the given info into the vocabulary *voc.
   The whole key still has to be matched, so the insertion starts at
   character position 0. add_info may be NULL.
*/
void lxcn_enter_into_vocabulary (Vocabulary *voc, char *search_key,
				 VocInfo info, AddVocInfoProc add_info)
	{ int start_offset = 0;
	  enter_into_trie (voc, info, add_info, search_key, start_offset);
	}

/*
   Exact lookup of search_key in the vocabulary. Returns 1 and stores the
   info of the entry in *info when the key is present, 0 otherwise.
*/
int lxcn_lookup_in_vocabulary (Vocabulary voc, char *search_key, VocInfo *info)
	{ int found = lookup_in_trie (voc, search_key, 0, info);
	  return (found);
	}

/*
   Dump the keys of the vocabulary, indented according to their place in
   the trie. An empty (nil) vocabulary produces no output.
*/
void lxcn_dump_vocabulary (Vocabulary voc)
	{ /*
	     dump_trie treats a nil trie as a fatal error, which is right for the
	     subtries inside a vocabulary but not for a vocabulary that is still
	     empty: lookup, enter and save all accept a nil vocabulary as well.
	  */
	  if (voc == voc_trie_nil) return;
	  dump_trie (voc, 0, 0);
	}

/*
   Save the vocabulary to the binary file 'path': a "vocabulary" version
   header, the trie itself and an end of file marker. The info of the
   entries is only written when a save procedure 'sv' is supplied.
*/
void lxcn_bin_save_vocabulary (char *path, SaveVocInfoProc sv, Vocabulary voc)
	{ BinFile out;

	  out = abs_bin_fopen (path, "w");
	  abs_bin_save_version (out, "vocabulary");
	  bin_save_trie (out, sv, voc);
	  abs_bin_save_eof (out);
	  abs_bin_fclose (out);
	}

/*
   Load a vocabulary from the binary file 'path' into *voc: the "vocabulary"
   version header is verified, the trie is read and the end of file marker
   is verified. The info of the entries is only read when a load procedure
   'lv' is supplied.
*/
void lxcn_bin_load_vocabulary (char *path, LoadVocInfoProc lv, Vocabulary *voc)
	{ BinFile in;

	  in = abs_bin_fopen (path, "r");
	  abs_bin_verify_version (in, "vocabulary");
	  bin_load_trie (in, lv, voc);
	  abs_bin_verify_eof (in);
	  abs_bin_fclose (in);
	}
