/*
 * $Id: lxcn_voc_iterator.c,v 1.3 2007/09/17 09:53:55 olafs Exp $
 *
 * Code for an iterator over a Vocabulary.
 *
 * Typical use: see header file.
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "abase_memalloc.h"
#include "abase_fileutil.h"
#include "abase_error.h"
#include "lxcn_vocabulary.h"
#include "lxcn_vocabulary_impl.h"
#include "lxcn_voc_iterator.h"

/* Fixed capacity, in entries, of the traversal stack embedded in VocStack. */
#define STACKDEPTH 1024

/*
 * One slot of the traversal stack: a tagged union that holds either a
 * trie node or an index tree node. `type` tells which member of `u`
 * was stored last.
 */
typedef struct VocStackEntry {
    enum { voctrie_type, vocindextree_type } type;
    union {
	VocTrie vt;	/* valid when type == voctrie_type */
	VocIndexTree vit;	/* valid when type == vocindextree_type */
    } u;
} VocStackEntry;

/*
 * Iterator state for walking a vocabulary trie: the trie being iterated
 * and a fixed-size stack of the nodes on the path to the current position.
 */
typedef struct VocStack {
    VocTrie trie;	/* the trie passed to lxcn_start_iteration() */
    int sp;		/* index of the top entry; -1 when the stack is empty */
    struct VocStackEntry stack[STACKDEPTH];
} VocStack;

/*
 * Push index tree node `t` on top of the stack and tag the new slot as
 * an index tree entry. Capacity is only checked by an assertion.
 */
static
void
push_vit(VocStack *stack, VocIndexTree t)
{
    stack->sp += 1;
    assert(stack->sp < STACKDEPTH);	/* no dynamic growth (yet) */
    stack->stack[stack->sp].type = vocindextree_type;
    stack->stack[stack->sp].u.vit = t;
}

/*
 * Pop and return the top entry if it is an index tree node.
 * If the top entry is a trie node, the stack is left untouched and
 * voc_index_tree_nil is returned; that is how callers detect that a
 * block of index tree entries has been used up.
 */
static
VocIndexTree
pop_vit(VocStack *stack)
{
    int top = stack->sp;

    assert(top >= 0);
    if (stack->stack[top].type != vocindextree_type)
	return voc_index_tree_nil;

    stack->sp = top - 1;
    return stack->stack[top].u.vit;
}

/*
 * Push trie node `t` on top of the stack and tag the new slot as a
 * trie entry. Capacity is only checked by an assertion.
 */
static
void
push_vt(VocStack *stack, VocTrie t)
{
    stack->sp += 1;
    assert(stack->sp < STACKDEPTH);	/* no dynamic growth (yet) */
    stack->stack[stack->sp].type = voctrie_type;
    stack->stack[stack->sp].u.vt = t;
}

/*
 * Pop and return the top entry if it is a trie node.
 * If the top entry is an index tree node, the stack is left untouched
 * and voc_trie_nil is returned.
 */
static
VocTrie
pop_vt(VocStack *stack)
{
    int top = stack->sp;

    assert(top >= 0);
    if (stack->stack[top].type != voctrie_type)
	return voc_trie_nil;

    stack->sp = top - 1;
    return stack->stack[top].u.vt;
}

/*
 * Find the first node of the in-order traversal of index tree `l`:
 * follow left links down from `l`, pushing every node that is passed
 * on the way. The returned (leftmost) node itself is not pushed.
 * `l` must not be nil.
 */
static
VocIndexTree
first_index_iteration(VocIndexTree l, struct VocStack *stack)
{
    VocIndexTree node;

    for (node = l; node->left; node = node->left)
	push_vit(stack, node);

    return node;
}

/*
 * One step of an iterative in-order traversal of an index tree.
 *
 * `l` is the node visited last; it is not on the stack when this is
 * called. Returns the node to visit next (also not on the stack), or
 * voc_index_tree_nil when the index tree entries on top of the stack
 * are used up, i.e. this index tree has been traversed completely.
 */
static
VocIndexTree
next_index_iteration(VocIndexTree l, struct VocStack *stack)
{
    VocIndexTree node = l;

    assert(node);

    if (node->right) {
	/* The successor is the leftmost node of the right subtree. */
	push_vit(stack, node);
	for (node = node->right; node->left; node = node->left)
	    push_vit(stack, node);
	return node;
    }

    /*
     * No right subtree: climb. An ancestor reached from its right
     * child has been visited already; one reached from its left
     * child is next. Running out of ancestors ends the traversal.
     */
    for (;;) {
	VocIndexTree up = pop_vit(stack);

	if (up == voc_index_tree_nil || node == up->left)
	    return up;
	node = up;
    }
}

/*
 * Create an iterator over trie `t`, positioned at its root node.
 *
 * Returns a newly allocated iterator, or NULL when memory is exhausted.
 * The iterator comes from malloc() and must be released with
 * lxcn_end_iteration().
 */
struct VocStack *
lxcn_start_iteration(VocTrie t)
{
    struct VocStack *stack = malloc(sizeof *stack);

    if (stack == NULL)
	return NULL;		/* do not write through a failed allocation */

    stack->trie = t;
    stack->sp = -1;		/* empty; push_vt() pre-increments to slot 0 */
    push_vt(stack, t);

    return stack;
}

/*
 * Release an iterator created by lxcn_start_iteration().
 * Only the iterator's own storage is freed.
 */
void
lxcn_end_iteration(struct VocStack *stack)
{
    free(stack);
}

/*
 * Report the entry at the iterator's current position.
 *
 * This function should be called directly after lxcn_start_iteration().
 * The only reason it is separate is to keep a calling signature similar
 * to lxcn_next_iteration() rather than adding all the arguments to
 * lxcn_start_iteration().
 * It may also be called after lxcn_next_iteration() has returned 1; it
 * then reports the current position again without advancing.
 *
 * If the current trie node has no key, the iterator is advanced with
 * lxcn_next_iteration() and that result is returned instead.
 *
 * On success *search_key is set to the node's key, *info to the address
 * of the node's info field, and 1 is returned.
 *
 * The iterator must not be exhausted: the stack has to hold at least
 * one entry, with a trie node on top.
 */
int
lxcn_first_iteration(struct VocStack *stack, char **search_key, int **info)
{
    VocTrie t;

    /* Validate before touching the stack, as lxcn_next_iteration() does. */
    assert(stack->sp >= 0);
    assert(stack->stack[stack->sp].type == voctrie_type);

    t = stack->stack[stack->sp].u.vt;

    if (t->search_key == NULL)
	return lxcn_next_iteration(stack, search_key, info);

    *search_key = t->search_key;
    *info = &t->info;
    return 1;
}

/*
 * Step the current position to the next trie node that has a key.
 * Pre-order iterative trie traversal.
 *
 * Returns 1 and sets *search_key to the key and *info to the address of
 * the info field of the new current node. Returns 0 when the traversal
 * is complete; the stack is then empty and the iterator may only be
 * passed to lxcn_end_iteration().
 *
 * Stack layout invariant:
 *
 *	(bottom of stack)
 * 0	voctrie_type	   (the root)
 *	vocindextree_type+ (1 or more, handled by next_index_iteration())
 *    / voctrie_type       \ *
 *    \ vocindextree_type+ /
 * sp->	voctrie_type	   (the current position)
 *
 * i.e. with blocks of vocindextree_types separated by 1 voctrie_type.
 *
 * A difference between the 2 types of iteration is that the index tree
 * iterator keeps its current position outside the stack, while the trie
 * iterator keeps it at the top of the stack. The latter is useful to
 * serve as separator between the index tree entries, so that we can
 * see when one of those substacks is empty.
 */
int
lxcn_next_iteration(struct VocStack *stack, char **search_key, int **info)
{
    /* Each round moves one trie node forward; keyless nodes are skipped. */
    for (;;) {
	VocTrie t, sub_trie;
	VocIndexTree vit;

	assert(stack->sp >= 0);
	assert(stack->stack[stack->sp].type == voctrie_type);

	t = stack->stack[stack->sp].u.vt;

	if (t->tails != voc_index_tree_nil) {	/* descend into trie */
	    vit = first_index_iteration(t->tails, stack);
	} else {			/* proceed at same trie level */
	    VocTrie popped = pop_vt(stack);

	    assert(popped == t);
	    (void) popped;		/* only used by the assertion */

	    for (;;) {
		/*
		 * An empty stack means the root itself was just popped:
		 * nothing is left to visit. Checking here, before
		 * pop_vit(), also covers a root without any tails.
		 */
		if (stack->sp < 0)
		    return 0;

		vit = next_index_iteration(pop_vit(stack), stack);
		if (vit != voc_index_tree_nil)
		    break;

		/* This index tree is done: go up one trie level. */
		popped = pop_vt(stack);
		assert(popped != NULL);
		(void) popped;
	    }
	}

	/* The index tree position goes below the trie node it leads to. */
	push_vit(stack, vit);
	sub_trie = vit->sub_trie;
	push_vt(stack, sub_trie);

	if (sub_trie->search_key != NULL) {
	    *search_key = sub_trie->search_key;
	    *info = &sub_trie->info;
	    return 1;
	}
	/* The trie node does not have a key: skip it and go on. */
    }
}

/*
 * And now the same thing, but on the (alphabetical format) bin files.
 * Thanks to the new "4" bit in the tree nodes, we can distinguish
 * them from trie nodes, so we don't need a stack to remember where
 * we are.
 */

/* State of an iteration over a binary vocabulary file. */
struct VocBinIter {
    BinFile bf;		/* the open file; set to NULL once closed */
    off_t eof;		/* offset at which lxcn_bin_next_iteration() stops */
};

/*
 * Open the binary vocabulary file `path` and prepare to iterate over it.
 *
 * The stop offset is taken one byte before the end of the file; the
 * file is then rewound and its version header checked.
 * NOTE(review): presumably the final byte is a trailer consumed by
 * abs_bin_verify_eof() -- confirm against abase_fileutil.
 *
 * The result must be released with lxcn_bin_end_iteration().
 */
struct VocBinIter *
lxcn_bin_start_iteration(char *path)
{
    struct VocBinIter *iter;

    iter = abs_malloc(sizeof(*iter), "lxcn_bin_start_iteration");
    iter->bf = abs_bin_fopen (path, "r");

    /* Record where the entries end. */
    abs_bin_seek(iter->bf, -1, SEEK_END);
    iter->eof = abs_bin_ftell(iter->bf);

    /* Back to the start for the version check. */
    abs_bin_seek(iter->bf, 0, SEEK_SET);
    abs_bin_verify_version (iter->bf, "vocabulary");

    return iter;
}

void
lxcn_bin_end_iteration(struct VocBinIter *vi)
{
    if (vi->bf) {
	abs_bin_verify_eof (vi->bf);
	abs_bin_fclose (vi->bf);
	vi->bf = NULL;
    }
    abs_free(vi, "lxcn_bin_end_iteration");
}

/*
 * Get the next key/info pair from the binary file which contains the
 * vocabulary.
 *
 * Reads node records until a trie node carrying a key is found, or
 * until the file position reaches vi->eof. Every record starts with a
 * tag byte in which only the low three bits may be set:
 *   bit 4: index tree node; its two payload bytes are read and dropped
 *   bit 1: (trie node) a key string, an offset and an info value follow
 * Any other bit aborts the program via abs_abort().
 *
 * Returns 1 with *search_key and *info loaded from the file, or 0 when
 * there are no more entries.
 * NOTE(review): *search_key is whatever abs_bin_load_string() stores;
 * who owns that storage is not visible here -- confirm before freeing.
 */
int
lxcn_bin_next_iteration(struct VocBinIter *vi, char **search_key, int *info)
{
    BinFile bf = vi->bf;
    off_t eof = vi->eof;

    while (abs_bin_ftell(bf) < eof) {
	char tag;

	abs_bin_load_char (bf, &tag);
	if ((tag & ~7) != 0) {
	    /*
	     * Go through unsigned char: a plain (possibly signed) char
	     * would be sign-extended and print as 0xffffff80, and %x
	     * expects an unsigned argument anyway.
	     */
	    abs_abort ("lxcn_bin_next_iteration", "read erroneous tree/trie tag 0x%02x @%ld", (unsigned int)(unsigned char)tag, (long)abs_bin_ftell(bf));
	}

	if (tag & 4) {
	    /* We have a tree node, which we're not interested in. */
	    char key, balfac;

	    abs_bin_load_char (bf, &key);
	    abs_bin_load_char (bf, &balfac);
	    continue;
	}

	/* This is a trie node, which may contain a key and info value. */
	if (tag & 1) {
	    int rem_offset = 0;	/* has to be read, but is not used */

	    /* load key, offset and info */
	    abs_bin_load_string (bf, search_key);
	    abs_bin_load_int (bf, &rem_offset);
	    abs_bin_load_int (bf, info);

	    return 1;
	}
    }

    return 0;
}
