// AVL-trie routines
//
// Copyright 2001, KUN.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Library General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

// $Id: avltrie.cc,v 1.10 2001/10/09 11:59:17 ejv Exp $

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <globals.h>
#include <stddef.h>
#include <iostream.h>
#include <iomanip.h>
#include "avltrie.h"
#include "pool.h"
#include "key.h"
#include "entry.h"


//------------------------------------------------------------------------------
// Balancing stuff:
//------------------------------------------------------------------------------

// Return 1 - balance. AvlNode::rotate() and AvlNode::enter() use this to
// obtain the other index into sons[] from a given direction
// (presumably Left and Right are 0 and 1 -- confirm in avltrie.h).
int inverse(int balance)
{
    enum { DirectionSum = 1 };

    const int flipped = DirectionSum - balance;
    return flipped;
}


//------------------------------------------------------------------------------
// Function:
//	AvlNode::AvlNode(const char* str, EntryList& entries)
//
// Description:
//	Construct a new trie node: key is head(str), balance is Balanced,
//	marker is 0 and both sons are 0.
//	If tail(str) yields a remainder, val.next is set to a newly
//	allocated AvlTrie and the remainder is entered into it together
//	with entries. Otherwise val.entries is set to a new EntryList
//	constructed from entries.
//------------------------------------------------------------------------------

AvlNode::AvlNode(const char* str, EntryList& entries)
{
    sons[Left] = 0;
    sons[Right] = 0;
    marker = 0;
    balance = Balanced;
    key = head(str);

    const char* rest = tail(str);

    // End of string: this node carries the entries themselves.
    if (rest == 0) {
        val.entries = new EntryList(entries);
        return;
    }

    // More characters follow: they live in a trie of their own.
    val.next = new AvlTrie();
    val.next->enter(rest, entries);
}

// Look up str, starting the search at this node.
//
// At every node the node's key is compared with head(str): a negative
// compare() result continues in the Right son, a positive one in the
// Left son. On a match the search either stops (tail(str) is 0) and
// returns that node's entries, or continues with the tail in the top
// node of the matched node's next trie.
//
// Returns 0 if the search runs off the tree before str is consumed.
EntryList*
AvlNode::contains(const char* str) const
{
    // The walk goes through a non-const pointer, hence the cast.
    AvlNode* cur = (AvlNode*)this;

    while (cur != 0) {
        const int order = compare(cur->get_key(), head(str));

        if (order < 0) {
            cur = cur->sons[Right];
            continue;
        }

        if (order > 0) {
            cur = cur->sons[Left];
            continue;
        }

        // Keys match: consume one element of str.
        str = tail(str);
        if (str == 0) {
            return cur->get_entries();
        }

        cur = cur->next()->get_node();
    }

    return 0;
}

// Single rotation around this node: the son in direction dir becomes the
// new top of the subtree, this node becomes that son's son in the
// opposite direction, and the son's former subtree on that side is
// re-attached to this node in direction dir.
//
// Balance factors are not touched here; callers adjust them.
//
// Returns the new top node, or this unchanged when there is no son in
// direction dir.
AvlNode*
AvlNode::rotate(int dir)
{
    AvlNode* const pivot = sons[dir];

    if (!pivot) {
        return this;
    }

    const int opposite = inverse(dir);

    sons[dir] = pivot->sons[opposite];
    pivot->sons[opposite] = this;

    return pivot;
}


//------------------------------------------------------------------------------
// Function:
//	AvlNode*
//	AvlNode::enter(const char* str,
//	               EntryList& entries,
//	               unsigned& size)
//
// Description:
//	Enter str with entries into the tree rooted at this node, while
//	keeping the tree balanced. The tree is searched for head(str).
//	If no node with that key exists, a new node is allocated for str
//	and size is incremented. If a node with that key exists, the tail
//	of str is entered into that node's next trie or, when there is no
//	tail, entries is appended to the node's existing entries; size is
//	not incremented by this function in that case.
//
// Side effects:
//	prev_dir of every node on the search path is overwritten with the
//	direction taken from that node.
//
// Return value:
//	Pointer to new top node of tree. This is `this` unless a rotation
//	took place at the top node itself.
//------------------------------------------------------------------------------

AvlNode*
AvlNode::enter(const char* str, EntryList& entries, unsigned& size)
{
    int cmp, dir = 0;
    AvlNode* node = this;
    AvlNode* father = 0;
    AvlNode* critical = this;
    AvlNode* critical_father = 0;

    // Find node with key or enter key and entries in new leaf.
    // For rebalancing we need the critical node, i.e.
    // the last unbalanced node on the search path.
    // If every node on the path is balanced, the critical node
    // stays at its initial value: this node, with no father.

    KeyType key = head(str);
    while (node) {
        if (node->balance != Balanced) {
            critical = node;
            critical_father = father;
        }

        cmp = compare(node->get_key(), key);
        if (cmp < 0) {
            dir = Right;
        } else if (cmp > 0) {
            dir = Left;
        } else {
            // Duplicate found:
            // nothing is added to this tree, so no rebalancing is needed.
            if ((str = tail(str))) {
                node->next()->enter(str, entries);
            } else {
                node->get_entries()->append(entries);
            }

            return this;
        }

        // Remember the direction taken so the path can be replayed below.
        father = node;
        node->prev_dir = dir;
        node = node->sons[dir];
    }

    // Fell off the tree: father is the last node visited and dir the
    // side on which the new leaf has to be attached.
    AvlNode* inserted = new AvlNode(str, entries);
    father->sons[dir] = inserted;
    size++;

    // Set balance factors of previously balanced nodes
    // on path between critical node and new node to 
    // direction of the new node.

    int critical_dir = critical->prev_dir;
    node = critical->sons[critical_dir];
    while (node != inserted) {
        dir = node->prev_dir;
        node->balance = dir;
        node = node->sons[dir];
    }

    // Maybe rebalance tree at critical node, resulting in a new tree.

  if (critical->balance == Balanced) {
    // Critical node was balanced: it now leans towards the new node.
    critical->balance = critical_dir;
  } else if (critical->balance != critical_dir) {
    // New node went into the other side: critical node becomes balanced.
    critical->balance = Balanced;
  } else {
      // Rebalance: 
      // the new node went into the side the critical node already leaned to.
        AvlNode* critical_son = critical->sons[critical_dir]; 
        if (critical_son->balance == critical->balance) {
            // Son leans the same way: a single rotation suffices.
            critical->balance = Balanced;
            critical_son->balance = Balanced;
            node = critical->rotate(critical_dir);
        } else {
            // Son leans the other way: double rotation. The new balance
            // factors depend on the balance of the son's son (rbal),
            // which was already updated by the loop above.
            int pbal = critical->balance;
            int rbal = critical_son->sons[inverse(critical_dir)]->balance; 
            critical->balance = pbal == rbal ? inverse(rbal) : Balanced;
            critical_son->balance = pbal == inverse(rbal) ? inverse(rbal) : Balanced;
            critical_son->sons[inverse(critical_dir)]->balance = Balanced;
            critical->sons[critical_dir]= critical_son->rotate(inverse(critical_dir));
            node = critical->rotate(critical_dir);
        }

    // If the critical node has a father, the critical node
    // is replaced with the new root of the subtree.
    // Otherwise the critical node was the top node and the
    // new subtree root is returned as the new top node.

        if (critical_father) {
            dir = critical_father->prev_dir;
            critical_father->sons[dir] = node;
        } else {
            return node;
        }
    }

    return this;
}

// Print the tree rooted at this node to os, one node per line, in the
// order delivered by AvlNodeIter. Every line starts with indent columns
// of padding followed by text() of the node's key.
//
// For a key with has_tail(), the node's next trie is printed below it
// with indent + 1. For any other key, the node's entries are passed to
// lex_entry_list_idx.add_entries() and the returned index is printed
// after " --> ".
void
AvlNode::print(ostream& os, unsigned indent) const
{
    AvlNodeIter	walker(this);
    AvlNode*	cur;

    while ((cur = walker.next()) != 0) {
        os << setw(indent) << "";
        os << text(cur->get_key());

        if (!has_tail(cur->get_key())) {
            const unsigned idx = lex_entry_list_idx.add_entries(*cur->get_entries());

            os << " --> " << idx << endl;
            continue;
        }

        os << endl;
        cur->next()->print(os, indent + 1);
    }
}

// Print the trie to os with the given indent by delegating to its top
// node. Nothing is printed when the trie has no top node.
void
AvlTrie::print(ostream& os, unsigned indent) const
{
    if (!node) {
        return;
    }

    node->print(os, indent);
}


//------------------------------------------------------------------------------
// Function: void AvlNode::delete_sons()
//
// Description:
//	Delete sons on postorder visit using DSW marking.
//
//	The tree rooted at this node is traversed without recursion: the
//	traversal stack is kept in the sons[] pointers of the nodes on the
//	current path, and the marker of a node tells whether its left
//	subtree has already been handled.
//	On the postorder visit of a node, its next trie (key with
//	has_tail()) or its entries (otherwise) is deleted, followed by both
//	of its sons. This node itself is not deleted by this function; the
//	caller has to do that.
//------------------------------------------------------------------------------

void
AvlNode::delete_sons()
{
    AvlNode *node = this;   // node currently being looked at
    AvlNode *stack = 0;     // top of the stack threaded through sons[]
    int     visit_left;

    // Continue while there is an unmarked node to descend into,
    // or while there are still nodes on the stack.
    while ((visit_left = node && !node->is_marked()) || stack) {
        if (visit_left) {
            // preorder visit node; pushleft
            // (sons[Left] temporarily holds the link to the previous
            // stack element while the left subtree is traversed)
            AvlNode* tmp = node->sons[Left];
            node->sons[Left] = stack;
            stack = node;
            node = tmp;
        } else {
            // node is either 0 or a marked node whose subtree is done;
            // clear its mark before going back up.
            if (node) {
                node->unmark();
            }

            if (!stack->is_marked()) {
                // inorder visit stack; leftright
                // (restore sons[Left], move the stack link to sons[Right]
                // and continue with the right subtree)
                stack->mark();
                AvlNode* tmp = stack->sons[Left];
                stack->sons[Left] = node;
                node = stack->sons[Right];
                stack->sons[Right] = tmp;
            } else {
                // popright; post order visit node
                // (restore sons[Right]; both sons pointers are valid again)
                AvlNode* tmp = stack->sons[Right];
                stack->sons[Right] = node;
                node = stack;
                stack = tmp;

                // Release what the node's value refers to.
                if (has_tail(node->get_key())) {
                    delete node->next();
                } else {
                    delete node->get_entries();
                }

                // Both subtrees have been handled already, so only the
                // son nodes themselves remain to be deleted.
                delete node->sons[Left];
                delete node->sons[Right];
            }
        }
    }

    // NOTE(review): at this point node is presumably this node again,
    // still marked from its postorder visit -- confirm.
    node->unmark();
}



//------------------------------------------------------------------------------
// Function:
//	unsigned AvlTrie::generate_entries(unsigned pos, ostream os)
//
// Description:
//	Write entries identified by entries in trie to stream os.
//	Each entries is preceeded by its offset in the flat representation
//	of the trie. Parameter pos is the offset of the current trie.
//      A sequence of tries of size 1 is called a path, and will be
//	represented as a string with its length (path compression).
//	The flat representation is as follows: l = path length,
//	n = nr. of branches, l chars on path, n branching chars, align,
//	n pointers to branches.
//
// Side effects:
//	In each trie, the offset pos is stored for use when generating
//	the trie itself.
// 
// Return value:
//	First free offset beyond trie in flat representation,
//	i.e. the size of the flat trie.
//------------------------------------------------------------------------------

unsigned
AvlTrie::generate_entries(unsigned pos)
{
    unsigned path;
    const AvlTrie* trie;

    // Store offset for use when generating flat trie.
    offset = pos;

    // Skip path of tries with only one branch.
    for (trie = this, path = 0;
            trie->get_size() == 1
                && has_tail(trie->get_node()->get_key())
                && path < 256;
            trie = trie->get_node()->next(), path++) {
    }

    // Calculate position of pointers.
    // Calculate next position beyond pointers.
    unsigned ptr_pos = align(pos + 2 + path + trie->get_size());
    unsigned next_pos = ptr_pos + 4 * trie->get_size();

#ifdef DEBUG
    cerr << "trie->get_size() = " << trie->get_size() << endl;
    cerr << "next_pos = " << next_pos << endl;
    cerr << "ptr_pos = " << ptr_pos << endl;
#endif

    // For each node in the trie that is a leaf, write entries to os.
    // For internal nodes, generate entries and calculate offsets
    // recursively.
    AvlNodeIter	iter(trie->get_node());

    while (AvlNode* next = iter.next()) {
        if (has_tail(next->get_key())) {
            next_pos = next->next()->generate_entries(next_pos);
        }

        ptr_pos += 4;
#ifdef DEBUG
        cerr << "in loop:" << endl;
        cerr << "\tnext_pos = " << next_pos << endl;
        cerr << "\tptr_pos = " << ptr_pos << endl;
#endif
    }

    return next_pos;
}


//------------------------------------------------------------------------------
// Function:
//	AvlTrie::generate_trie(unsigned count, ostream& os)
//
// Description:
//	Write flat representation of trie to stream os, followed by the
//	flat representations of the tries reached through its branches.
//	Each trie is stored as follows: length of path = l, nr of
//	branches = n, l chars on path, n chars of branches, zero padding
//	up to the aligned position, and then one unsigned per branch:
//	the offset of the next trie for a branch whose key has a tail,
//	or the index returned by lex_entry_list_idx.add_entries() for
//	the entries of a leaf.
//
// Return value:
//	count, as passed through the recursive calls. This function
//	itself never changes count.
//
// Note:
//	Offsets should be filled in first by generate_entries().
//------------------------------------------------------------------------------

unsigned
AvlTrie::generate_trie(unsigned count, ostream& os) const
{
    // 2 header bytes + up to 256 path chars + up to 256 branch chars
    // + padding + up to 256 pointers.
    char flat[4 + 6 * 256];
    unsigned path;
    const AvlTrie* trie;

    // Store characters on path
    unsigned pos = 2;
    for (trie = this, path = 0;
         trie->get_size() == 1 && has_tail(trie->get_node()->get_key())
                               && path < 256;
         trie = trie->get_node()->next(), path++) {
        flat[pos++] = trie->get_node()->get_key();
    }

    // Store path length and number of branches.
    // NOTE(review): both are stored in a single char, while the loop
    // above lets path reach 256 -- confirm that the reader copes with
    // the wrapped value.
    flat[0] = (char)path;
    flat[1] = (char)trie->get_size();

    // Calculate offset of pointers.
    // NOTE(review): the unsigned stores below assume flat + entry_pos is
    // suitably aligned for unsigned -- confirm for the target platforms.
    unsigned entry_pos = align(2 + path + trie->get_size());
    unsigned* entry_ptr = (unsigned*)(flat + entry_pos);

    // Store characters and pointers of branches.
    // For leafs, store entry counter.
    AvlNodeIter	iter(trie->get_node());

    while (AvlNode* next = iter.next()) {
        flat[pos++] = next->get_key();

        if (has_tail(next->get_key())) {
            *entry_ptr++ = next->next()->get_offset();
        } else {
            EntryList* entries = next->get_entries();
            *entry_ptr++ = lex_entry_list_idx.add_entries(*entries);
        }
    }

    // Zero the padding between the branch chars and the pointers.
    while (pos < entry_pos) {
        flat[pos++] = '\0';
    }

    // Write flat representation of trie.
    // The length is the distance in bytes from the start of the buffer to
    // the first unused pointer slot. Subtract as char pointers: casting
    // each pointer to unsigned first is not valid where pointers are
    // wider than unsigned.
    os.write(flat, (unsigned)((char*)entry_ptr - flat));

    // Generate flat representation of internal nodes.
    // The same iterator is reused: the loop above ran it until it
    // returned 0, and this loop relies on it starting over after that.
    while (AvlNode* next = iter.next()) {
        if (has_tail(next->get_key())) {
            count = next->next()->generate_trie(count, os);
        }
    }

    return count;
}

// Destroy the trie. If there is a top node, delete_sons() is called on
// it first and the top node itself is deleted afterwards.
AvlTrie::~AvlTrie()
{
    if (!node) {
        return;
    }

    node->delete_sons();
    delete node;
}

// Enter key with entries into the trie.
//
// An empty trie gets a new top node built from key and entries, and size
// is incremented. Otherwise the work is delegated to the top node, which
// updates size itself and returns the (possibly different) top node.
void
AvlTrie::enter(const char* key, EntryList& entries)
{
    if (!node) {
        node = new AvlNode(key, entries);
        size++;
        return;
    }

    node = node->enter(key, entries, size);
}

// Look up key in the trie. Returns whatever the top node's contains()
// returns, or 0 when the trie has no top node.
EntryList*
AvlTrie::contains(const char* key) const
{
    return node ? node->contains(key) : 0;
}

// Stream insertion for AvlTrie: prints the trie with an indent of 0 and
// returns the stream so that insertions can be chained.
ostream&
operator<<(ostream& out, AvlTrie& trie)
{
    const unsigned top_level_indent = 0;

    trie.print(out, top_level_indent);

    return out;
}


//------------------------------------------------------------------------------
// AvlNodeIter class routines:
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Function:
//	AvlNode* AvlNodeIter::next()
//
// Description:
//	Return pointer to the next node on each inorder visit, using DSW
//	marking. The traversal state is kept in the iterator members node
//	and stack, in the sons[] pointers of the nodes on the current path
//	and in the node markers, so no recursion or separate stack is used.
//
// Return value:
//	The node being visited, or 0 when there are no more nodes.
//
// Note:
//	While an iteration is in progress, sons[] of nodes on the current
//	path (including the node just returned) do not hold their normal
//	values.
//	NOTE(review): after 0 has been returned, a further call appears to
//	start the traversal over again (AvlTrie::generate_trie iterates
//	twice with the same iterator) -- confirm.
//------------------------------------------------------------------------------

AvlNode* AvlNodeIter::next()
{
    int visit_left;

    // Continue while there is an unmarked node to descend into,
    // or while there are still nodes on the stack.
    while ((visit_left = node && !node->is_marked()) || stack) {
        if (visit_left) {
            // preorder visit node; pushleft
            // (sons[Left] temporarily holds the link to the previous
            // stack element while the left subtree is traversed)
            AvlNode* tmp = node->sons[AvlNode::Left];
            node->sons[AvlNode::Left] = stack;
            stack = node;
            node = tmp;
        } else {
            // node is either 0 or a marked node whose subtree is done;
            // clear its mark before going back up.
            if (node) {
                node->unmark();
            }

            if (!stack->is_marked()) {
                // inorder visit stack; leftright
                // (restore sons[Left], move the stack link to sons[Right],
                // continue with the right subtree on the next call and
                // hand the visited node to the caller)
                stack->mark();
                AvlNode* tmp = stack->sons[AvlNode::Left];
                stack->sons[AvlNode::Left] = node;
                node = stack->sons[AvlNode::Right];
                stack->sons[AvlNode::Right] = tmp;
                return stack;
            } else {
                // popright; post order visit node
                // (restore sons[Right]; nothing else to do on this visit)
                AvlNode* tmp = stack->sons[AvlNode::Right];
                stack->sons[AvlNode::Right] = node;
                node = stack;
                stack = tmp;
            }
        }
    }

    // Traversal finished: clear the mark left on the last node popped.
    if (node) {
        node->unmark();
    }

    return 0;
}

