//
// Copyright (C) 1999, 2000, Marco Kesseler
//

implementation module huffman

import StdEnum, StdMisc, StdList, StdArray
import bits, bitstream

from StdOrdList import sort
from StdOrdList import sortBy

//**********************************************************
//
//	This is a straightforward (binary) huffman tree. Each
//	Node contains two subtrees which lead to the value of the
//	code if the current digit is zero or one respectively.
//
//	In addition each node contains the depth of the tree,
//	which indicates the maximum huffman code length.
//	Strictly speaking this is not necessary, but we include
//	this because this is handy when constructing an efficient
//	huffman decoder.
//
//	This is the 'normalised' way for representing huffman
//	codes. Usually, this is not used for (de)coding, because
//	it is far too inefficient to read a code bit-by-bit. The
//	huffmanDecoder function transforms this normalised representation
//	into an efficient decoding function.
//
//**********************************************************

// Binary huffman tree over values of type a.
//	HuffDigit depth zero one: inner node; 'zero' is followed when the current
//		code digit is zero, 'one' when it is one. 'depth' is the depth of the
//		tree below this node, i.e. the maximum remaining code length.
//	HuffValue value: leaf that carries the value of a complete code.
//	HuffEmpty: position in the tree to which no code has been assigned.
:: HuffTree a	= HuffDigit Int (HuffTree a) (HuffTree a)
				| HuffValue a
				| HuffEmpty

// Depth stored in the root of a tree: the recorded depth for an inner node,
// zero for a leaf or an empty tree.
huffDepth tree
	= case tree of
		(HuffDigit depth _ _)	-> depth
		_						-> 0

//**********************************************************
//
// Empty HuffTree for initialisation purposes
//
//**********************************************************

// A tree without any codes, usable as an initial value.
emptyHuffTree :: HuffTree a
emptyHuffTree
	= HuffEmpty

//**********************************************************
//
// huffTreeFromValuesAndLengths takes a list of (value, length)
// pairs and constructs a Huffman tree from it. The list of values
// must be ordered by increasing length.
//
// Note that we allow suboptimal Huffman trees (trees that contain
// empty nodes), simply because some programs may not specify one.
//                (not anymore, that is)
//
//**********************************************************

import StdDebug, StdString

// Builds a huffman tree from (value, code length) pairs. The pairs are
// consumed front to back while the tree is grown depth-first, zero branch
// before one branch. Aborts when pairs are left over after the tree is
// complete, or when a pair asks for a code shorter than the current depth.
huffTreeFromValuesAndLengths :: [(a,Int)] -> HuffTree a
huffTreeFromValuesAndLengths list
| isEmpty leftover
	= root
	= abort "too many values to build HuffMan tree"
where
	(root, _, leftover) = grow 0 list

	// grow level pairs: the subtree rooted at depth 'level', the depth of
	// that subtree, and the pairs that were not consumed by it.
	grow :: Int [(a, Int)] -> (HuffTree a, Int, [(a, Int)])
	grow level []
		= (HuffEmpty, 0, [])
	grow level pending=:[(symbol, codeLength) : others]
	| level > codeLength
		= abort "invalid sequence of codes"
	| level == codeLength
		= (HuffValue symbol, 0, others)
	// level < codeLength: the code continues below this node
	# (zeroBranch, zeroDepth, afterZero)	= grow (level + 1) pending
	# (oneBranch, oneDepth, afterOne)		= grow (level + 1) afterZero
	# subDepth								= 1 + (max zeroDepth oneDepth)
	= (HuffDigit subDepth zeroBranch oneBranch, subDepth, afterOne)

//**********************************************************
//
//	huffTreeFromCountsAndValues takes a list of counts and
//	a list of corresponding values and computes a huffman tree
//	from it.
//
//**********************************************************

// Builds a huffman tree from a list of counts and a list of values. The
// n-th count (starting at one) tells how many of the next values get code
// length n. Values that remain once the counts are exhausted are dropped;
// running out of values while a count is still positive aborts.
huffTreeFromCountsAndValues :: [Int] [a] -> HuffTree a
huffTreeFromCountsAndValues counts values
	= huffTreeFromValuesAndLengths (assign 1 counts values)
where
	// assign len cs vs: pair the values with lengths, where 'len' is the
	// length that belongs to the first count in cs.
	assign _ [] _
		= []
	assign len [count : laterCounts] vs
		= emit count vs
	where
		// emit n xs: give the next n values the length 'len', then move on
		// to the next length.
		emit n xs
		| n <= 0
			= assign (len + 1) laterCounts xs
		= case xs of
			[]			-> abort "huffTreeFromCountsAndValues: too few values"
			[x : rest]	-> [(x, len) : emit (n - 1) rest]

/********************************************************************
 *
 *	These huffman decoders use layered arrays. They peek at some
 *	bits in advance, and use them as an index in the array to determine
 *	their value. Short codes have multiple entries, one for each
 *	variation of the remaining bits. Long codes refer to another table
 *	that decodes the rest of the bits.
 *
 *	Each table entry contains a decoder function that takes a stream and
 *	a state as an argument. Typically it will skip the correct number
 *	of bits in the stream and deliver some result, usually by decoding
 *	the rest of the stream recursively.
 *
 ********************************************************************/

/********************************************************************
 *
 *	Predefined entries for unused table entries and the ones that
 *	refer to other tables.
 *
 ********************************************************************/

// Table entry for indices that no code maps to: aborts when it is applied.
decodeUnused _ _
	= abort "unused huffman code encountered"

// Table entry that refers to another table: skips 'nbits' bits of the
// stream, peeks 'newBits' bits and applies the entry found at that index
// in 'table' to the resulting stream and the state.
decodeTable table nbits newBits stream state
	= table.[index] peeked state
where
	(index, peeked) = peekBits (skipBits stream nbits) newBits

// Entry point of a decoder: peeks 'nbits' bits of the stream and applies
// the entry found at that index in 'table' to the resulting stream and
// the state.
decodeRootTable table nbits stream state
	= table.[index] peeked state
where
	(index, peeked) = peekBits stream nbits

/********************************************************************
 *
 *	Exported predefined error entry
 *
 ********************************************************************/

// Predefined entry for undefined codes. The bit count is ignored; the
// resulting entry is decodeUnused.
decodeUndefined :: Int -> (stream .state -> r) | BitStream stream
decodeUndefined _ = decodeUnused
	
/********************************************************************
 *
 *	Construction of a Huffman decoder.
 *
 *	This function takes a huffman tree and constructs a huffman
 *	decoder from it. This decoder looks ahead a number of bits
 *	(indicated by the lookahead argument), and uses tables to
 *	decide what needs to be done. The argument function defines
 *	the table entries.
 *
 *	This method of using tables is also used in the public domain C
 *	code of Mark Adler, but that is about all that remained of it.
 *	A few notes:
 *	- using huffman trees as input has made this code much clearer
 *	  than the C version (I think). The C version generates huffman
 *    codes by incrementing and shifting codes in the order of the
 *    code lengths. For bit-reversed codes a special big-endian increment
 *    routine was needed. We however, simply walk the huffman tree and append a
 *    zero or a one every step down, either to the left or the right of a code.
 *  - We do not construct the table in such a way that we never read
 *	  past an EOB code. This is no problem because:
 *		- we append arbitrarily many zero bits at the end of the stream, so
 *		  reading past it is no issue.
 *		- in the end, we never actually remove more bits from the stream
 *        than we need
 *	  We do this, because:
 *		- this results in (slightly) better tables
 *		- this results in easier table construction
 *
 *	Details:
 *	  Most work is done in the fillTable function. It has the following
 *	  alternatives:
 *	  - if we hit a node in the huffman tree we call filltable recursively
 *	    to store both subtrees in the table. The left tree should get the
 *		current code with a '0' appended; the right tree gets the current
 *		code with a '1' appended (either to the left or the right). One
 *		exception occurs when the code does not fit in the table anymore.
 *		If so, we let the entry refer to another table that decodes the
 *		rest of the code (so we restart the code at zero).
 *	  - if we hit a leaf in the huffman tree, we put 'def length value'
 *		at index 'code' and all variations thereof. The code should
 *		fit (but we do check this). For big-endian streams the last
 *		bits define the code, and the first bits vary. For little-
 *		endian streams the first bits define the code and the last
 *		bits vary.
 *		
 ********************************************************************/

// Bit streams for which a table-driven huffman decoder can be constructed.
//
// huffmanDecoder tree max_lookahead (def, undef) yields a decoder that takes
// a stream and a state:
//	- tree: the huffman tree to decode with
//	- max_lookahead: upper bound on the number of bits that index one table
//	- def: builds the table entry for a code; it is applied to the number of
//	  bits the code occupies in the table holding the entry, and to the value
//	  of the code
//	- undef: builds, from a bit count, a table entry for an empty tree node
class HuffmanDecoder stream | BitStream stream
where
	huffmanDecoder :: (HuffTree a) Int ((Int a -> (stream .state -> r)), (Int -> (stream .state -> r))) -> (stream .state -> r)

// HuffmanDecoder for big-endian bit streams.
//
// The root table is indexed by 'lookahead' peeked bits, where lookahead is
// the smaller of max_lookahead and the depth of the tree. Within a table
// index the low 'length' bits hold the code and the remaining high bits vary.
// Empty tree nodes get the caller's 'undef' entry, in the same way as the
// LEBitStream instance does; they are no longer left as decodeUnused.
instance HuffmanDecoder BEBitStream
where
	huffmanDecoder tree max_lookahead f
		= decodeRootTable (fillTable f emptyTable 0 0 lookahead tree) lookahead
	where
		lookahead = min max_lookahead (huffDepth tree)
		emptyTable	= createArray (1 << lookahead) decodeUnused
	
		// Inner node: while the code still fits in the table, store both
		// subtrees; otherwise let entry c refer to a fresh table that decodes
		// the rest of the code (code and length restart at zero).
		fillTable f table c length lookahead tree=:(HuffDigit depth left right)
		| length < lookahead
			= table``
			with
				table``		= fillTable f table` rightCode length` lookahead right
				table`		= fillTable f table leftCode length` lookahead left
				length`		= length + 1
				leftCode	= c							// big-endian: prepend zero
				rightCode	= c + (1 << length)			// big-endian: prepend one
				
			= {table & [c] = decodeTable (fillTable f emptyTable 0 0 lookahead` tree) lookahead lookahead`}
			with
				lookahead`		= min depth lookahead
				emptyTable		= createArray (1 << lookahead`) decodeUnused
		
		// Leaf: store 'def length value' at index c and at every index that
		// differs from c only in the bits above the code.
		fillTable (def, undef)  table c length lookahead (HuffValue value)
		| length <= lookahead
			= {table & [c + (i * period)] = entry \\ i <- [0..count-1]}
			with
				entry	= def length value
				period	= 1 << length
				count	= 1 << (lookahead - length)
			= abort "fillTable: code too long"
		
		// Empty node: store 'undef length' at the same set of indices a leaf
		// at this position would occupy.
		fillTable (def, undef) table c length lookahead HuffEmpty
		| length <= lookahead
			= {table & [c + (i * period)] = entry \\ i <- [0..count-1]}
			with
				entry	= undef length
				period	= 1 << length
				count	= 1 << (lookahead - length)
			= abort "fillTable: code too long"

// HuffmanDecoder for little-endian bit streams.
//
// The root table is indexed by 'rootBits' peeked bits, where rootBits is the
// smaller of max_lookahead and the depth of the tree. Within a table index
// the code occupies the high bits and the remaining low bits vary.
instance HuffmanDecoder LEBitStream
where
	huffmanDecoder tree max_lookahead f
		= decodeRootTable rootTable rootBits
	where
		rootBits	= min max_lookahead (huffDepth tree)
		rootTable	= fillTable f (createArray (1 << rootBits) decodeUnused) 0 0 rootBits tree

		// fillTable handlers table code used width node: store 'node' in
		// 'table', which is indexed by 'width' bits; 'code' holds the 'used'
		// digits on the path to the node.

		// Inner node: when the table width is reached, entry 'code' refers to
		// a fresh table for the rest of the code; otherwise both subtrees are
		// stored, the zero branch first.
		fillTable handlers table code used width node=:(HuffDigit depth zeroTree oneTree)
		| used > width
			= abort "fillTable <LEBitStream>: code too long"
		| used == width
			# subWidth	= min depth width
			# subTable	= createArray (1 << subWidth) decodeUnused
			= {table & [code] = decodeTable (fillTable handlers subTable 0 0 subWidth node) width subWidth}
		# zeroCode	= code << 1						// little-endian: append zero
		# oneCode	= zeroCode bitor 1				// little-endian: append one
		# table		= fillTable handlers table zeroCode (used + 1) width zeroTree
		= fillTable handlers table oneCode (used + 1) width oneTree

		// Leaf: every index that starts with the code gets 'def used value'.
		fillTable (def, _) table code used width (HuffValue value)
		| used > width
			= abort "fillTable  <LEBitStream>: code too long"
		# spare	= width - used
		# base	= code << spare
		# count	= 1 << spare
		# entry	= def used value
		= {table & [base + i] = entry \\ i <- [0..count-1]}

		// Empty node: every index that starts with the code gets 'undef used'.
		fillTable (_, undef) table code used width HuffEmpty
		| used > width
			= abort "fillTable  <LEBitStream>: code too long"
		# spare	= width - used
		# base	= code << spare
		# count	= 1 << spare
		# entry	= undef used
		= {table & [base + i] = entry \\ i <- [0..count-1]}

