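/* Read "address name" symbol lines from stdin (or from a file named on the
   command line) and print C tables for Huffman encoding or decoding of the
   symbol names */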
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

/* One entry of the symbol array built by read_in_symbols(). */
struct symbol
{
	/* Address of the symbol, parsed from the input as hexadecimal. */
	unsigned long long addr;
	/* Heap copy of the symbol name (from strdup()).  The entry that ends
	   the array has name == NULL. */
	char *name;
};

/* The 63 characters a symbol name may consist of.  A character is identified
   by its position in this string; position 63, where the string's
   terminating NUL sits, is what acceptable_index() returns for the
   end-of-name marker, giving 64 encodable values in total. */
static const char acceptable_chars[]
= "_0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

/* A node of the Huffman tree.  While the tree is being built, the nodes that
   have no parent yet are additionally kept on a circular doubly-linked list
   (with a sentinel node as list head) through next/prev. */
struct huff
{
	/* List linkage.  replace_least_frequent() sets next to NULL on a node
	   when it unlinks it and hangs it below a new parent. */
	struct huff *next, *prev;

	/* Children.  If these are NULL, we're a terminal character (a leaf).
	   In the generated tables child a is reached with bit 0 and child b
	   with bit 1. */
	struct huff *a, *b;
	/* For a leaf: position of its character in acceptable_chars (63 for
	   the end-of-name marker).  Not meaningful for internal nodes. */
	int acceptable_index;
	/* For a leaf: how often the character occurred in the input.  For an
	   internal node: the sum of its two children's freq. */
	unsigned int freq;
};

/* Returns 1 if h is a leaf of the tree (it stands for a single character),
   0 if it is an internal node.  Only child a is inspected: a node either has
   both children or none. */
static inline int terminal(const struct huff *h)
{
	return h->a == NULL;
}

static int addr_cmp(const void *s1, const void *s2)
{
	long long diff;

	diff = ((struct symbol *)s1)->addr - ((struct symbol *)s2)->addr;
	if (diff > 0) return 1;
	else if (diff < 0) return -1;
	else return 0;
}

/* Throw away whatever is left of the current whitespace-delimited token on
   `in`.  Returns 1 if at least one character was discarded, else 0.  The
   whitespace character that ends the token is pushed back. */
static int discard_token_tail(FILE *in)
{
	int discarded = 0;
	int c;

	while ((c = getc(in)) != EOF) {
		if (c == ' ' || c == '\t' || c == '\n'
		    || c == '\v' || c == '\f' || c == '\r') {
			ungetc(c, in);
			break;
		}
		discarded = 1;
	}
	return discarded;
}

/* Read the input in format "address name" (address in hexadecimal), one
 * symbol after the other, until the input no longer matches that format.
 *
 * Names containing characters outside acceptable_chars cannot be encoded and
 * are skipped with a message on stderr; so are names too long for the
 * internal buffer (199 characters).
 *
 * Returns a heap-allocated array sorted by ascending address.  The array is
 * never NULL, even for empty input, and is terminated by an entry whose name
 * is NULL.  The names are heap copies.  On allocation failure an error is
 * printed and the program exits with status 1.
 */
static struct symbol *read_in_symbols(FILE *in)
{
	char name[200];
	unsigned long long addr;
	size_t done = 0;
	size_t cap = 16;
	struct symbol *symbols;

	symbols = malloc(cap * sizeof(*symbols));
	if (!symbols) {
		perror("malloc");
		exit(1);
	}

	/* The field width keeps fscanf() from writing past the end of name. */
	while (fscanf(in, "%llx %199s", &addr, name) == 2) {
		/* A name that did not fit leaves its tail in the stream;
		   drop it so it is not mistaken for the next address. */
		if (discard_token_tail(in)) {
			fprintf(stderr, "Ignoring overlong symbol %s...\n",
				name);
			continue;
		}
		/* We can't encode strange symbols. */
		if (strspn(name, acceptable_chars) != strlen(name)) {
			fprintf(stderr, "Ignoring symbol %s\n", name);
			continue;
		}
		/* Always keep one spare slot for the terminating entry. */
		if (done + 1 >= cap) {
			struct symbol *bigger;

			cap *= 2;
			bigger = realloc(symbols, cap * sizeof(*symbols));
			if (!bigger) {
				perror("realloc");
				exit(1);
			}
			symbols = bigger;
		}
		symbols[done].addr = addr;
		symbols[done].name = strdup(name);
		if (!symbols[done].name) {
			perror("strdup");
			exit(1);
		}
		done++;
	}

	/* Sort into ascending address order. */
	qsort(symbols, done, sizeof(symbols[0]), addr_cmp);
	/* Mark terminus */
	symbols[done].addr = 0;
	symbols[done].name = NULL;
	return symbols;
}

/* Map a character to its code number: the position of c in
   acceptable_chars, or 63 for the NUL that ends a name.  c must be NUL or
   one of the characters in acceptable_chars. */
static int acceptable_index(char c)
{
	const char *where;

	if (c == '\0')
		return 63;

	where = strchr(acceptable_chars, c);
	return where - acceptable_chars;
}

/* One step of Huffman tree construction: take the two nodes with the lowest
 * freq off the list and put a newly allocated parent of both on the list
 * instead.
 *
 * head is the sentinel of the circular list of parentless nodes.  The list
 * must hold at least two real nodes, and the sentinel's freq must be larger
 * than that of any real node (huffman() sets it to UINT_MAX).
 *
 * On allocation failure an error is printed and the program exits with
 * status 1.
 */
static void replace_least_frequent(struct huff *head)
{
	struct huff *a = head, *b = head, *i;

	/* Single pass: a ends up as the least frequent node, b as the second
	   least frequent one.  Both start out as the sentinel, which every
	   real node undercuts. */
	for (i = head->next; i != head; i = i->next) {
		if (i->freq < a->freq) {
			b = a;
			a = i;
		} else if (i->freq < b->freq) {
			b = i;
		}
	}

	i = malloc(sizeof *i);
	if (!i) {
		perror("malloc");
		exit(1);
	}
	i->a = a;
	i->b = b;
	/* Internal nodes stand for no character. */
	i->acceptable_index = -1;
	i->freq = a->freq + b->freq;

	/* Delete a and b from list */
	a->next->prev = a->prev;
	a->prev->next = a->next;
	b->next->prev = b->prev;
	b->prev->next = b->next;
	a->next = b->next = NULL;

	/* Insert new element into list, right behind the sentinel */
	i->next = head->next;
	i->prev = head;
	head->next = i;
	i->next->prev = i;
}

/* Set (bit != 0) or clear (bit == 0) bit number bitnum in the byte array
   addr.  Bits are numbered from the least significant bit of addr[0]
   upwards, eight per byte. */
static inline void set_bit(int bit, unsigned char *addr, unsigned int bitnum)
{
	unsigned char *byte = &addr[bitnum >> 3];
	const unsigned char mask = (unsigned char)(1u << (bitnum & 7));

	if (bit)
		*byte |= mask;
	else
		*byte &= (unsigned char)~mask;
}

/* Print the bytes that hold the first `bits` bits of bitpattern to stdout,
   each as a decimal number followed by ", ".  A partially used last byte is
   printed whole. */
static void dump_bits(unsigned char bitpattern[], unsigned int bits)
{
	const unsigned int nbytes = (bits + 7) / 8;
	const unsigned char *cur = bitpattern;
	const unsigned char *const end = bitpattern + nbytes;

	while (cur != end)
		printf("%u, ", *cur++);
}

/* Print the encode-table entries for every leaf below head.
 *
 * bitpattern holds the code of the path from the root down to head, and
 * bits is the length of that path.  Taking child b appends a 1 bit, taking
 * child a appends a 0 bit.  Each leaf is printed as a designated
 * initializer: "[index] = { nbits, { byte, byte, ... } },".
 */
static void dump_encode_table(const struct huff *head,
			      unsigned char bitpattern[],
			      unsigned int bits)
{
	if (!terminal(head)) {
		/* Walk the 1 branch first and the 0 branch last, so that
		   this level's bit is back at 0 when we return to the
		   caller. */
		set_bit(1, bitpattern, bits);
		dump_encode_table(head->b, bitpattern, bits + 1);

		set_bit(0, bitpattern, bits);
		dump_encode_table(head->a, bitpattern, bits + 1);
		return;
	}

	/* Leaf: the accumulated pattern is this character's code. */
	printf("[%u] = { %u, { ",
	       head->acceptable_index, bits);
	dump_bits(bitpattern, bits);
	printf("} },\n");
}

/* Build the Huffman tree for the characters of all symbol names.
 *
 * symbols is an array terminated by an entry whose name is NULL.  Every one
 * of the 64 encodable values (the characters of acceptable_chars plus the
 * end-of-name marker) gets a leaf, whether it occurs in the input or not.
 * Each name contributes its characters and one end-of-name marker to the
 * frequency counts.
 *
 * Returns the list sentinel; the root of the finished tree is its ->next.
 * On allocation failure an error is printed and the program exits with
 * status 1.
 */
static struct huff *huffman(const struct symbol *symbols)
{
	enum { NUM_LEAVES = 64 };
	unsigned int i;
	struct huff *huff_base;
	struct huff *head;

	head = malloc(sizeof *head);
	huff_base = malloc(sizeof(huff_base[0]) * NUM_LEAVES);
	if (!head || !huff_base) {
		perror("malloc");
		exit(1);
	}

	/* Chain the leaves into a ring that is closed through the sentinel.
	   The end links are chosen here rather than patched up afterwards,
	   so no pointer before the start of the array is ever formed. */
	for (i = 0; i < NUM_LEAVES; i++) {
		huff_base[i].prev = i > 0 ? &huff_base[i - 1] : head;
		huff_base[i].next = i + 1 < NUM_LEAVES ? &huff_base[i + 1]
						       : head;
		huff_base[i].a = huff_base[i].b = NULL;
		huff_base[i].acceptable_index = (int)i;
		huff_base[i].freq = 0;
	}
	head->next = &huff_base[0];
	head->prev = &huff_base[NUM_LEAVES - 1];
	head->a = head->b = NULL;
	head->acceptable_index = -1;
	/* The sentinel must never be picked as a least frequent node. */
	head->freq = UINT_MAX;

	/* Count how often each character, and the end-of-name marker,
	   occurs. */
	for (i = 0; symbols[i].name; i++) {
		unsigned int j;

		for (j = 0; symbols[i].name[j]; j++)
			huff_base[acceptable_index(symbols[i].name[j])].freq++;
		huff_base[acceptable_index(0)].freq++;
	}

	/* Merge until a single node, the root, is left on the list. */
	while (head->next->next != head)
		replace_least_frequent(head);

	return head;
}

/* Print the decode-table entry for the internal node head, then the entries
 * of all internal nodes below it.
 *
 * pos is the table slot of head; max is the highest slot handed out so far.
 * An entry consists of two numbers, the first for child a and the second for
 * child b.  A leaf child is written as minus its acceptable_index (zero or
 * negative).  An internal child is given the next free slot and written as
 * the positive distance from pos to that slot.
 *
 * Returns the highest slot handed out once the whole subtree is printed.
 */
static unsigned int dump_decode_table(const struct huff *head,
				      unsigned int pos,
				      unsigned int max)
{
	const struct huff *kid_a = head->a;
	const struct huff *kid_b = head->b;
	/* Slot 0 belongs to the root, so 0 doubles as "no slot needed". */
	unsigned int slot_a = 0;
	unsigned int slot_b = 0;

	printf("\n\t[%u] = ", pos);

	if (!terminal(kid_a)) {
		slot_a = ++max;
		printf("{ { %u, ", slot_a - pos);
	} else {
		printf("{ { %i, ", -(int)kid_a->acceptable_index);
	}

	if (!terminal(kid_b)) {
		slot_b = ++max;
		printf("%u } }, ", slot_b - pos);
	} else {
		printf("%i } }, ", -(int)kid_b->acceptable_index);
	}

	/* Both slots are reserved before descending, so the children's
	   slots directly follow each other. */
	if (slot_a != 0)
		max = dump_decode_table(kid_a, slot_a, max);
	if (slot_b != 0)
		max = dump_decode_table(kid_b, slot_b, max);

	return max;
}

/* Print the C declaration of the decode table struct and the complete
   huff_decode[] array to stdout.  head is the list sentinel returned by
   huffman(); the tree root is head->next and goes into slot 0. */
static void dump_decode(const struct huff *head)
{
	const struct huff *root = head->next;

	fputs("struct huff_decode_table { signed char jump[2]; };\n", stdout);
	fputs("static const struct huff_decode_table huff_decode[] = { ",
	      stdout);
	(void)dump_decode_table(root, 0, 0);
	fputs("};\n", stdout);
}

/* Length in bits of the longest code in the tree rooted at h, that is the
   depth of its deepest leaf (0 if h itself is a leaf). */
static unsigned int max_code_bits(const struct huff *h)
{
	unsigned int via_a, via_b;

	if (terminal(h))
		return 0;
	via_a = max_code_bits(h->a);
	via_b = max_code_bits(h->b);
	return 1 + (via_a > via_b ? via_a : via_b);
}

/* Print the C declaration of the encode table struct and the complete
 * huff_encode[] array to stdout.  head is the list sentinel returned by
 * huffman(); the tree root is head->next.
 *
 * The bits[] member of the generated struct is sized from the longest code
 * actually present, so that no entry has more initializers than the array
 * has elements.  It is never made smaller than 3 bytes, which keeps the
 * output unchanged whenever all codes fit into 24 bits.
 */
static void dump_encode(const struct huff *head)
{
	unsigned char bitpattern[20] = { 0 };
	const unsigned int maxbits = max_code_bits(head->next);
	unsigned int nbytes = (maxbits + 7) / 8;

	/* The scratch pattern must be able to hold the longest code. */
	if (nbytes > sizeof(bitpattern)) {
		fprintf(stderr, "Huffman codes of up to %u bits are too long\n",
			maxbits);
		exit(1);
	}
	if (nbytes < 3)
		nbytes = 3;

	printf("struct huff_encode_table { unsigned char num; unsigned char bits[%u]; };\n\n",
	       nbytes);
	printf("static const struct huff_encode_table huff_encode[] = {\n");
	dump_encode_table(head->next, bitpattern, 0);
	printf("};\n");
}

/* Entry point.
 *
 * argv[1] selects the table to print: "--encode" prints the encode table,
 * anything else prints the decode table.  When exactly two arguments are
 * given, argv[2] names the input file; otherwise the input is read from
 * stdin.
 *
 * Returns 0 on success, 1 if the mode argument is missing or the input file
 * cannot be opened.
 */
int main(int argc, char *argv[])
{
	const struct huff *head;
	struct symbol *symbols;
	FILE *file = stdin;

	/* argv[1] is compared below, so it has to exist. */
	if (argc < 2) {
		fprintf(stderr, "Usage: %s --encode|--decode [file]\n",
			argc > 0 ? argv[0] : "huffman");
		return 1;
	}

	if (argc == 3) {
		file = fopen(argv[2], "r");
		if (!file) {
			perror(argv[2]);
			return 1;
		}
	}

	/* Get array of symbols */
	symbols = read_in_symbols(file);
	if (file != stdin)
		fclose(file);

	/* Do huffman */
	head = huffman(symbols);

	if (strcmp(argv[1], "--encode") == 0) {
		printf("static const char huff_enc_chars[]\n\t= \"%s\";\n",
		       acceptable_chars);
		dump_encode(head);
	} else {
		printf("static const char huff_dec_chars[]\n\t= \"%s\";\n",
		       acceptable_chars);
		dump_decode(head);
	}

	return 0;
}
