/*
 * $Id$
 *
 * Merge a number of files that were the result of 'sort | uniq -c'.
 *
 * See the file COPYING for license information.
 */

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

extern char *optarg;
extern int optind;
extern int optopt;
extern int opterr;
extern int optreset;

/*
 * State of one input file taking part in the merge: the open stream plus
 * the most recently read (count, text) pair from that file.
 */
struct record {
    /* Stream returned by fopen(); NULL if the file could not be opened. */
    FILE *fh;
    /* Number parsed with strtol() from the start of the current line. */
    long count;
    /*
     * Heap-allocated copy of the current line's text, i.e. what follows the
     * number and one separating space.  NULL until the first line is stored.
     */
    char *text;
    /*
     * strlen() of the whole input line for which 'text' was last allocated;
     * the buffer behind 'text' holds textlen + 1 bytes.
     */
    int textlen;
    /* strdup() copy of the file name, used in diagnostics. */
    char *filename;
};

/* Number of entries currently in the 'files' array. */
int nfiles;
/*
 * Array of pointers to the records of the files still being merged.
 * Grown by open_file(); entries are removed by close_file().
 */
struct record **files;
/* Sum of all counts printed so far; reported at the end when -t is given. */
long grand_total;

/*
 * Size of the stack buffers used to read one input line (get_line) and to
 * hold the text currently being summed (print_sum_and_get_next_lines).
 */
#define LINESIZE	10000
/*
 * When nonzero, get_line() reports on stderr any line whose text is not
 * strictly greater (by strcmp) than the previous text from the same file.
 */
#define CHECK_IF_SORTED	1

void
close_file(int pos)
{
    free(files[pos]->text);
    if (files[pos]->fh)
	fclose(files[pos]->fh);
    free(files[pos]->filename);
    free(files[pos]);

    /* Remove file from list */
    memmove(&files[pos], &files[pos + 1], (nfiles-pos-1) * sizeof files[0]);
    nfiles--;
}

int
get_line(int pos)
{
    char line[LINESIZE];/* lines should be shorter than this in any case */
    FILE *fh = files[pos]->fh;

    if (!fh) {
	close_file(pos);

	return pos;
    }
    
    char *result = fgets(line, LINESIZE, fh);

    if (!result) {
	if (ferror(fh) || !feof(fh)) {
	    fprintf(stderr, "Error occurred when reading file '%s'\n",
		    files[pos]->filename);
	}
	close_file(pos);

	return pos;
    }

    /* Parse out initial whitespace, a number, a space, a string */
    char *end;
    files[pos]->count = strtol(line, &end, 10);
    if (*end == ' ') {
	end++;
    } else {
	/* remember line from here */
    }

#if CHECK_IF_SORTED
    /*
     * Check if the file is actually sorted.
     * This is useful, since the merge algorithm requires it,
     * and the sort command may sort in some locale-specific way.
     */
    if (files[pos]->text != NULL &&
	strcmp(end, files[pos]->text) <= 0) {
	fprintf(stderr, "ERROR: in file '%s', input is not strictly ascending in ASCIIbetical order:\n%s%s",
		files[pos]->filename,
		files[pos]->text,
		end);
    }
#endif

    int len;
    /* Place line in record; make sure it will fit in allocated space */
    len = strlen(line);
    if (!files[pos]->text || len > files[pos]->textlen) {
	free(files[pos]->text);
	files[pos]->text = malloc(len + 1);
	files[pos]->textlen = len;
    }

    strcpy(files[pos]->text, end);

    return pos + 1;
}

/*
 * Create a record for 'filename', append it to the 'files' array and read
 * its first line.
 *
 * If the file cannot be opened a warning is printed; the record is still
 * appended with a NULL stream, and the get_line() call at the end then
 * removes it again.
 *
 * Terminates the program with exit(1) if memory cannot be allocated.
 */
void
open_file(char *filename)
{
    struct record *newfile = malloc(sizeof *newfile);

    if (newfile == NULL) {
	fprintf(stderr, "Out of memory while opening '%s'\n", filename);
	exit(1);
    }

    newfile->text = NULL;
    newfile->textlen = 0;
    newfile->count = 0;
    newfile->fh = fopen(filename, "r");
    newfile->filename = strdup(filename);
    if (newfile->filename == NULL) {
	fprintf(stderr, "Out of memory while opening '%s'\n", filename);
	exit(1);
    }
    if (newfile->fh == NULL) {
	fprintf(stderr, "warning: could not open '%s'\n", filename);
    }

    /*
     * Grow the array through a temporary so the existing array pointer is
     * only replaced once the reallocation has succeeded.
     */
    int pos = nfiles;
    struct record **grown = realloc(files, (pos + 1) * sizeof *grown);

    if (grown == NULL) {
	fprintf(stderr, "Out of memory while opening '%s'\n", filename);
	exit(1);
    }
    files = grown;
    files[pos] = newfile;
    nfiles = pos + 1;

    get_line(pos);
}

int
filecmp(const void *a, const void *b)
{
    return strcmp((*(struct record **)a)->text,
	          (*(struct record **)b)->text);
}

/*
 * Order the 'files' array by each record's current text, using filecmp().
 *
 * Usually only the first few entries are out of place when this is called,
 * so this relies on the library's qsort() coping well with nearly sorted
 * input.
 */
void
sort_files()
{
    qsort(files, nfiles, sizeof *files, filecmp);
}

/*
 * Print one merged output line for the text held by files[0].
 *
 * The counts of all leading records whose text equals that of files[0] are
 * added up, and each of those records is advanced to its next line with
 * get_line() (which removes the record instead when its file is exhausted).
 * The sum is printed in front of the text and added to grand_total.
 *
 * Expects at least one record in 'files', with equal texts adjacent at the
 * start of the array, as arranged by sort_files().
 */
void
print_sum_and_get_next_lines()
{
    char key[LINESIZE];
    long sum = files[0]->count;
    int idx;

    /* Keep a private copy: get_line() overwrites the record's text */
    strcpy(key, files[0]->text);

    /*
     * get_line() returns the index of the next candidate, which stays the
     * same when the record it was given has been removed.
     */
    idx = get_line(0);
    while (idx < nfiles && strcmp(files[idx]->text, key) == 0) {
	sum += files[idx]->count;
	idx = get_line(idx);
    }

    printf("%7ld %s", sum, key);

    grand_total += sum;
}

/*
 * Write the usage summary to stderr and terminate with exit status 1.
 * Does not return.
 */
void
usage()
{
    static const char *const help[] = {
	"Usage: merge-counted-files [-t] file-to-merge ...\n",
	"       -t: grand total\n",
	"       file-to-merge: output from sort | uniq -c: each line is\n",
	"                      opt whitespace, a number, 1 space, text.\n",
    };
    size_t n;

    for (n = 0; n < sizeof help / sizeof help[0]; n++) {
	fputs(help[n], stderr);
    }

    exit(1);
}

/*
 * Entry point.
 *
 * Options:
 *   -t  print a final line with the grand total of all counts
 *   -h  print the usage message and exit
 *
 * Every remaining argument names a file to merge; at least one is
 * required.  Merged lines are written to stdout.  Returns 0 once all input
 * has been consumed.
 */
int
main(int argc, char **argv)
{
    int i;
    int tflag = 0;	/* must start at 0: only -t may turn the total on */
    int ch;

    while ((ch = getopt(argc, argv, "ht")) != -1) {
	switch (ch) {
	    case 't':
		tflag++;
		break;
	    case 'h':
	    case '?':
	    default:
		usage();	/* exits, does not return */
	}
    }
    argc -= optind;
    argv += optind;

    if (argc < 1) {
	usage();
    }

    for (i = 0; i < argc; i++) {
	open_file(argv[i]);
    }

    /*
     * Each round emits the smallest remaining text; records are dropped as
     * their files run out, so the loop ends when nothing is left to read.
     */
    while (nfiles) {
	sort_files();
	print_sum_and_get_next_lines();
    }

    if (tflag) {
	printf("%7ld grand_total\n", grand_total);
    }

    return 0;
}

