#!/usr/local/bin/perl
# 
#------------------------------------------------------------------------#
# Usage - pr <Name of template mif file> <Input data> <Output mif file>  #
# Input Data and Output mif file can be stdin, stdout repectively        #
#                                                                        # 
#                                                    ... Sriram          #
#------------------------------------------------------------------------#

$DELIM = "\t";

if (@ARGV == 1) {
    $ARGV[1] = "-";
    $ARGV[2] = "-";
} elsif (@ARGV == 2) {
    $ARGV[2] = "-";
} elsif (@ARGV != 3) {
    print ("Usage:  $0 <Template MIF File > <Input data file> <Output Mif File>\n");
    print ("Input/Output files can be \"-\" for stdin/stdout repectively\n");
    exit(1);
}

#First parse the input data file

				# 
open (IN, "$ARGV[1]") || die "Couldn't open $ARGV[1]\n";
while ($line = <IN>) {
    if ($line =~ /Table\s/) {
	# A table definition
	$gTableId++; # Global id to keep track of groups of columns

	@colNames = split (/\s/, $line);
	shift (@colNames); # Gets rid of the word "Table"

	#@gColNames keeps track of all columns in a table
	#This is to prevent the user from specifying different column
	#names from different tables

	$gColNames[$gTableId] = join (" ", @colNames);
	#Check if any of the colNames have been found before

	# %gCols is a global hash to enforce uniqueness of column names
	foreach $c (@colNames) {
	    if (defined ($gCols {$c})) {
		print ("Error : Duplicate Column Name Found $c \n");
		print ("Aborting ... \n");
		exit(1);
	    } else {
		$gCols{$c} = $gTableId;
	    }
	}

	$buf = "";

	# Slurp in the rest of the lines until TableEnd (or EOF)
	while ($line = <IN>) {
	    last if ($line =~ /TableEnd/) ;
	    # Escape all "/" because it screws up the s/// stmt later on.
	    $line =~ s#/#\\/#; 
	    $buf .= $line;
	}

	# @gTableBuf stores the data, indexed by tableId.
	$gTableBuf[$gTableId] = $buf;

    } else {
	# Must be a simple name-value pair
	# Value - from the first non space character after the name, till eoln
	($name, $value) = $line =~ / *([A-z]+)\s*(.*$)/;

	if (!($name =~ /^\s*$/o)) {
	    #Escape "/", otherwise screws up in the s/// stmt. later on.
	    $value =~ s#/#\\/#; 
	    $primitiveNames{$name} = $value;
	}
    }
}
close (IN);

    
# Create a function dynamically to do primitive substitutions effectively
# This function will be run on every paragraph.

$sub = "sub DoSimpleSubstitutions {";
$sub .= "local (\$buf) = \$_[0];";
foreach $n (keys %primitiveNames) {
    $sub .= "\$buf =~ s/[@]$n/$primitiveNames{$n}/go;";
}
$sub .= "return \$buf;";
$sub .= "}";
eval $sub;


# We have absorbed the input file, and are ready to step through the input
# .mif file.

open (TPL, "$ARGV[0]");
open (OUT, "> $ARGV[2]");

while ($line = <TPL>) {
    # We deal with only paragraphs or rows inside a table
    if ($line =~ /<Para/o) {
	print OUT $line;
	&DoPara;
    } elsif ($line =~ /<Row/o) {
	&DoRow;   #DoRow needs "<Row", so it will be printed later.
    }  else {
	# Uninteresting stuff ... write it out without change
	print OUT $line;
    }
}

close (TPL);
close (OUT);
exit(0);


#-------------------------------------------------------------------------
# DoPara absorbs everything within <Para .... >, and does all simple
# substitutions on the entire buffer before writing it out. Sure, tables are
# embedded in paragraphs, but the way .mif is organized, all tables in
# the document are written out first, given unique IDs, and then the main
# textflow starts where these ids are referred to. So there is no hassle
# of handling table related stuff inside a paragraph, since it would have
# been handled earlier by DoRow
#-------------------------------------------------------------------------

sub DoPara {
    local ($buf);
    #First absorb into buffer until the corresponding end of Para
    $buf = &Absorb;
    
    #Use this buffer as a template doing as many substitutions as necessary
    $buf =  &DoSimpleSubstitutions ($buf);
    print OUT $buf;
}

#-------------------------------------------------------------------------
# DoRow handles a "<Row" statement. A FrameMaker table "<Tbl" contains zero
# to many rows. In the template file, for any column variable that is 
# mentioned, the entire row is replicated as many times as necessary.
#-------------------------------------------------------------------------

sub DoRow {
    local ($buf);
    local ($i);
    local ($tableId);
    
    #First absorb into buffer until the corresponding end of <Row
    $buf = &Absorb;
    $buf = "<Row\n" . $buf;   

    # Now $buf contains an entire <Row .... >
    # &Absorb has kept track of whether any table variables
    # were found. If not the buffer is printed as is. 
    
    if (@gColsFound) {
	($tableId, @colIndices) = &StudyCols();
	@colsInThisTable = split (/ /, $gColNames[$tableId]);
	@rows = split (/\n/, $gTableBuf[$tableId]);
	foreach $r (@rows) {
	    $tmp = $buf;
	    @vals = split (/$DELIM/, $r);
	    foreach $c (@colIndices) {
		$tmp =~ s/\@$colsInThisTable[$c]/$vals[$c]/g;
	    }
	    print OUT $tmp;
	}
    } else {
	print OUT $buf;
    }
}

#----------------------------------------------------------------------------
# If a row in a template file table has one or more column variables, 
# StudyCols first ascertains that they belong to the same table. If so,
# it creates a list of column indices, that is used by DoRow to directly
# index into each row of the data. This is necessary because a table in 
# the input data may have the columns ordered in one way, and the template
# mif file may want only some columns out of any table, ordered in it's 
# own way. StudyCols studies the columns in the template mif file.
#----------------------------------------------------------------------------

sub StudyCols {
    local (@colsInThisTable);
    local ($tableId) = $gCols{$gColsFound[0]};
    local ($tmpId);
    local ($c);
    local (@colIndices);
    foreach $c (@gColsFound) {
	$tmpId = $gCols{$c};
	if ($tmpId != $tableId) {
	    print "These columns are not found in the same table in the input file\n";
	    print "\"@gColsFound\n\"";
	    print "Aborting\n";
	    exit(1);
	}
    }
    @colsInThisTable = split (/\s/, $gColNames[$tableId]);

    $i = 0;
    @colIndices = ();
    foreach $c (@colsInThisTable) {
	foreach $cf (@gColsFound) {
	    if ($c eq $cf) {
		push(@colIndices, $i);
		last;
	    }
	}
	$i++;
    }
    return ($tableId, @colIndices);
}
				# 
#------------------------------------------------------------------# 
# Absorb (store all input into $buf) till a *corresponding* ">"
# comes by.
#------------------------------------------------------------------# 

sub Absorb {
    local ($line) = "";
    local ($buf) = "";
    local ($count) = 1;
    @gColsFound = ();

    while ($line = <TPL>) {

	# Turns out that we need to count only those "<" and ">" that are
	# by themselves in a line. Most one-line statements are balanced 
	#" ("<....>") so they are not counted.

	if ($line =~ /^ *<[^>]*$/) {
	    $count++;
	} elsif ($line =~ /^ *>/) {
	    $count--;
	}
	
	# While we have the line, might as well check if there are any
	# table variables around
	if ($line =~ /@(\w+)/) {
	    # Yep. This line does have at least one @variable
	    $tmp = $line;
	    # Find out all the @variables it has, by repeated kicking
	    # out the first @var, and extracting var into $1
	    while ($tmp =~ s/\@(\w+)//) {
		if (defined ($gCols{$1})) {
		    # Yes .. this is a table column
		    push (@gColsFound , $1);
		}
	    }
	}
	$buf .= $line;
	if ($count == 0) {
	    # returned to corresponding angle bracket
	    return $buf;
	}

	# Haven't done too much defensive programming such as checking for 
	# bracket mismatches, since one would have to be mad to produce a mif
	# file by hand, and producing one using Frame will produce syntactically
	# correct mif.
    }
}



