/**
 * pdfXtk-Extras - PDF Extraction Toolkit Extras
 * Copyright (c) by the authors/contributors.  All rights reserved.
 * This project includes code from PDFBox and TouchGraph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the names pdfXtk or PDF Extraction Toolkit; nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * http://pdfxtk.sourceforge.net
 *
 */
package at.ac.tuwien.dbai.pdfwrap.table;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import at.ac.tuwien.dbai.pdfwrap.analysis.CandidateCluster;
import at.ac.tuwien.dbai.pdfwrap.comparators.ColumnEdgeAttributeComparator;
import at.ac.tuwien.dbai.pdfwrap.comparators.XComparator;
import at.ac.tuwien.dbai.pdfwrap.comparators.YComparator;
import at.ac.tuwien.dbai.pdfwrap.model.document.CompositeSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.GenericSegment;
import at.ac.tuwien.dbai.pdfwrap.model.document.TextBlock;
import at.ac.tuwien.dbai.pdfwrap.model.document.TextLine;
import at.ac.tuwien.dbai.pdfwrap.model.document.TextSegment;
import at.ac.tuwien.dbai.pdfwrap.model.graph.AdjacencyEdge;
import at.ac.tuwien.dbai.pdfwrap.model.graph.AdjacencyGraph;
import at.ac.tuwien.dbai.pdfwrap.util.ExtraUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.ListUtils;
import at.ac.tuwien.dbai.pdfwrap.utils.SegmentUtils;

// NOTE: this object used to have a much different
// function with the old table-finding algorithm.
// Now it's just practically a CTS.

/**
 * Class used to store potential tables during table-finding
 * 
 * @author Tamir Hassan, pdfanalyser@tamirhassan.com
 * @version PDF Analyser 0.9
 */
public class CandidateTable extends CompositeSegment<GenericSegment>//CandidateCluster // changed from CompositeSegment
									// contains TableColumns -> CandidateColumns and LineSegments (for ruled tables)
{
	// Columns of this table; assigned by findRowsAndCols() from the result of
	// findColumns() and sorted in place (left-to-right) by toOrderedTable().
	protected List<TableColumn> columns;// = new SegmentList();
	// Rows of this table; assigned by findRowsAndCols() from the result of
	// findRows() and then passed through mergeOverlappingRows().
	protected List<TableRow> rows;// = new SegmentList();
	// Text lines gathered by findLines() from the TextBlocks of every
	// TableColumn contained in this segment's items.
	protected List<TextLine> lines;// = new SegmentList();
	// Raw (untyped) list; not read or written in the visible part of this class.
	// NOTE(review): presumably left over from the old table-finding algorithm -- confirm before removing.
	protected List splitInterfaces;
	// Set by findRowsAndCols() to the return value of lineSpacingRecluster().
	protected boolean reclustered = false;
	
	// Raw (untyped) graph field; the visible methods build their own local
	// graphs named 'ng', which shadow this field.
	protected AdjacencyGraph ng;
	
	public CandidateTable()
	{
		super();
	}
	
	/**
	 * Rebuilds {@link #lines} from scratch: for every {@link TableColumn}
	 * among this segment's items, the items of each of its text blocks are
	 * appended in iteration order. Items that are not table columns
	 * (ruling lines, etc.) contribute nothing.
	 */
	protected void findLines()
	{
		List<TextLine> collected = new ArrayList<TextLine>();
		lines = collected;
		
		for (GenericSegment member : items)
		{
			// ruling lines, etc. are skipped
			if (!(member instanceof TableColumn))
				continue;
			
			// the blocks' items are taken as they are; line finding is not
			// re-run on the blocks here
			for (TextBlock block : ((TableColumn)member).getItems())
				collected.addAll(block.getItems());
		}
	}
	
	/**
	 * Decides whether the given vertical edge is a "split interface", i.e.
	 * whether one of its end nodes has a competing vertical neighbour on the
	 * side facing the other end node.
	 * <p>
	 * For an upward edge (REL_ABOVE) the answer is true when the graph holds
	 * another upward edge leaving the from-node towards a different node
	 * whose Y1 does not exceed the to-node's Y2, or another downward edge
	 * leaving the to-node towards a different node whose Y2 is not below the
	 * from-node's Y1. For a downward edge (REL_BELOW) the mirrored tests are
	 * applied. Edges with any other direction are never split interfaces.
	 *
	 * @param ae the edge to examine; both of its nodes are cast to TextSegment
	 * @param ng the graph whose complete edge list is scanned for competitors
	 * @return true if a competing edge exists, false otherwise
	 */
	public boolean isSplitInterface(AdjacencyEdge<? extends GenericSegment> ae, 
		AdjacencyGraph<? extends GenericSegment> ng)
	{
		TextSegment source = (TextSegment)ae.getNodeFrom();
		TextSegment target = (TextSegment)ae.getNodeTo();
		
		boolean upward = ae.getDirection() == AdjacencyEdge.REL_ABOVE;
		boolean downward = !upward && ae.getDirection() == AdjacencyEdge.REL_BELOW;
		
		// horizontal (or otherwise non-vertical) edges cannot split anything
		if (!upward && !downward)
			return false;
		
		for (AdjacencyEdge<? extends GenericSegment> rival : ng.getEdges())
		{
			if (upward)
			{
				// another upward edge from the source that does not lead to the target
				if (rival.getDirection() == AdjacencyEdge.REL_ABOVE
					&& rival.getNodeFrom() == source
					&& rival.getNodeTo() != target
					&& rival.getNodeTo().getY1() <= target.getY2())
					return true;
				
				// another downward edge from the target that does not lead to the source
				if (rival.getDirection() == AdjacencyEdge.REL_BELOW
					&& rival.getNodeFrom() == target
					&& rival.getNodeTo() != source
					&& rival.getNodeTo().getY2() >= source.getY1())
					return true;
			}
			else
			{
				// another downward edge from the source that does not lead to the target
				if (rival.getDirection() == AdjacencyEdge.REL_BELOW
					&& rival.getNodeFrom() == source
					&& rival.getNodeTo() != target
					&& rival.getNodeTo().getY2() >= target.getY1())
					return true;
				
				// another upward edge from the target that does not lead to the source
				if (rival.getDirection() == AdjacencyEdge.REL_ABOVE
					&& rival.getNodeFrom() == target
					&& rival.getNodeTo() != source
					&& rival.getNodeTo().getY1() <= source.getY2())
					return true;
			}
		}
		
		return false;
	}
	
	/**
	 * Derives the column and row structure of this candidate table and stores
	 * it in {@link #columns} and {@link #rows}.
	 * <p>
	 * Steps, in order: collect the text lines ({@link #findLines()}),
	 * recluster by line spacing (result kept in {@link #reclustered}), build
	 * the columns from the candidate columns among the items, build the rows
	 * from the columns' sub-items and finally merge overlapping rows.
	 * <p>
	 * History: up to 17.06.10 a text-length heuristic chose between
	 * cluster-based and line-based processing. The heuristic was removed
	 * ("will be done later") and the line-based path was never implemented,
	 * so only the cluster-based path exists here.
	 */
	public void findRowsAndCols()
	{
		findLines();
		
		// works here, does not work in PageProcessor at end
		// reclusters the Clusters within Items based on linespacing
		reclustered = lineSpacingRecluster();
		
		// note: items contains other stuff such as ruling lines
		columns = findColumns(ExtraUtils.getCandidateColumns(items));
		
		rows = findRows(getSubItemsFromColumns(columns));
		mergeOverlappingRows(rows);
		
		// The rows are deliberately not re-sorted after merging (decision of
		// 17.07.10): per the original note, sorting happens after the
		// splitting step in PageProcessor.
		// Afterwards, calling methods perform the content-based reclustering.
	}
	
	/**
	 * Groups candidate columns into table columns.
	 * <p>
	 * An adjacency graph is built over the candidates; its vertical edges are
	 * sorted with {@link ColumnEdgeAttributeComparator} and processed in that
	 * order. For each edge whose two end nodes differ:
	 * <ul>
	 * <li>if the edge is a split interface (see {@link #isSplitInterface}),
	 *     every end node that is still unassigned becomes a column of its
	 *     own and nothing is joined;</li>
	 * <li>otherwise the two end nodes are placed in the same column: a new
	 *     column is created, an unassigned node joins the column of the
	 *     assigned one, or two existing columns are merged.</li>
	 * </ul>
	 * Candidates still unassigned after all edges have been processed become
	 * single-candidate columns.
	 *
	 * @param candCols the candidate columns to group; the list is not modified
	 * @return the resulting table columns, in order of creation
	 */
	public List<TableColumn> findColumns(List<CandidateColumn> candCols)
	{
		AdjacencyGraph<CandidateColumn> ng = new AdjacencyGraph<CandidateColumn>();
		ng.addList(candCols);
		ng.generateEdgesSingle();
		
		// candidates that have not been assigned to any column yet
		List<CandidateColumn> unusedSegments = new ArrayList<CandidateColumn>(candCols);
		
		List<TableColumn> retVal = new ArrayList<TableColumn>();
		List<AdjacencyEdge<CandidateColumn>> vertEdges = ng.vertEdges();
		
		// TODO: adjust Comparator?
		Collections.sort(vertEdges, new ColumnEdgeAttributeComparator());
		
		// maps every assigned candidate to the column currently holding its items
		HashMap<CandidateColumn, TableColumn> colHash = new HashMap<CandidateColumn, TableColumn>();
		
		while (!vertEdges.isEmpty())
		{
			AdjacencyEdge<CandidateColumn> ae = vertEdges.remove(0);
			CandidateColumn segFrom = ae.getNodeFrom();
			CandidateColumn segTo = ae.getNodeTo();
			
			// self-edges: yes, it does happen!
			if (segFrom == segTo)
				continue;
			
			boolean fromUnused = unusedSegments.contains(segFrom);
			boolean toUnused = unusedSegments.contains(segTo);
			boolean split = isSplitInterface(ae, ng);
			
			if (fromUnused && toUnused)
			{
				if (split)
				{
					// add as two separate columns
					TableColumn colFrom = buildColumnFrom(segFrom);
					retVal.add(colFrom);
					colHash.put(segFrom, colFrom);
					unusedSegments.remove(segFrom);
					
					TableColumn colTo = buildColumnFrom(segTo);
					retVal.add(colTo);
					colHash.put(segTo, colTo);
					unusedSegments.remove(segTo);
				}
				else
				{
					// add as one column
					TableColumn newCol = buildColumnFrom(segFrom, segTo);
					retVal.add(newCol);
					colHash.put(segFrom, newCol);
					colHash.put(segTo, newCol);
					unusedSegments.remove(segFrom);
					unusedSegments.remove(segTo);
				}
			}
			else if (fromUnused)
			{
				if (split)
				{
					// add segFrom as a new column
					TableColumn newCol = buildColumnFrom(segFrom);
					retVal.add(newCol);
					colHash.put(segFrom, newCol);
					unusedSegments.remove(segFrom);
				}
				else
				{
					// add segFrom to existing column to which segTo belongs
					TableColumn colTo = colHash.get(segTo);
					colTo.getItems().addAll(segFrom.getItems());
					colTo.growBoundingBox(segFrom);
					colHash.put(segFrom, colTo);
					unusedSegments.remove(segFrom);
				}
			}
			else if (toUnused)
			{
				if (split)
				{
					// add segTo as a new column
					TableColumn newCol = buildColumnFrom(segTo);
					retVal.add(newCol);
					colHash.put(segTo, newCol);
					unusedSegments.remove(segTo);
				}
				else
				{
					// add segTo to existing column to which segFrom belongs
					TableColumn colFrom = colHash.get(segFrom);
					colFrom.getItems().addAll(segTo.getItems());
					colFrom.growBoundingBox(segTo);
					colHash.put(segTo, colFrom);
					unusedSegments.remove(segTo);
				}
			}
			else if (!split)
			{
				// both candidates already belong to columns: merge them if
				// they are not the same column.
				// (For a split interface nothing is done: the two candidates
				// have already ended up in whatever columns earlier edges
				// gave them.)
				TableColumn colFrom = colHash.get(segFrom);
				TableColumn colTo = colHash.get(segTo);
				
				if (colFrom != colTo)
				{
					for (TextBlock tb : colTo.getItems())
					{
						colFrom.getItems().add(tb);
						colFrom.growBoundingBox(tb);
					}
					
					// Re-point every candidate of the absorbed column to the
					// surviving one. The map is keyed by candidate, so without
					// this a later edge touching one of those candidates would
					// look up colTo, which is no longer part of the result,
					// and anything added to it would be lost.
					for (java.util.Map.Entry<CandidateColumn, TableColumn> entry : colHash.entrySet())
					{
						if (entry.getValue() == colTo)
							entry.setValue(colFrom);
					}
					
					retVal.remove(colTo);
				}
			}
		}
		
		// add unused (single-row) columns
		for (CandidateColumn unusedCol : unusedSegments)
		{
			TableColumn tc = new TableColumn();
			tc.getItems().addAll(unusedCol.getItems());
			tc.findText();
			tc.findBoundingBox();
			//tc.findFont(); ???
			retVal.add(tc);
		}
		
		return retVal;
	}
	
	/**
	 * Creates a table column that holds the items of the given candidates
	 * (in the given order) and computes its bounding box.
	 */
	private TableColumn buildColumnFrom(CandidateColumn... parts)
	{
		TableColumn col = new TableColumn();
		for (CandidateColumn part : parts)
			col.getItems().addAll(part.getItems());
		col.findBoundingBox();
		return col;
	}

	/**
	 * Groups text blocks into table rows.
	 * <p>
	 * An adjacency graph is built over the blocks and each of its horizontal
	 * edges puts its two end nodes into the same row: a new row is created,
	 * an unassigned block joins the row of the assigned one, or two existing
	 * rows are merged. Blocks still unassigned after all edges have been
	 * processed become single-cell rows. Every block is wrapped in a new
	 * {@link TableCell} when it is added to a row. Finally the bounding box
	 * of each row is computed and the rows are sorted with
	 * {@link YComparator}.
	 *
	 * @param cells the text blocks to group; the list is not modified
	 * @return the rows found, sorted with YComparator
	 */
	public List<TableRow> findRows(List<TextBlock> cells)
	{
		// this is NOT the instance variable here! 4.08.09
		List<TableRow> retVal = new ArrayList<TableRow>();
		
		AdjacencyGraph<TextBlock> ng = new AdjacencyGraph<TextBlock>();
		ng.addList(cells);
		ng.generateEdgesSingle();
		
		List<AdjacencyEdge<TextBlock>> horizEdges = ng.horizEdges();
		
		// blocks that have not been assigned to any row yet
		List<TextBlock> unusedCells = new ArrayList<TextBlock>(cells);
		
		// rowHash contains TextBlocks, from which TableCells have been generated,
		// not the TableCells themselves...
		HashMap<TextBlock, TableRow> rowHash = new HashMap<TextBlock, TableRow>();
		
		// we follow a variant of BFC
		while (!horizEdges.isEmpty())
		{
			AdjacencyEdge<TextBlock> ae = horizEdges.remove(0);
			TextBlock segFrom = ae.getNodeFrom();
			TextBlock segTo = ae.getNodeTo();
			
			// A self-edge carries no row information; processing it for an
			// unassigned block would put the same block into a row twice.
			if (segFrom == segTo)
				continue;
			
			boolean fromUnused = unusedCells.contains(segFrom);
			boolean toUnused = unusedCells.contains(segTo);
			
			if (fromUnused && toUnused)
			{
				// start a new row with both blocks
				TableRow tr = new TableRow();
				tr.getItems().add(new TableCell(segFrom));
				tr.getItems().add(new TableCell(segTo));
				unusedCells.remove(segFrom);
				unusedCells.remove(segTo);
				rowHash.put(segFrom, tr);
				rowHash.put(segTo, tr);
				retVal.add(tr);
			}
			else if (fromUnused)
			{
				// segFrom joins the row segTo already belongs to
				TableRow tr = rowHash.get(segTo);
				tr.getItems().add(new TableCell(segFrom));
				unusedCells.remove(segFrom);
				rowHash.put(segFrom, tr);
			}
			else if (toUnused)
			{
				// segTo joins the row segFrom already belongs to
				TableRow tr = rowHash.get(segFrom);
				tr.getItems().add(new TableCell(segTo));
				unusedCells.remove(segTo);
				rowHash.put(segTo, tr);
			}
			else
			{
				// both blocks already belong to rows: merge the rows :-)
				TableRow tr1 = rowHash.get(segFrom);
				TableRow tr2 = rowHash.get(segTo);
				
				if (tr1 != tr2)
				{
					tr1.getItems().addAll(tr2.getItems());
					
					// Re-point every block of the absorbed row to the
					// surviving one. The map is keyed by the source blocks
					// (not by the TableCell wrappers stored in the row), so
					// the entries have to be found through their value.
					// Otherwise a later edge touching one of those blocks
					// would still resolve to tr2, which is no longer part of
					// the result.
					for (java.util.Map.Entry<TextBlock, TableRow> entry : rowHash.entrySet())
					{
						if (entry.getValue() == tr2)
							entry.setValue(tr1);
					}
					
					retVal.remove(tr2);
				}
			}
		}
		
		// now go through the unused segments: each becomes a row of its own
		for (TextBlock tb : unusedCells)
		{
			TableRow tr = new TableRow();
			tr.getItems().add(new TableCell(tb));
			retVal.add(tr);
		}
		
		for (TableRow r : retVal)
			r.findBoundingBox();
		
		Collections.sort(retVal, new YComparator());
		return retVal;
	}
	
	/*
	private boolean isSameRow(AdjacencyEdge ae)
	{
		TextSegment nodeFrom = (TextSegment)ae.getNodeFrom();
		TextSegment nodeTo = (TextSegment)ae.getNodeTo();
		
		float fontSize;
		if (nodeFrom.getSegFontSize() > nodeTo.getSegFontSize())
			fontSize = nodeFrom.getSegFontSize();
		else fontSize = nodeTo.getSegFontSize();
		
		float threshold = fontSize * 0.1f;
		
		if ((nodeFrom.getY1() >= nodeTo.getY1() - threshold) &&
			nodeFrom.getY2() <= nodeTo.getY2() + threshold)
			return true;
			
		if ((nodeTo.getY1() >= nodeFrom.getY1() - threshold) &&
			nodeTo.getY2() <= nodeFrom.getY2() + threshold)
			return true;
		
		return false;
	}
	*/
	
	/**
	 * Converts the rows and columns found so far into an {@link OrderedTable}
	 * whose rows are padded with blank cells.
	 * <p>
	 * Precondition: {@link #columns} and {@link #rows} have been found (see
	 * findRowsAndCols()). Side effect: {@link #columns} is sorted in place
	 * with {@link XComparator}.
	 * <p>
	 * The method works in four passes:
	 * <ol>
	 * <li>for every row, one cell is created per intersecting column (a
	 *     {@link BlankCell} if no text of the column intersects the row) and
	 *     the originating column is remembered in a parallel "links" list;</li>
	 * <li>each column is assigned a horizontal index (hPos), shifting already
	 *     placed columns and inserting blank cells where needed;</li>
	 * <li>blank cells are inserted so that every cell sits at its column's
	 *     index;</li>
	 * <li>cells following a cell with colspan &gt; 1 are removed.</li>
	 * </ol>
	 *
	 * @return the ordered table, with its bounding box computed
	 */
	public OrderedTable toOrderedTable()
	{
		// sort columns in left-to-right order
		Collections.sort(columns, new XComparator());
		
		OrderedTable retVal = new OrderedTable();
		// live list of the result's rows (named so as not to shadow the
		// inherited 'items' field)
		List<TableRow> tableRows = retVal.getItems();
		
		// links to the column objects
		// so that we know where to add blank cells
		// (one entry per row; its items are TableColumn or BlankCell objects)
		List<CompositeSegment<GenericSegment>> columnLinks = new ArrayList<CompositeSegment<GenericSegment>>();
		
		// ---- pass 1: build the cells of every row ----
		for (TableRow tr : rows)
		{
			TableRow thisRow = new TableRow();
			CompositeSegment<GenericSegment> rowColumnLinks = new CompositeSegment<GenericSegment>();
			List<TableCell> rowItems = thisRow.getItems();
			List<GenericSegment> rowLinks = rowColumnLinks.getItems();
			
			// get all columns that intersect with this row
			List<TableColumn> intersectingColumns = new ArrayList<TableColumn>();
			for (TableColumn c : columns)
			{
				if (SegmentUtils.intersects(c, tr))
					intersectingColumns.add(c);
			}
			
			// sort them left-to-right
			Collections.sort(intersectingColumns, new XComparator());
			
			// add them one by one, and update the links
			for (TableColumn ic : intersectingColumns)
			{
				// find the text blocks of the column that intersect the row (if any)
				List<TextBlock> intersectingItems = new ArrayList<TextBlock>();
				for (TextBlock tb : ic.getItems())
				{
					if (SegmentUtils.intersects(tb, tr))
						intersectingItems.add(tb);
					// TODO: add the intersecting segment or its items?
				}
				
				// create a TableCell object with colspan = column colspan
				TableCell tc = new TableCell();
				tc.setColspan(ic.getColspan());
				for (TextBlock tb : intersectingItems)
					tc.getItems().addAll(tb.getItems());
				tc.findText();
				tc.findBoundingBox();
				
				// hack 15.07.10 -- method is flawed
				if (tc.getItems().size() == 0) tc = new BlankCell(ic.getColspan(), 1);
				// end of hack
				
				// add to lists
				rowItems.add(tc);
				rowLinks.add(ic);
			}
			
			tableRows.add(thisRow);
			columnLinks.add(rowColumnLinks);
		}
		
		// ---- pass 2: find the maxHPos (max index) of each column ----
		HashMap<TableColumn, Integer> hPos = new HashMap<TableColumn, Integer>();
		int highestI = 0;
		
		// tableRows.size() == columnLinks.size()
		for (int n = 0; n < columnLinks.size(); n ++) //rows
		{
			TableRow thisRow = tableRows.get(n);
			CompositeSegment<GenericSegment> rowColumnLinks = columnLinks.get(n);
			
			List<TableCell> rowItems = thisRow.getItems();
			List<GenericSegment> rowLinks = rowColumnLinks.getItems();
			
			// i is the horizontal index reached so far in this row; p is the
			// list index into rowLinks (the two differ once colspans or
			// shifts are involved)
			// NOTE(review): blanks are inserted at list index i, not p -- confirm
			// this is intended when i and p have diverged.
			int i = 0;
			for (int p = 0; p < rowLinks.size(); p ++) //cols
			{
				if (rowLinks.get(p) instanceof TableColumn)
				{
					TableColumn tc = (TableColumn)rowLinks.get(p);
					
					// keep track of an INDEX
					// when 'new setting pos', check what that 'pos' contains & shuffle if necessary
					
					if (hPos.containsKey(tc))
					{
						int pos = hPos.get(tc);
						
						if (i > pos)
						{
							// the column has to move right, from pos to i
							int difference = i - pos;
							hPos.put(tc, Integer.valueOf(i));
							
							// and shuffle all the existing columns
							for (TableColumn tc2 : columns)
							{
								if (tc != tc2 && hPos.containsKey(tc2))
								{
									int k = hPos.get(tc2);
									if (k >= i)
										hPos.put(tc2, Integer.valueOf(k + difference));
								}
							}
							i += difference;
						}
						else if (i < pos)
						{
							// add a blank
							rowLinks.add(i, new BlankCell(1, 1));
							rowItems.add(i, new BlankCell(1, 1));
							i ++;
						}
						else
						{
							// pos is OK, do not need to do anything :)
							i += tc.getColspan();
							
							// 14.06.07: inserting colspan blank cells here was
							// tried and did not fix the bug; it was rolled back
						}
					}
					else
					{
						// the column has no index yet
						TableColumn span = getHorizSpan(hPos, columns, i);
						
						if (span == null || SegmentUtils.horizIntersect(span, tc.getXmid()) || 
							SegmentUtils.horizIntersect(tc, span.getXmid()))
						{
							boolean futureIntersect = false;
							if (span == null)
							{
								// if a future column exists and intersects, add blank
								for (int s = i + 1; s <= highestI; s ++)
								{
									TableColumn fSpan = getHorizSpan(hPos, columns, s);
									if (fSpan != null && (SegmentUtils.horizIntersect(fSpan, tc.getXmid()) 
										|| SegmentUtils.horizIntersect(tc, fSpan.getXmid())))
										futureIntersect = true;
								}
								// otherwise just add
							}
							
							if (futureIntersect)
							{
								// add a space
								rowLinks.add(i, new BlankCell(1, 1));
								rowItems.add(i, new BlankCell(1, 1));
								i ++;
							}
							else
							{
								// just add
								hPos.put(tc, Integer.valueOf(i));
								i += tc.getColspan();
							}
						}
						else if (tc.getXmid() > span.getXmid())
						{
							// add a space
							rowLinks.add(i, new BlankCell(1, 1));
							rowItems.add(i, new BlankCell(1, 1));
							i ++;
						}
						else // tc.getXmid() MUST be < span.getXmid()
						{
							// shuffle the OTHERS.
							for (TableColumn tc2 : columns)
							{
								if (hPos.containsKey(tc2))
								{
									int k = hPos.get(tc2);
									if (tc != tc2 && k >= i)
										hPos.put(tc2, Integer.valueOf(k + 1));
								}
							}
							
							hPos.put(tc, Integer.valueOf(i));
							i += 1;
						}
					}
				}
				if (i > highestI) highestI = i;
			}
		}
		
		// ---- pass 3: add blank cells to ensure that the maxHPos is adhered to ----
		for (int n = 0; n < tableRows.size(); n ++) //rows
		{
			TableRow thisRow = tableRows.get(n);
			CompositeSegment<GenericSegment> rowColumnLinks = columnLinks.get(n);
			
			List<TableCell> rowItems = thisRow.getItems();
			List<GenericSegment> rowLinks = rowColumnLinks.getItems();
			
			for (int p = 0; p < rowItems.size(); p ++) //cols
			{
				if (rowLinks.get(p) instanceof TableColumn) // could be a blank cell added 11.06.07
				{
					TableColumn tc = (TableColumn)rowLinks.get(p);
					// NOTE(review): throws NullPointerException if pass 2 never
					// assigned an index to this column -- confirm that cannot happen.
					int maxPos = hPos.get(tc);
					
					// insert (maxPos - p) blanks in front of the cell; p is
					// advanced along with the cell so that the outer loop
					// continues behind it
					for (int r = p; r < maxPos; r ++)
					{
						rowItems.add(p, new BlankCell(1, 1));
						rowLinks.add(p, new BlankCell(1, 1));
						p ++;
					}
				}
			}
		}
		
		// ---- pass 4: now get rid of blanks after colspan ----
		for (int n = 0; n < tableRows.size(); n ++) //rows
		{
			TableRow thisRow = tableRows.get(n);
			List<TableCell> rowItems = thisRow.getItems();
			
			for (int p = 0; p < rowItems.size(); p ++) //cols
			{
				TableCell tc = rowItems.get(p);
				// drop up to (colspan - 1) cells directly following the spanning cell
				for (int r = 1; r < tc.getColspan(); r ++)
				{
					if (rowItems.size() >= (p + 2))
						rowItems.remove(p + 1);
				}
			}
		}
		
		// the rows were added through the live list obtained at the beginning
		retVal.findBoundingBox();
		return retVal;
	}
	
	/**
	 * Merges overlapping rows of the given list in place, one pair at a time,
	 * until a full pass finds no further pair to merge.
	 *
	 * <p>An ordered pair of distinct rows (candidate, other) is merged as soon as
	 * some text line in some cell of <code>other</code> satisfies both
	 * <code>SegmentUtils.horizIntersect(candidate, line.getXmid())</code> and
	 * <code>SegmentUtils.vertIntersect(candidate, line.getYmid())</code>. Every
	 * ordered pair is visited, so the test only needs to be made in one
	 * direction. The merged row receives the cells of both rows, is appended to
	 * the list, and the two source rows are removed; the search then restarts.
	 *
	 * @param theRows the rows to process; this list is modified directly
	 */
	public void mergeOverlappingRows(List<TableRow> theRows)
	{
		boolean mergedPair;
		do
		{
			mergedPair = false;
			List<TableRow> replacements = new ArrayList<TableRow>();
			List<TableRow> superseded = new ArrayList<TableRow>();

			// stop at the first overlapping pair; the list is only changed
			// after the iteration over it has ended
			search:
			for (TableRow candidate : theRows)
			{
				for (TableRow other : theRows)
				{
					if (candidate == other)
						continue;

					for (Iterator<TableCell> cellIter = other.getItems().iterator(); cellIter.hasNext(); )
					{
						// cells hold TextLines as sub-items
						TableCell cell = cellIter.next();
						for (Iterator<TextLine> lineIter = cell.getItems().iterator(); lineIter.hasNext(); )
						{
							TextLine line = lineIter.next();
							if (!SegmentUtils.horizIntersect(candidate, line.getXmid()))
								continue;
							if (!SegmentUtils.vertIntersect(candidate, line.getYmid()))
								continue;

							TableRow combined = new TableRow();
							combined.getItems().addAll(candidate.getItems());
							combined.getItems().addAll(other.getItems());
							combined.setCalculatedFields();

							replacements.add(combined);
							superseded.add(candidate);
							superseded.add(other);

							mergedPair = true;
							break search;
						}
					}
				}
			}

			// add first, then remove -- same order as before
			theRows.addAll(replacements);
			theRows.removeAll(superseded);
		}
		while (mergedPair);
	}

	/**
	 * Splits text blocks whose line spacing is noticeably larger than the
	 * smallest line spacing found among the candidate columns' blocks.
	 *
	 * <p>Step 1 determines the reference spacing: for every block obtained from
	 * the candidate columns, a temporary {@link CandidateCluster} is built from
	 * the block's items and its line spacing is considered if it is greater than
	 * 0.25 and the cluster has more than one found line. The minimum of these
	 * values is the reference (it stays at <code>Float.MAX_VALUE</code> when no
	 * block qualifies).
	 *
	 * <p>Step 2 walks each candidate column and replaces every block whose own
	 * line spacing is greater than 0.25 and greater than 1.20 times the
	 * reference by one new single-line {@link TextBlock} per contained line.
	 *
	 * @return <code>true</code> if at least one block was split, otherwise
	 *         <code>false</code>
	 */
	public boolean lineSpacingRecluster()
	{
		// step 1: find the minimum line spacing over all clusters
		// (sub-items of the candidate columns)
		float smallestSpacing = Float.MAX_VALUE;
		List<TextBlock> allClusters =
			getSubItemsFromColumns(ExtraUtils.getCandidateColumns(items));

		for (TextBlock cluster : allClusters)
		{
			// measure on a fresh cluster so that the calculated fields
			// (found lines, line spacing) are computed from the current items
			CandidateCluster probe = new CandidateCluster();
			probe.getItems().addAll(cluster.getItems());
			probe.setCalculatedFields();

			float spacing = probe.getLineSpacing();
			boolean inRange = spacing > 0.25 && spacing < smallestSpacing;
			if (inRange && probe.getFoundLines().size() > 1)
			{
				smallestSpacing = spacing;
			}
		}

		// step 2: per column, split each block whose spacing exceeds
		// the minimum by more than 20% into its individual lines
		boolean changed = false;
		List<CandidateColumn> candidateColumns = ExtraUtils.getCandidateColumns(items);

		for (CandidateColumn column : candidateColumns)
		{
			List<TextBlock> singleLineBlocks = new ArrayList<TextBlock>();
			List<TextBlock> splitBlocks = new ArrayList<TextBlock>();

			for (TextBlock block : column.getItems())
			{
				float blockSpacing = block.getLineSpacing();
				if (!(blockSpacing > 0.25))
					continue;
				if (!(blockSpacing > smallestSpacing * 1.20f))
					continue;

				// NOTE(review): this prints to stdout on every split; it looks
				// like leftover debug output -- confirm whether it is still wanted
				System.out.println(block);

				for (TextLine line : block.getItems())
				{
					TextBlock lineBlock = new TextBlock();
					lineBlock.getItems().add(line);
					lineBlock.setCalculatedFields();
					singleLineBlocks.add(lineBlock);
				}
				splitBlocks.add(block);
			}

			// the column is only modified once iteration over it has finished
			column.getItems().removeAll(splitBlocks);
			column.getItems().addAll(singleLineBlocks);

			if (!splitBlocks.isEmpty() || !singleLineBlocks.isEmpty())
			{
				changed = true;
			}
		}

		return changed;
	}

	/**
	 * Builds a column spanning all single-colspan columns that are mapped to
	 * the given horizontal position.
	 *
	 * <p>Only columns with a colspan of exactly 1 that have an entry in
	 * <code>hPos</code> equal to <code>pos</code> contribute. The first such
	 * column provides the initial bounding box of the result; every further
	 * one is passed to <code>growBoundingBox</code>.
	 *
	 * @param hPos map from column to its horizontal position
	 * @param cols the columns to examine
	 * @param pos  the horizontal position to collect
	 * @return a new {@link TableColumn} carrying the combined bounding box, or
	 *         <code>null</code> if no column matched
	 */
	protected TableColumn getHorizSpan(HashMap<TableColumn, Integer> hPos, 
		List<TableColumn> cols, int pos)
	{
		TableColumn span = null;

		for (TableColumn column : cols)
		{
			// TODO: could deal with partial information from 2, 3, ... if necessary
			if (column.getColspan() != 1)
				continue;

			if (!hPos.containsKey(column) || hPos.get(column).intValue() != pos)
				continue;

			if (span != null)
			{
				span.growBoundingBox(column);
				continue;
			}

			// first match: start from this column's bounding box
			span = new TableColumn();
			span.setBoundingBox(column.getBoundingBox());
		}

		return span;
	}
	
	/**
	 * Decides whether this candidate qualifies as a table.
	 *
	 * <p>The method first runs <code>findRowsAndCols()</code> and calls
	 * <code>findText()</code> on every column, then applies the only criterion
	 * currently implemented: at least two columns and at least one row.
	 *
	 * <p>Ideas noted for future criteria (none of them is implemented here):
	 * <ul>
	 * <li>no massive horizontal gaps, unless joined by a ruling line;</li>
	 * <li>stricter minimum dimensions (e.g. 2 x 2), possibly a scoring system
	 *     that limits dimensions further if the shape of the table is
	 *     unusual;</li>
	 * <li>rows should generally not collide, with some tolerance;</li>
	 * <li>no out-of-the-ordinary colspans, particularly with multi-line
	 *     content;</li>
	 * <li>a reduced tolerance for small (and therefore unlikely) tables, i.e.
	 *     those with at most 3 columns or at most 3 rows.</li>
	 * </ul>
	 *
	 * @return <code>true</code> if there are at least 2 columns and at least
	 *         1 row, otherwise <code>false</code>
	 */
	public boolean isTable()
	{
		findRowsAndCols(); //4.08.09

		// refresh the text of every column before evaluating
		for (TableColumn column : columns)
		{
			column.findText();
		}

		// minimum dimensions: 2 columns x 1 row
		return columns.size() >= 2 && rows.size() >= 1;
	}
	
	/**
	 * Collects the items of all columns into one flat list.
	 *
	 * @return a new list containing the items of every column, in column order
	 */
	public List<TextBlock> getCells()
	{
		List<TextBlock> cells = new ArrayList<TextBlock>();
		for (Iterator<TableColumn> colIter = columns.iterator(); colIter.hasNext(); )
		{
			cells.addAll(colIter.next().getItems());
		}
		return cells;
	}
	
	/**
	 * Gathers the items of all {@link CandidateColumn} and {@link TableColumn}
	 * elements of the given list into one flat list. Elements of any other
	 * type are ignored.
	 *
	 * <p>NOTE(review): an earlier, now removed, commented-out variant kept only
	 * those items that intersect their column; this implementation copies all
	 * items without filtering.
	 *
	 * @param columns the column segments whose items are to be collected
	 * @return a new list with the items of all recognised columns, in the
	 *         order of the input list
	 */
	public List<TextBlock> getSubItemsFromColumns(List<? extends CompositeSegment> columns)
	{
		List<TextBlock> collected = new ArrayList<TextBlock>();
		for (Object candidate : columns)
		{
			if (candidate instanceof CandidateColumn)
				collected.addAll(((CandidateColumn)candidate).getItems());
			else if (candidate instanceof TableColumn)
				collected.addAll(((TableColumn)candidate).getItems());
		}
		return collected;
	}
	
	/**
	 * @return the list of columns held by this object (the internal list
	 *         itself, not a copy)
	 */
	public List<TableColumn> getColumns()
	{
		return this.columns;
	}

	/**
	 * Replaces the list of columns.
	 *
	 * @param columns the new column list; stored by reference
	 */
	public void setColumns(List<TableColumn> columns)
	{
		this.columns = columns;
	}

	/**
	 * @return the list of rows held by this object (the internal list itself,
	 *         not a copy)
	 */
	public List<TableRow> getRows()
	{
		return this.rows;
	}

	/**
	 * Replaces the list of rows.
	 *
	 * @param rows the new row list; stored by reference
	 */
	public void setRows(List<TableRow> rows)
	{
		this.rows = rows;
	}

	/**
	 * @return the list of text lines held by this object (the internal list
	 *         itself, not a copy)
	 */
	public List<TextLine> getLines()
	{
		return this.lines;
	}

	/**
	 * Replaces the list of text lines.
	 *
	 * @param lines the new line list; stored by reference
	 */
	public void setLines(List<TextLine> lines)
	{
		this.lines = lines;
	}

	/**
	 * @return the current value of the <code>reclustered</code> flag
	 */
	public boolean isReclustered()
	{
		return this.reclustered;
	}

	/**
	 * Sets the <code>reclustered</code> flag.
	 *
	 * @param reclustered the new flag value
	 */
	public void setReclustered(boolean reclustered)
	{
		this.reclustered = reclustered;
	}
	
}
