/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees;

import edu.stanford.nlp.io.ExtensionFileFilter;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.StringLabel;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
import edu.stanford.nlp.trees.TransformingTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Sets;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.text.NumberFormat;
import java.util.AbstractCollection;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * Abstract base for a collection of {@link Tree}s.
 *
 * <p>This class stores no trees itself. It holds the {@link TreeReaderFactory}
 * and character encoding that subclasses use when loading trees, provides
 * convenience overloads of {@code loadPath}, and implements a few operations
 * ({@link #size()}, {@link #toString()}, {@link #textualSummary()},
 * {@link #decimate(Writer, Writer, Writer)}) on top of the abstract
 * {@link #apply(TreeVisitor)} method and the collection's iterator.
 *
 * <p>{@link #remove(Object)} is unsupported; the treebank is treated as read-only
 * with respect to element removal.
 */
public abstract class Treebank extends AbstractCollection<Tree> {
    /** File name suffix used by {@link #loadPath(File)} when none is given. */
    public static final String DEFAULT_TREE_FILE_SUFFIX = "mrg";

    /** Encoding used when a constructor is not given one explicitly. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Number of leading non-unary root rewrites that are tallied in the summary. */
    private static final int MAX_NON_UNARY_REWRITES_RECORDED = 100;

    /** Sentence-length threshold reported separately in the summary header. */
    private static final int SHORT_SENTENCE_LENGTH = 40;

    private final TreeReaderFactory trf;
    private final String encoding;

    /**
     * Creates a treebank that uses a {@link LabeledScoredTreeReaderFactory}
     * and the default encoding ({@code "UTF-8"}).
     */
    public Treebank() {
        this(new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates a treebank with the given reader factory and the default
     * encoding ({@code "UTF-8"}).
     *
     * @param trf factory returned by {@link #treeReaderFactory()}
     */
    public Treebank(TreeReaderFactory trf) {
        this(trf, DEFAULT_ENCODING);
    }

    /**
     * Creates a treebank with the given reader factory and encoding.
     *
     * @param trf      factory returned by {@link #treeReaderFactory()}
     * @param encoding value returned by {@link #encoding()}
     */
    public Treebank(TreeReaderFactory trf, String encoding) {
        this.trf = trf;
        this.encoding = encoding;
    }

    /**
     * Creates a treebank that uses a {@link LabeledScoredTreeReaderFactory}.
     *
     * @param initialCapacity not used by this base class (see
     *                        {@link #Treebank(int, TreeReaderFactory)})
     */
    public Treebank(int initialCapacity) {
        this(initialCapacity, new LabeledScoredTreeReaderFactory());
    }

    /**
     * Creates a treebank with the given reader factory and the default encoding.
     *
     * @param initialCapacity ignored here: this base class allocates no storage,
     *                        so the value is only meaningful to subclasses
     * @param trf             factory returned by {@link #treeReaderFactory()}
     */
    public Treebank(int initialCapacity, TreeReaderFactory trf) {
        this(trf, DEFAULT_ENCODING);
    }

    /**
     * Returns the reader factory supplied at construction time.
     *
     * @return the tree reader factory
     */
    protected TreeReaderFactory treeReaderFactory() {
        return this.trf;
    }

    /**
     * Returns the encoding name supplied at construction time, or
     * {@code "UTF-8"} if none was given.
     *
     * @return the encoding name
     */
    public String encoding() {
        return this.encoding;
    }

    /** Empties the treebank; the implementation is left to subclasses. */
    @Override
    public abstract void clear();

    /**
     * Loads trees from the given path using the default suffix and
     * recursive flag (see {@link #loadPath(File)}).
     *
     * @param pathName path name, wrapped in a {@link File}
     */
    public void loadPath(String pathName) {
        this.loadPath(new File(pathName));
    }

    /**
     * Loads trees from the given path, filtering with
     * {@link #DEFAULT_TREE_FILE_SUFFIX} and {@code recursively = true}.
     *
     * @param path file or directory to load from
     */
    public void loadPath(File path) {
        this.loadPath(path, DEFAULT_TREE_FILE_SUFFIX, true);
    }

    /**
     * Loads trees from the given path, filtering with an
     * {@link ExtensionFileFilter} built from {@code suffix} and {@code recursively}.
     *
     * @param pathName    path name, wrapped in a {@link File}
     * @param suffix      passed to the {@link ExtensionFileFilter} constructor
     * @param recursively passed to the {@link ExtensionFileFilter} constructor
     */
    public void loadPath(String pathName, String suffix, boolean recursively) {
        this.loadPath(new File(pathName), (FileFilter) new ExtensionFileFilter(suffix, recursively));
    }

    /**
     * Loads trees from the given path, filtering with an
     * {@link ExtensionFileFilter} built from {@code suffix} and {@code recursively}.
     *
     * @param path        file or directory to load from
     * @param suffix      passed to the {@link ExtensionFileFilter} constructor
     * @param recursively passed to the {@link ExtensionFileFilter} constructor
     */
    public void loadPath(File path, String suffix, boolean recursively) {
        this.loadPath(path, (FileFilter) new ExtensionFileFilter(suffix, recursively));
    }

    /**
     * Loads trees from the given path using the supplied filter.
     *
     * @param pathName path name, wrapped in a {@link File}
     * @param filt     filter handed to {@link #loadPath(File, FileFilter)}
     */
    public void loadPath(String pathName, FileFilter filt) {
        this.loadPath(new File(pathName), filt);
    }

    /**
     * Loads trees from {@code path}, using {@code filt} to select files.
     * All other {@code loadPath} overloads delegate here; the actual loading
     * behaviour is defined by subclasses.
     *
     * @param path file or directory to load from
     * @param filt file filter to apply
     */
    public abstract void loadPath(File path, FileFilter filt);

    /**
     * Applies a visitor to this treebank. {@link #size()} and
     * {@link #toString()} rely on implementations invoking
     * {@link TreeVisitor#visitTree(Tree)} once for each tree.
     *
     * @param tp the visitor to apply
     */
    public abstract void apply(TreeVisitor tp);

    /**
     * Returns a {@link TransformingTreebank} wrapping this treebank and the
     * given transformer.
     *
     * @param treeTrans transformer passed to the wrapper
     * @return a new {@link TransformingTreebank}
     */
    public Treebank transform(TreeTransformer treeTrans) {
        return new TransformingTreebank(this, treeTrans);
    }

    /**
     * Returns the {@code toString()} of every visited tree, each followed by
     * a newline character.
     *
     * @return the concatenated tree strings
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        this.apply(new TreeVisitor() {
            @Override
            public void visitTree(Tree t) {
                sb.append(t.toString());
                sb.append('\n');
            }
        });
        return sb.toString();
    }

    /**
     * Counts the trees by running a counting visitor through
     * {@link #apply(TreeVisitor)}; each call costs a full traversal.
     *
     * @return the number of {@code visitTree} calls made by {@code apply}
     */
    @Override
    public int size() {
        CounterTreeProcessor counter = new CounterTreeProcessor();
        this.apply(counter);
        return counter.total();
    }

    /**
     * Splits the treebank across three writers in a repeating cycle of ten
     * trees: positions 0-7 go to {@code trainW}, position 8 to {@code devW}
     * and position 9 to {@code testW}. Trees are written with
     * {@code Tree.pennPrint}. The writers are flushed but not closed.
     *
     * @param trainW destination for 8 out of every 10 trees
     * @param devW   destination for the 9th tree of every 10
     * @param testW  destination for the 10th tree of every 10
     * @throws IOException declared for callers; the {@link PrintWriter}
     *                     wrappers used here do not themselves propagate I/O errors
     */
    public void decimate(Writer trainW, Writer devW, Writer testW) throws IOException {
        PrintWriter trainPW = new PrintWriter(trainW, true);
        PrintWriter devPW = new PrintWriter(devW, true);
        PrintWriter testPW = new PrintWriter(testW, true);
        int position = 0;
        for (Tree t : this) {
            if (position == 8) {
                t.pennPrint(devPW);
            } else if (position == 9) {
                t.pennPrint(testPW);
            } else {
                t.pennPrint(trainPW);
            }
            position = (position + 1) % 10;
        }
        // Auto-flush only triggers on println/printf/format, so flush explicitly
        // in case the last write did not end with one of those calls.
        trainPW.flush();
        devPW.flush();
        testPW.flush();
    }

    /**
     * Returns {@link #textualSummary(TreebankLanguagePack)} with a
     * {@code null} language pack (no punctuation statistics).
     *
     * @return the summary text
     */
    public String textualSummary() {
        return this.textualSummary(null);
    }

    /**
     * Builds a multi-line, human-readable report about the treebank: tree and
     * token counts, root categories, sentence length range and average,
     * category/tag/word type counts, and warnings about suspicious structure
     * (non-unary roots, non-phrasal trees, null or empty labels, phrasal nodes
     * with leaf children, known empty elements, labels containing '@').
     *
     * <p>Side effect: any node whose label is {@code null}, or whose label
     * value is {@code null}, is given an empty-string label/value while the
     * trees are scanned.
     *
     * @param tlp language pack used to recognise punctuation tags; may be
     *            {@code null}, in which case no punctuation counts are collected
     * @return the summary text
     * @throws IllegalStateException if a node is neither a leaf, a preterminal
     *                               nor phrasal
     */
    public String textualSummary(TreebankLanguagePack tlp) {
        int numTrees = 0;
        int numTreesLE40 = 0;
        int numNonUnaryRoots = 0;
        Tree nonUnaryEg = null;
        ClassicCounter<Tree> nonUnaries = new ClassicCounter<Tree>();
        ClassicCounter<String> roots = new ClassicCounter<String>();
        ClassicCounter<String> starts = new ClassicCounter<String>();
        ClassicCounter<String> puncts = new ClassicCounter<String>();
        int numUnenclosedLeaves = 0;
        int numLeaves = 0;
        int numNonPhrasal = 0;
        int numPreTerminalWithMultipleChildren = 0;
        int numWords = 0;
        int numTags = 0;
        int shortestSentence = Integer.MAX_VALUE;
        int longestSentence = 0;
        int numNullLabel = 0;
        Set<String> words = new HashSet<String>();
        ClassicCounter<String> tags = new ClassicCounter<String>();
        ClassicCounter<String> cats = new ClassicCounter<String>();
        Tree leafEg = null;
        Tree preTerminalMultipleChildrenEg = null;
        Tree nullLabelEg = null;
        Tree rootRewritesAsTaggedWordEg = null;

        for (Tree t : this) {
            roots.incrementCount(t.value());
            ++numTrees;

            int leng = t.yield().size();
            if (leng <= SHORT_SENTENCE_LENGTH) {
                ++numTreesLE40;
            }
            if (leng < shortestSentence) {
                shortestSentence = leng;
            }
            if (leng > longestSentence) {
                longestSentence = leng;
            }

            // Classify the shape of the root.
            if (t.numChildren() > 1) {
                if (numNonUnaryRoots == 0) {
                    nonUnaryEg = t;
                }
                // Only the first rewrites are tallied, to bound the report size.
                if (numNonUnaryRoots < MAX_NON_UNARY_REWRITES_RECORDED) {
                    nonUnaries.incrementCount(t.localTree());
                }
                ++numNonUnaryRoots;
            } else if (t.isLeaf()) {
                ++numUnenclosedLeaves;
            } else {
                Tree onlyChild = t.firstChild();
                if (onlyChild.isLeaf()) {
                    ++numLeaves;
                    leafEg = t;
                } else if (onlyChild.isPreTerminal()) {
                    if (numNonPhrasal == 0) {
                        rootRewritesAsTaggedWordEg = t;
                    }
                    ++numNonPhrasal;
                }
                starts.incrementCount(onlyChild.value());
            }

            // Per-node statistics.
            for (Tree subtree : t) {
                Label lab = subtree.label();
                if (lab == null || lab.value() == null || "".equals(lab.value())) {
                    if (numNullLabel == 0) {
                        nullLabelEg = subtree;
                    }
                    ++numNullLabel;
                    // Normalise to an empty string so the value can be used below.
                    if (lab == null) {
                        subtree.setLabel(new StringLabel(""));
                    } else if (lab.value() == null) {
                        subtree.label().setValue("");
                    }
                }

                if (subtree.isLeaf()) {
                    ++numWords;
                    words.add(subtree.value());
                } else if (subtree.isPreTerminal()) {
                    ++numTags;
                    tags.incrementCount(subtree.value());
                    if (tlp != null && tlp.isPunctuationTag(subtree.value())) {
                        puncts.incrementCount(subtree.firstChild().value());
                    }
                } else if (subtree.isPhrasal()) {
                    boolean hasLeafChild = false;
                    for (Tree kt : subtree.children()) {
                        if (kt.isLeaf()) {
                            hasLeafChild = true;
                            break;
                        }
                    }
                    if (hasLeafChild) {
                        ++numPreTerminalWithMultipleChildren;
                        if (preTerminalMultipleChildrenEg == null) {
                            preTerminalMultipleChildrenEg = subtree;
                        }
                    }
                    cats.incrementCount(subtree.value());
                } else {
                    throw new IllegalStateException("Treebank: Bad tree in treebank!: " + subtree);
                }
            }
        }

        StringWriter sw = new StringWriter(2000);
        PrintWriter pw = new PrintWriter(sw);
        NumberFormat nf = NumberFormat.getNumberInstance();
        nf.setMaximumFractionDigits(0);
        pw.println("Treebank has " + numTrees + " trees (" + numTreesLE40 + " of length <= 40) and " + numWords + " words (tokens)");
        if (numTrees > 0) {
            if (numTags != numWords) {
                pw.println("  Warning! numTags differs and is " + numTags);
            }
            if (roots.size() == 1) {
                String root = roots.keySet().iterator().next();
                pw.println("  The root category is: " + root);
            } else {
                pw.println("  Warning! " + roots.size() + " different roots in treebank: " + Counters.toString(roots, nf));
            }
            if (numNonUnaryRoots > 0) {
                pw.print("  Warning! " + numNonUnaryRoots + " trees without unary initial rewrite.  ");
                if (numNonUnaryRoots > MAX_NON_UNARY_REWRITES_RECORDED) {
                    pw.print("First 100 ");
                }
                pw.println("Rewrites: " + Counters.toString(nonUnaries, nf));
                pw.println("    Example: " + nonUnaryEg);
            }
            if (numUnenclosedLeaves > 0 || numLeaves > 0 || numNonPhrasal > 0) {
                pw.println("  Warning! Non-phrasal trees: " + numUnenclosedLeaves + " bare leaves; " + numLeaves + " root rewrites as leaf; and " + numNonPhrasal + " root rewrites as tagged word");
                if (numLeaves > 0) {
                    pw.println("  Example bad root rewrites as leaf: " + leafEg);
                }
                if (numNonPhrasal > 0) {
                    pw.println("  Example bad root rewrites as tagged word: " + rootRewritesAsTaggedWordEg);
                }
            }
            if (numNullLabel > 0) {
                pw.println("  Warning!  " + numNullLabel + " tree nodes with null or empty string labels, e.g.:");
                pw.println("    " + nullLabelEg);
            }
            if (numPreTerminalWithMultipleChildren > 0) {
                pw.println("  Warning! " + numPreTerminalWithMultipleChildren + " preterminal nodes with multiple children.");
                pw.println("    Example: " + preTerminalMultipleChildrenEg);
            }
            // Average truncated to two decimals. The product is computed in long:
            // numWords * 100 overflows int once numWords exceeds ~21.4 million.
            double averageLength = (double) ((long) numWords * 100L / numTrees) / 100.0;
            pw.println("  Sentences range from " + shortestSentence + " to " + longestSentence + " words, with an average length of " + averageLength + " words.");
            pw.println("  " + cats.size() + " phrasal category types, " + tags.size() + " tag types, and " + words.size() + " word types");
            String[] empties = new String[]{"*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*"};
            Set<String> knownEmpties = new HashSet<String>(Arrays.asList(empties));
            Set<String> emptiesIntersection = Sets.intersection(words, knownEmpties);
            if (!emptiesIntersection.isEmpty()) {
                pw.println("  Caution! " + emptiesIntersection.size() + " word types are known empty elements: " + emptiesIntersection);
            }
            Set<String> joint = Sets.intersection(cats.keySet(), tags.keySet());
            if (!joint.isEmpty()) {
                pw.println("  Warning! " + joint.size() + " items are tags and categories: " + joint);
            }
            // Report only the first offending category / tag.
            for (String cat : cats.keySet()) {
                if (cat != null && cat.contains("@")) {
                    pw.println("  Warning!!  Stanford Parser does not work with categories containing '@' like: " + cat);
                    break;
                }
            }
            for (String tag : tags.keySet()) {
                if (tag != null && tag.contains("@")) {
                    pw.println("  Warning!!  Stanford Parser does not work with tags containing '@' like: " + tag);
                    break;
                }
            }
            pw.println("    Cats: " + Counters.toString(cats, nf));
            pw.println("    Tags: " + Counters.toString(tags, nf));
            pw.println("    " + starts.size() + " start categories: " + Counters.toString(starts, nf));
            if (!puncts.isEmpty()) {
                pw.println("    Puncts: " + Counters.toString(puncts, nf));
            }
        }
        return sw.toString();
    }

    /**
     * Always fails: removal is not supported.
     *
     * @param o ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean remove(Object o) {
        throw new UnsupportedOperationException("Treebank is read-only");
    }

    /** Visitor that counts how many times {@link #visitTree(Tree)} is called. */
    private static final class CounterTreeProcessor implements TreeVisitor {
        private int count;

        private CounterTreeProcessor() {
        }

        @Override
        public void visitTree(Tree t) {
            ++this.count;
        }

        /**
         * @return the number of trees visited so far
         */
        public int total() {
            return this.count;
        }
    }
}

