package edu.stanford.nlp.parser.lexparser;

import com.hp.hpl.jena.sparql.sse.Tags;
import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.ModCollinsHeadFinder;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.json.io.JSWriter;

/* loaded from: input_file:WEB-INF/lib/stanford-corenlp-3.4.1.jar:edu/stanford/nlp/parser/lexparser/TreeBinarizer.class */
public class TreeBinarizer implements TreeTransformer {
    // Debug switch; not referenced anywhere in the visible code of this class.
    private static final boolean DEBUG = false;
    // Head finder used by transformTree to pick the head child of each local tree.
    private HeadFinder hf;
    // Language pack consulted for start-symbol checks (startSymbol / isStartSymbol).
    private TreebankLanguagePack tlp;
    // When true, binarizeLocalTree uses the "inside" routines; otherwise the "outside" routines.
    private boolean insideFactor;
    // When true, binarizeLocalTree uses the markov* routines instead of the plain ones.
    private boolean markovFactor;
    // Upper bound on how many sibling labels are kept in synthetic labels / sibling history.
    private int markovOrder;
    // Selects makeSyntheticLabel2 over makeSyntheticLabel1 (only consulted when simpleLabels is false).
    private boolean useWrappingLabels;
    // With doSelectiveSplit on, labels whose recorded count is below this value are replaced
    // by a lower-order (markovOrder - 1) label in markovInsideBinarizeLocalTreeNew.
    private double selectiveSplitThreshold;
    // When true, makeSyntheticLabel2 may append a bracket-marked outermost child to the label.
    private boolean markFinalStates;
    // When true, markovInsideBinarizeLocalTreeNew wraps its result in a unary node that
    // carries the original label.
    private boolean unaryAtTop;
    // When true, synthetic labels are just "@" + category, with no sibling context.
    private final boolean simpleLabels;
    // When true, a local tree that already has exactly two children is returned unchanged
    // by markovInsideBinarizeLocalTreeNew.
    private final boolean noRebinarization;
    // false: label occurrences are counted in stateCounter; true: the counts are consulted
    // against selectiveSplitThreshold instead.
    private boolean doSelectiveSplit = false;
    // Occurrence counts of node labels, keyed by label value.
    private ClassicCounter<String> stateCounter = new ClassicCounter<>();
    // Factory used to build every node and leaf of the output trees.
    private TreeFactory tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());

    /**
     * Switches selective splitting on or off.
     *
     * <p>Switching it off also replaces the state counter with a fresh, empty one,
     * discarding any counts collected so far. Switching it on leaves the counter as is.
     *
     * @param z {@code true} to enable selective splitting, {@code false} to disable it
     */
    public void setDoSelectiveSplit(boolean z) {
        this.doSelectiveSplit = z;
        if (!z) {
            this.stateCounter = new ClassicCounter<>();
        }
    }

    /**
     * Concatenates the label values of the given trees, in order, separated by
     * {@code StringUtils.SPACE}.
     *
     * @param list trees whose {@code label().value()} strings are joined
     * @return the joined string; empty when {@code list} is empty
     */
    private static String join(List<Tree> list) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (Tree node : list) {
            if (!first) {
                joined.append(StringUtils.SPACE);
            }
            joined.append(node.label().value());
            first = false;
        }
        return joined.toString();
    }

    /**
     * Appends an indented, parenthesised rendering of {@code tree} to {@code sb}.
     *
     * <p>Each node starts on a new line, indented by two spaces per level. Children are
     * only expanded for the node at level 0 and for nodes whose label is synthetic
     * (see {@link #isSynthetic(String)}); other nodes are printed as a bare label.
     *
     * @param tree  node to render
     * @param sb    buffer receiving the output
     * @param depth nesting level of {@code tree}; 0 for the node the rendering starts at
     */
    private static void localTreeString(Tree tree, StringBuilder sb, int depth) {
        sb.append("\n");
        int pad = depth;
        while (pad-- > 0) {
            sb.append("  ");
        }
        sb.append("(").append(tree.label());
        boolean expand = depth == 0 || isSynthetic(tree.label().value());
        if (expand) {
            int childIdx = 0;
            while (childIdx < tree.numChildren()) {
                localTreeString(tree.getChild(childIdx), sb, depth + 1);
                childIdx++;
            }
        }
        sb.append(")");
    }

    /**
     * Tells whether a label string denotes a synthetic (binarization-introduced) node,
     * which this class marks with an {@code '@'} character.
     *
     * @param str label value to inspect; must not be {@code null}
     * @return {@code true} if {@code str} contains {@code '@'} anywhere
     */
    protected static boolean isSynthetic(String str) {
        return str.contains("@");
    }

    /**
     * Binarizes a single local tree (a node and its immediate children) using the
     * strategy selected by {@code markovFactor} and {@code insideFactor}.
     *
     * <p>In the markov case the node's label is first replaced by a
     * {@link CategoryWordTag} built from its category and the given head word/tag.
     *
     * @param tree       local tree to binarize
     * @param i          index of the head child among {@code tree}'s children
     * @param taggedWord head word and tag to put on newly created labels
     * @return the binarized local tree
     */
    Tree binarizeLocalTree(Tree tree, int i, TaggedWord taggedWord) {
        if (this.markovFactor) {
            String category = tree.label().value();
            tree.setLabel(new CategoryWordTag(category, taggedWord.word(), taggedWord.tag()));
            if (this.insideFactor) {
                return markovInsideBinarizeLocalTreeNew(tree, i, 0, tree.numChildren() - 1, true);
            }
            return markovOutsideBinarizeLocalTree(tree, taggedWord, i, category, new LinkedList<>(), false);
        }
        if (this.insideFactor) {
            return insideBinarizeLocalTree(tree, i, taggedWord, 0, 0);
        }
        return outsideBinarizeLocalTree(tree, tree.label().value(), tree.label().value(), i, taggedWord, 0, "", 0, "");
    }

    /**
     * Markovized outside binarization of a local tree.
     *
     * <p>While the head is not the first child ({@code headIdx > 0}) the leftmost child is
     * split off and the remainder is binarized recursively. Once the head is first, a unary
     * node is inserted (unless the category is a start symbol) and then children are split
     * off from the right until only the head remains. {@code context} holds the most
     * recently split-off siblings, trimmed to at most {@code markovOrder} entries, and is
     * used to build non-simple synthetic labels.
     *
     * @param tree     local tree (or synthetic remainder) to binarize
     * @param head     head word and tag for new labels
     * @param headIdx  index of the head child in {@code tree}; a negative value returns
     *                 {@code tree} unchanged
     * @param topCat   category of the original local tree, used as the synthetic label base
     * @param context  sibling history; mutated by this call
     * @param doneLeft {@code true} once the left side has been finished and the separating
     *                 unary step has been taken
     * @return the binarized tree
     */
    private Tree markovOutsideBinarizeLocalTree(Tree tree, TaggedWord head, int headIdx, String topCat, LinkedList<Tree> context, boolean doneLeft) {
        final String word = head.word();
        final String tag = head.tag();
        if (headIdx < 0) {
            return tree;
        }
        final List<Tree> kids = new ArrayList<>(2);

        if (headIdx > 0) {
            // Still children to the left of the head: peel off the leftmost one.
            context.addLast(tree.getChild(0));
            if (context.size() > this.markovOrder) {
                context.removeFirst();
            }
            String subLabel;
            if (this.simpleLabels) {
                subLabel = "@" + topCat;
            } else {
                String leftStr = join(context) + (headIdx > this.markovOrder - 1 ? " ..." : "");
                String headStr = tree.getChild(headIdx).label().value();
                subLabel = "@" + topCat + ": " + leftStr + StringUtils.SPACE + headStr + JSWriter.ArrayFinish;
            }
            Tree remainder = this.tf.newTreeNode(new CategoryWordTag(subLabel, word, tag), tree.getChildrenAsList().subList(1, tree.numChildren()));
            kids.add(tree.getChild(0));
            kids.add(markovOutsideBinarizeLocalTree(remainder, head, headIdx - 1, topCat, context, false));
            return this.tf.newTreeNode(tree.label(), kids);
        }

        if (!doneLeft) {
            // Head is leftmost but the right side has not been started yet.
            if (this.tlp.isStartSymbol(topCat)) {
                return markovOutsideBinarizeLocalTree(tree, head, headIdx, topCat, new LinkedList<>(), true);
            }
            String subLabel = this.simpleLabels ? "@" + topCat : "@" + topCat + ": " + tree.getChild(headIdx).label().value() + JSWriter.ArrayFinish;
            Tree unaryChild = this.tf.newTreeNode(new CategoryWordTag(subLabel, word, tag), tree.getChildrenAsList());
            kids.add(markovOutsideBinarizeLocalTree(unaryChild, head, headIdx, topCat, new LinkedList<>(), true));
            return this.tf.newTreeNode(tree.label(), kids);
        }

        final int numKids = tree.numChildren();
        if (numKids == 1) {
            return this.tf.newTreeNode(tree.label(), Collections.singletonList(tree.getChild(0)));
        }
        // Peel off the rightmost child.
        context.addFirst(tree.getChild(numKids - 1));
        if (context.size() > this.markovOrder) {
            context.removeLast();
        }
        String subLabel;
        if (this.simpleLabels) {
            subLabel = "@" + topCat;
        } else {
            String headStr = tree.getChild(headIdx).label().value();
            String rightStr = (numKids > this.markovOrder - 1 ? "... " : "") + join(context);
            subLabel = "@" + topCat + ": " + headStr + StringUtils.SPACE + rightStr;
        }
        Tree remainder = this.tf.newTreeNode(new CategoryWordTag(subLabel, word, tag), tree.getChildrenAsList().subList(0, numKids - 1));
        kids.add(markovOutsideBinarizeLocalTree(remainder, head, headIdx, topCat, context, true));
        kids.add(tree.getChild(numKids - 1));
        return this.tf.newTreeNode(tree.label(), kids);
    }

    /**
     * Markovized inside binarization of the children of {@code tree} in the index range
     * {@code [left, right]}.
     *
     * <p>Children left of the head are split off first (leftmost first), then children
     * right of the head (rightmost first), until only the head child remains. Intermediate
     * nodes get synthetic labels; the outermost node keeps the tree's own label. When
     * selective splitting is off, each produced label is counted in {@code stateCounter};
     * when it is on, labels seen fewer than {@code selectiveSplitThreshold} times are
     * replaced by a label of order {@code markovOrder - 1}.
     *
     * @param tree     the local tree being binarized; its children array is not modified
     * @param headIdx  index of the head child
     * @param left     index of the leftmost child still to be covered
     * @param right    index of the rightmost child still to be covered
     * @param starting {@code true} for the outermost call on a local tree
     * @return the binarized subtree
     */
    private Tree markovInsideBinarizeLocalTreeNew(Tree tree, int headIdx, int left, int right, boolean starting) {
        final Tree[] kids = tree.children();
        final boolean onlyHeadRemains = left == headIdx && right == headIdx;

        if (starting) {
            if (onlyHeadRemains) {
                return tree;
            }
            if (this.noRebinarization && kids.length == 2) {
                return tree;
            }
            if (this.unaryAtTop) {
                Tree inner = markovInsideBinarizeLocalTreeNew(tree, headIdx, left, right, false);
                return this.tf.newTreeNode(tree.label(), Collections.singletonList(inner));
            }
        }

        List<Tree> newKids = null;
        if (onlyHeadRemains) {
            newKids = Collections.singletonList(kids[headIdx]);
        } else if (left < headIdx) {
            newKids = new ArrayList<>(2);
            newKids.add(kids[left]);
            newKids.add(markovInsideBinarizeLocalTreeNew(tree, headIdx, left + 1, right, false));
        } else if (right > headIdx) {
            newKids = new ArrayList<>(2);
            newKids.add(markovInsideBinarizeLocalTreeNew(tree, headIdx, left, right - 1, false));
            newKids.add(kids[right]);
        } else {
            // Inconsistent indices: reported, and the node is still built with a null child list.
            System.err.println("UHOH, bad parameters passed to markovInsideBinarizeLocalTree");
        }

        Label label;
        if (starting) {
            label = tree.label();
        } else {
            label = makeSyntheticLabel(tree, left, right, headIdx, this.markovOrder);
        }

        if (this.doSelectiveSplit) {
            if (this.stateCounter.getCount(label.value()) < this.selectiveSplitThreshold) {
                // Too rare: keep the original label at the top (without unaryAtTop),
                // otherwise fall back to a lower-order synthetic label.
                if (starting && !this.unaryAtTop) {
                    label = tree.label();
                } else {
                    label = makeSyntheticLabel(tree, left, right, headIdx, this.markovOrder - 1);
                }
            }
        } else {
            this.stateCounter.incrementCount(label.value(), 1.0d);
        }
        return this.tf.newTreeNode(label, newKids);
    }

    /**
     * Builds the label for an intermediate node, dispatching on the label options:
     * {@code simpleLabels} takes precedence, then {@code useWrappingLabels} selects
     * {@code makeSyntheticLabel2}; otherwise {@code makeSyntheticLabel1} is used.
     *
     * @param tree    local tree being binarized
     * @param left    index of the leftmost child covered by the new node
     * @param right   index of the rightmost child covered by the new node
     * @param headIdx index of the head child
     * @param order   markov order to use for this label
     * @return the synthetic label
     */
    private Label makeSyntheticLabel(Tree tree, int left, int right, int headIdx, int order) {
        if (this.simpleLabels) {
            return makeSimpleSyntheticLabel(tree);
        }
        if (this.useWrappingLabels) {
            return makeSyntheticLabel2(tree, left, right, headIdx, order);
        }
        return makeSyntheticLabel1(tree, left, right, headIdx, order);
    }

    /**
     * Builds the context-free synthetic label {@code "@" + category}, carrying over the
     * word and tag of the tree's own label.
     *
     * @param tree tree whose label must implement both {@link HasWord} and {@link HasTag}
     * @return the new {@link CategoryWordTag} label
     */
    private static Label makeSimpleSyntheticLabel(Tree tree) {
        String category = "@" + tree.label().value();
        String word = ((HasWord) tree.label()).word();
        String tag = ((HasTag) tree.label()).tag();
        return new CategoryWordTag(category, word, tag);
    }

    /**
     * Builds a synthetic label that lists up to {@code order} sibling categories around
     * the bracketed head category.
     *
     * <p>Siblings are consumed from the left edge first (moving toward the head), then
     * from the right edge. An edge that coincides with the first/last child is marked with
     * {@code JSWriter.ArrayStart} / {@code JSWriter.ArrayFinish}; siblings that did not fit
     * within {@code order} are summarised as {@code "..."}.
     *
     * @param tree    local tree being binarized; its label must implement
     *                {@link HasWord} and {@link HasTag}
     * @param left    index of the leftmost child covered by the new node
     * @param right   index of the rightmost child covered by the new node
     * @param headIdx index of the head child
     * @param order   maximum number of sibling categories to spell out
     * @return the new {@link CategoryWordTag} label
     */
    private static Label makeSyntheticLabel1(Tree tree, int left, int right, int headIdx, int order) {
        final String category = tree.label().value();
        final Tree[] kids = tree.children();

        String leftPart = (left == 0) ? JSWriter.ArrayStart : StringUtils.SPACE;
        String rightPart = (right == kids.length - 1) ? JSWriter.ArrayFinish : StringUtils.SPACE;

        for (int used = 0; used < order; used++) {
            if (left < headIdx) {
                leftPart = leftPart + kids[left].label().value() + StringUtils.SPACE;
                left++;
            } else if (right > headIdx) {
                rightPart = StringUtils.SPACE + kids[right].label().value() + rightPart;
                right--;
            } else {
                break;
            }
        }

        // Anything still uncovered on either side is elided.
        if (right > headIdx) {
            rightPart = "..." + rightPart;
        }
        if (left < headIdx) {
            leftPart = leftPart + "...";
        }

        String labelStr = "@" + category + "| " + leftPart + Tags.LBRACKET + tree.getChild(headIdx).label().value() + Tags.RBRACKET + rightPart;
        String word = ((HasWord) tree.label()).word();
        String tag = ((HasTag) tree.label()).tag();
        return new CategoryWordTag(labelStr, word, tag);
    }

    /**
     * Builds a "wrapping" synthetic label of the form
     * {@code "@" + category + "| " + headCategory + "_" + middle + finalPiece}.
     *
     * <p>{@code middle} spells out up to {@code order} sibling categories, each followed
     * by {@code Tags.symLT} (left siblings) or {@code Tags.symGT} (right siblings), with
     * later-consumed siblings placed in front; a leading {@code " ..."} marks siblings that
     * did not fit. With {@code markFinalStates} on, an outermost child at the very edge of
     * the local tree is moved into {@code finalPiece} (bracket-marked) and counts against
     * {@code order}.
     *
     * @param tree    local tree being binarized; its label must implement
     *                {@link HasWord} and {@link HasTag}
     * @param left    index of the leftmost child covered by the new node
     * @param right   index of the rightmost child covered by the new node
     * @param headIdx index of the head child
     * @param order   maximum number of sibling categories to spell out
     * @return the new {@link CategoryWordTag} label
     */
    private Label makeSyntheticLabel2(Tree tree, int left, int right, int headIdx, int order) {
        final String category = tree.label().value();
        final Tree[] kids = tree.children();

        String finalPiece = "";
        int used = 0;
        if (this.markFinalStates) {
            if (headIdx != 0 && left == 0) {
                finalPiece = StringUtils.SPACE + kids[left].label().value() + Tags.LBRACKET;
                left++;
                used = 1;
            } else if (headIdx == 0 && right > headIdx && right == kids.length - 1) {
                finalPiece = StringUtils.SPACE + kids[right].label().value() + Tags.RBRACKET;
                right--;
                used = 1;
            }
        }

        String middle = "";
        for (; used < order; used++) {
            if (left < headIdx) {
                middle = StringUtils.SPACE + kids[left].label().value() + Tags.symLT + middle;
                left++;
            } else if (right > headIdx) {
                middle = StringUtils.SPACE + kids[right].label().value() + Tags.symGT + middle;
                right--;
            } else {
                break;
            }
        }
        if (right > headIdx || left < headIdx) {
            middle = " ..." + middle;
        }

        final String headCategory = tree.getChild(headIdx).label().value();
        final int capacity = 4 + category.length() + headCategory.length() + middle.length() + finalPiece.length();
        StringBuilder labelStr = new StringBuilder(capacity);
        labelStr.append("@");
        labelStr.append(category);
        labelStr.append("| ");
        labelStr.append(headCategory);
        labelStr.append("_");
        labelStr.append(middle);
        labelStr.append(finalPiece);
        return new CategoryWordTag(labelStr.toString(), ((HasWord) tree.label()).word(), ((HasTag) tree.label()).tag());
    }

    /**
     * Non-markov inside binarization: intermediate nodes are labelled {@code "@ "} followed
     * by the space-separated categories of all children they cover.
     *
     * <p>{@code leftDone} children have already been split off on the left and
     * {@code rightDone} on the right. Children left of the head are split off first, then
     * children on the right. The outermost call ({@code leftDone == rightDone == 0}) keeps
     * the tree's own category as its label.
     *
     * @param tree      local tree being binarized
     * @param headIdx   index of the head child
     * @param head      head word and tag for new labels
     * @param leftDone  number of children already consumed from the left
     * @param rightDone number of children already consumed from the right
     * @return the binarized subtree covering the remaining children
     */
    private Tree insideBinarizeLocalTree(Tree tree, int headIdx, TaggedWord head, int leftDone, int rightDone) {
        final String word = head.word();
        final String tag = head.tag();
        final List<Tree> kids = new ArrayList<>(2);
        final boolean atTop = leftDone == 0 && rightDone == 0;

        if (tree.numChildren() <= leftDone + rightDone + 2) {
            // One or two children remain: build the node directly.
            Tree first = tree.getChild(leftDone);
            kids.add(first);
            if (tree.numChildren() == leftDone + rightDone + 1) {
                return this.tf.newTreeNode(new CategoryWordTag(tree.label().value(), word, tag), kids);
            }
            Tree second = tree.getChild(leftDone + 1);
            kids.add(second);
            String name = tree.label().value();
            if (!atTop) {
                name = "@ " + first.label().value() + StringUtils.SPACE + second.label().value();
            }
            return this.tf.newTreeNode(new CategoryWordTag(name, word, tag), kids);
        }

        if (headIdx > leftDone) {
            // Split off the leftmost remaining child.
            Tree leftKid = tree.getChild(leftDone);
            Tree rest = insideBinarizeLocalTree(tree, headIdx, head, leftDone + 1, rightDone);
            kids.add(leftKid);
            kids.add(rest);
            // substring(2) strips the "@ " prefix of the nested synthetic label.
            String synthetic = "@ " + leftKid.label().value() + StringUtils.SPACE + rest.label().value().substring(2);
            String name = atTop ? tree.label().value() : synthetic;
            return this.tf.newTreeNode(new CategoryWordTag(name, word, tag), kids);
        }

        // Split off the rightmost remaining child.
        Tree rest = insideBinarizeLocalTree(tree, headIdx, head, leftDone, rightDone + 1);
        Tree rightKid = tree.getChild((tree.numChildren() - rightDone) - 1);
        kids.add(rest);
        kids.add(rightKid);
        String synthetic = "@ " + rest.label().value().substring(2) + StringUtils.SPACE + rightKid.label().value();
        String name = atTop ? tree.label().value() : synthetic;
        return this.tf.newTreeNode(new CategoryWordTag(name, word, tag), kids);
    }

    /**
     * Non-markov outside binarization: each intermediate node is labelled with the
     * categories of the siblings that have already been split off around it.
     *
     * <p>Children left of the head are split off first, then children on the right. With
     * {@code simpleLabels} every intermediate label is just {@code "@" + topCat}.
     *
     * @param tree      local tree being binarized
     * @param labelStr  category string for the node created by this call
     * @param topCat    category of the original local tree, used as synthetic label base
     * @param headIdx   index of the head child
     * @param head      head word and tag for new labels
     * @param leftDone  number of children already split off on the left
     * @param leftStr   accumulated categories of the left children split off so far
     * @param rightDone number of children already split off on the right
     * @param rightStr  accumulated categories of the right children split off so far
     * @return the binarized subtree covering the remaining children
     */
    private Tree outsideBinarizeLocalTree(Tree tree, String labelStr, String topCat, int headIdx, TaggedWord head, int leftDone, String leftStr, int rightDone, String rightStr) {
        final List<Tree> kids = new ArrayList<>(2);
        final CategoryWordTag nodeLabel = new CategoryWordTag(labelStr, head.word(), head.tag());
        final int remaining = (tree.numChildren() - leftDone) - rightDone;

        if (remaining <= 2) {
            kids.add(tree.getChild(leftDone));
            if (remaining == 2) {
                kids.add(tree.getChild(leftDone + 1));
            }
            return this.tf.newTreeNode(nodeLabel, kids);
        }

        if (headIdx > leftDone) {
            // Split off the leftmost remaining child.
            Tree leftKid = tree.getChild(leftDone);
            String newLeftStr = leftStr + StringUtils.SPACE + leftKid.label().value();
            String subLabel = this.simpleLabels ? "@" + topCat : "@" + topCat + " :" + newLeftStr + " ..." + rightStr;
            Tree rest = outsideBinarizeLocalTree(tree, subLabel, topCat, headIdx, head, leftDone + 1, newLeftStr, rightDone, rightStr);
            kids.add(leftKid);
            kids.add(rest);
            return this.tf.newTreeNode(nodeLabel, kids);
        }

        // Split off the rightmost remaining child.
        Tree rightKid = tree.getChild((tree.numChildren() - rightDone) - 1);
        String newRightStr = StringUtils.SPACE + rightKid.label().value() + rightStr;
        String subLabel = this.simpleLabels ? "@" + topCat : "@" + topCat + " :" + leftStr + " ..." + newRightStr;
        kids.add(outsideBinarizeLocalTree(tree, subLabel, topCat, headIdx, head, leftDone, leftStr, rightDone + 1, newRightStr));
        kids.add(rightKid);
        return this.tf.newTreeNode(nodeLabel, kids);
    }

    /**
     * Returns a binarized copy of {@code tree}, built bottom-up with this binarizer's
     * tree factory.
     *
     * <p>Leaves become {@link Word} leaves, preterminals become nodes labelled with
     * (category, word, category), and every other node has its children transformed and is
     * then binarized around its head child. A node whose category starts with the language
     * pack's start symbol is rebuilt with its transformed children but not binarized.
     *
     * <p>If the head finder returns {@code null} for a non-start node, a message and the
     * tree are printed and the subsequent head-label access fails with a
     * {@link NullPointerException}.
     *
     * @param tree tree to binarize; may be {@code null}
     * @return the binarized tree, or {@code null} if {@code tree} is {@code null}
     */
    @Override // edu.stanford.nlp.trees.TreeTransformer
    public Tree transformTree(Tree tree) {
        if (tree == null) {
            return null;
        }
        final String category = tree.label().value();
        if (tree.isLeaf()) {
            return this.tf.newLeaf(new Word(category));
        }
        if (tree.isPreTerminal()) {
            Tree leaf = transformTree(tree.getChild(0));
            String word = leaf.value();
            List<Tree> onlyChild = new ArrayList<>(1);
            onlyChild.add(leaf);
            return this.tf.newTreeNode(new CategoryWordTag(category, word, category), onlyChild);
        }

        final Tree headChild = this.hf.determineHead(tree);
        if (headChild == null && !tree.label().value().startsWith(this.tlp.startSymbol())) {
            System.err.println("### No head found for:");
            tree.pennPrint();
        }

        // Transform every child and remember the position of the head (matched by identity).
        final Tree[] kids = tree.children();
        final List<Tree> newKids = new ArrayList<>(kids.length);
        int headIdx = -1;
        int position = 0;
        for (Tree kid : kids) {
            newKids.add(transformTree(kid));
            if (kid == headChild) {
                headIdx = position;
            }
            position++;
        }

        if (tree.label().value().startsWith(this.tlp.startSymbol())) {
            return this.tf.newTreeNode(tree.label(), newKids);
        }
        String headWord = ((HasWord) headChild.label()).word();
        String headTag = ((HasTag) headChild.label()).tag();
        Tree localTree = this.tf.newTreeNode(new CategoryWordTag(category, headWord, headTag), newKids);
        return binarizeLocalTree(localTree, headIdx, new TaggedWord(headWord, headTag));
    }

    /**
     * Creates a binarizer with the given strategy and labelling options.
     *
     * @param headFinder           finds the head child of each local tree
     * @param treebankLanguagePack supplies the start symbol
     * @param z                    {@code insideFactor}: use inside rather than outside binarization
     * @param z2                   {@code markovFactor}: use the markovized routines
     * @param i                    {@code markovOrder}: number of siblings kept in synthetic labels
     * @param z3                   {@code useWrappingLabels}: use the wrapping label format
     * @param z4                   {@code unaryAtTop}: add a unary node above markov-inside results
     * @param d                    {@code selectiveSplitThreshold}: minimum state count for full-order labels
     * @param z5                   {@code markFinalStates}: mark edge children in wrapping labels
     * @param z6                   {@code simpleLabels}: use bare {@code "@" + category} labels
     * @param z7                   {@code noRebinarization}: leave already-binary local trees alone
     */
    public TreeBinarizer(HeadFinder headFinder, TreebankLanguagePack treebankLanguagePack, boolean z, boolean z2, int i, boolean z3, boolean z4, double d, boolean z5, boolean z6, boolean z7) {
        // Collaborators.
        this.tlp = treebankLanguagePack;
        this.hf = headFinder;

        // Binarization strategy.
        this.markovFactor = z2;
        this.insideFactor = z;
        this.markovOrder = i;
        this.unaryAtTop = z4;
        this.noRebinarization = z7;

        // Labelling and selective splitting.
        this.simpleLabels = z6;
        this.useWrappingLabels = z3;
        this.markFinalStates = z5;
        this.selectiveSplitThreshold = d;
    }

    /**
     * Command-line driver: loads a treebank, binarizes every tree and prints each original
     * tree followed by its binarized form.
     *
     * <p>Usage: {@code java TreeBinarizer [-tlpp class|-markovOrder int|...] treebankPath}
     *
     * <p>Leading options (arguments starting with {@code "-"}):
     * <ul>
     *   <li>{@code -tlp class} – instantiate this {@link TreebankLanguagePack}</li>
     *   <li>{@code -tlpp class} – instantiate this {@code TreebankLangParserParams}; when
     *       given, it supplies the treebank, language pack, file extension and head finder</li>
     *   <li>{@code -insideFactor}, {@code -simpleLabels}, {@code -noRebinarization} – flags
     *       passed to the binarizer</li>
     *   <li>{@code -markovOrder int} – markov order passed to the binarizer</li>
     * </ul>
     * Unknown options are reported on stderr and skipped. The binarizer is always built
     * with markov factoring disabled.
     *
     * @param strArr command-line arguments; the first non-option argument is the treebank path
     * @throws RuntimeException if a class named by {@code -tlp} or {@code -tlpp} cannot be
     *                          instantiated
     */
    public static void main(String[] strArr) {
        TreebankLangParserParams parserParams = null;
        TreeReaderFactory readerFactory = new TreeReaderFactory() {
            @Override // edu.stanford.nlp.trees.TreeReaderFactory
            public TreeReader newTreeReader(Reader reader) {
                return new PennTreeReader(reader, new LabeledScoredTreeFactory(new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
            }
        };
        String fileSuffix = Treebank.DEFAULT_TREE_FILE_SUFFIX;
        // Declared with the interface types: both may be replaced below by arbitrary
        // implementations coming from -tlp / -tlpp.
        HeadFinder headFinder = new ModCollinsHeadFinder();
        TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
        boolean insideFactor = false;
        int markovOrder = 1;
        boolean simpleLabels = false;
        boolean noRebinarization = false;

        int argIdx = 0;
        while (argIdx < strArr.length && strArr[argIdx].startsWith("-")) {
            if (strArr[argIdx].equalsIgnoreCase("-tlp") && argIdx + 1 < strArr.length) {
                try {
                    languagePack = (TreebankLanguagePack) Class.forName(strArr[argIdx + 1]).newInstance();
                    argIdx++;
                } catch (Exception e) {
                    System.err.println("Couldn't instantiate: " + strArr[argIdx + 1]);
                    throw new RuntimeException(e);
                }
            } else if (strArr[argIdx].equalsIgnoreCase("-tlpp") && argIdx + 1 < strArr.length) {
                try {
                    parserParams = (TreebankLangParserParams) Class.forName(strArr[argIdx + 1]).newInstance();
                    argIdx++;
                } catch (Exception e) {
                    System.err.println("Couldn't instantiate: " + strArr[argIdx + 1]);
                    throw new RuntimeException(e);
                }
            } else if (strArr[argIdx].equalsIgnoreCase("-insideFactor")) {
                insideFactor = true;
            } else if (strArr[argIdx].equalsIgnoreCase("-markovOrder") && argIdx + 1 < strArr.length) {
                argIdx++;
                markovOrder = Integer.parseInt(strArr[argIdx]);
            } else if (strArr[argIdx].equalsIgnoreCase("-simpleLabels")) {
                simpleLabels = true;
            } else if (strArr[argIdx].equalsIgnoreCase("-noRebinarization")) {
                noRebinarization = true;
            } else {
                System.err.println("Unknown option:" + strArr[argIdx]);
            }
            argIdx++;
        }

        // No treebank path left after the options.
        if (argIdx >= strArr.length) {
            System.err.println("usage: java TreeBinarizer [-tlpp class|-markovOrder int|...] treebankPath");
            System.exit(0);
        }

        Treebank treebank;
        if (parserParams != null) {
            treebank = parserParams.memoryTreebank();
            languagePack = parserParams.treebankLanguagePack();
            fileSuffix = languagePack.treebankFileExtension();
            headFinder = parserParams.headFinder();
        } else {
            treebank = new DiskTreebank(readerFactory);
        }
        treebank.loadPath(strArr[argIdx], fileSuffix, true);

        TreeBinarizer binarizer = new TreeBinarizer(headFinder, languagePack, insideFactor, false, markovOrder, false, false, 20.0d, false, simpleLabels, noRebinarization);
        for (Tree original : treebank) {
            Tree binarized = binarizer.transformTree(original);
            System.out.println("Original tree:");
            original.pennPrint();
            System.out.println("Binarized tree:");
            binarized.pennPrint();
            System.out.println();
        }
    }
}
