package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.ViterbiParser;
import edu.stanford.nlp.parser.lexparser.BiLexPCFGParser;
import edu.stanford.nlp.parser.metrics.Evalb;
import edu.stanford.nlp.parser.metrics.TaggingEval;
import edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeLengthComparator;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:WEB-INF/lib/stanford-corenlp-3.4.1.jar:edu/stanford/nlp/parser/lexparser/FactoredParser.class */
public class FactoredParser {
    /**
     * Trains a factored parser on one file-number range of a treebank, optionally serializes
     * it, then parses a second file-number range and prints per-sentence and summary scores.
     *
     * <p>Flags handled directly by this method (matched at the front of the argument list,
     * while arguments start with {@code -}):
     * <ul>
     *   <li>{@code -path dir} – treebank location (default
     *       {@code /u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj})</li>
     *   <li>{@code -train low high} – training file-number range (default 200–2199)</li>
     *   <li>{@code -test low high} – test file-number range (default 2200–2219)</li>
     *   <li>{@code -serialize file} – save the trained parser to {@code file}</li>
     *   <li>{@code -tLPP class} – class name of the {@code TreebankLangParserParams} to use</li>
     *   <li>{@code -encoding enc} – input and output encoding for the language params</li>
     * </ul>
     * Every other flag is handed to {@code Options.setOptionOrWarn}.
     *
     * <p>Raw collection types and explicit casts are kept as they appear in the decompiled
     * original so that this method compiles against the same project signatures.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if {@code -encoding} is the last argument
     * @throws RuntimeException if the {@code -tLPP} class cannot be loaded or instantiated
     */
    public static void main(String[] strArr) {
        Options options = new Options(new EnglishTreebankParserParams());
        System.out.println(StringUtils.toInvocationString("FactoredParser", strArr));

        String treebankPath = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";
        int trainLow = 200;
        int trainHigh = 2199;
        int testLow = 2200;
        int testHigh = 2219;
        String serializeFile = null;

        // ---- Command-line parsing ----
        int argIndex = 0;
        while (argIndex < strArr.length && strArr[argIndex].startsWith("-")) {
            String flag = strArr[argIndex];
            if (flag.equalsIgnoreCase("-path") && argIndex + 1 < strArr.length) {
                treebankPath = strArr[argIndex + 1];
                argIndex += 2;
            } else if (flag.equalsIgnoreCase("-train") && argIndex + 2 < strArr.length) {
                trainLow = Integer.parseInt(strArr[argIndex + 1]);
                trainHigh = Integer.parseInt(strArr[argIndex + 2]);
                argIndex += 3;
            } else if (flag.equalsIgnoreCase("-test") && argIndex + 2 < strArr.length) {
                testLow = Integer.parseInt(strArr[argIndex + 1]);
                testHigh = Integer.parseInt(strArr[argIndex + 2]);
                argIndex += 3;
            } else if (flag.equalsIgnoreCase("-serialize") && argIndex + 1 < strArr.length) {
                serializeFile = strArr[argIndex + 1];
                argIndex += 2;
            } else if (flag.equalsIgnoreCase("-tLPP") && argIndex + 1 < strArr.length) {
                String className = strArr[argIndex + 1];
                try {
                    options.tlpParams = (TreebankLangParserParams) Class.forName(className).newInstance();
                    argIndex += 2;
                } catch (ClassNotFoundException e) {
                    System.err.println("Class not found: " + className);
                    throw new RuntimeException(e);
                } catch (IllegalAccessException e) {
                    System.err.println("illegal access" + e);
                    throw new RuntimeException(e);
                } catch (InstantiationException e) {
                    System.err.println("Couldn't instantiate: " + className + ": " + e.toString());
                    throw new RuntimeException(e);
                }
            } else if (flag.equals("-encoding")) {
                // Unlike the other flags, this branch used to read the next argument without
                // checking that it exists; fail with a clear message instead of an index error.
                if (argIndex + 1 >= strArr.length) {
                    throw new IllegalArgumentException("-encoding requires a character encoding argument");
                }
                options.tlpParams.setInputEncoding(strArr[argIndex + 1]);
                options.tlpParams.setOutputEncoding(strArr[argIndex + 1]);
                argIndex += 2;
            } else {
                argIndex = options.setOptionOrWarn(strArr, argIndex);
            }
        }

        TreebankLanguagePack tlp = options.tlpParams.treebankLanguagePack();
        options.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(options.tlpParams.sisterSplitters()));
        PrintWriter pw = options.tlpParams.pw();
        options.testOptions.display();
        options.trainOptions.display();
        options.display();
        options.tlpParams.display();

        // ---- Load treebanks ----
        MemoryTreebank trainTreebank = options.tlpParams.memoryTreebank();
        MemoryTreebank testTreebank = options.tlpParams.testMemoryTreebank();
        Timing.startTime();
        System.err.print("Reading trees...");
        testTreebank.loadPath(treebankPath, new NumberRangeFileFilter(testLow, testHigh, true));
        if (options.testOptions.increasingLength) {
            // Sorted before binarization so that binaryTestTrees below stays index-aligned.
            Collections.sort(testTreebank, new TreeLengthComparator());
        }
        trainTreebank.loadPath(treebankPath, new NumberRangeFileFilter(trainLow, trainHigh, true));
        Timing.tick("done.");

        // ---- Annotate and binarize ----
        System.err.print("Binarizing trees...");
        TreeAnnotatorAndBinarizer binarizer;
        if (options.trainOptions.leftToRight) {
            binarizer = new TreeAnnotatorAndBinarizer(options.tlpParams.headFinder(), new LeftHeadFinder(), options.tlpParams, options.forceCNF, !options.trainOptions.outsideFactor(), true, options);
        } else {
            binarizer = new TreeAnnotatorAndBinarizer(options.tlpParams, options.forceCNF, !options.trainOptions.outsideFactor(), true, options);
        }
        // Only created (and only used) when collinsPunc is enabled.
        CollinsPuncTransformer collinsPuncTransformer = options.trainOptions.collinsPunc ? new CollinsPuncTransformer(tlp) : null;
        Debinarizer debinarizer = new Debinarizer(options.forceCNF);
        ArrayList binaryTrainTrees = new ArrayList();

        if (options.trainOptions.selectiveSplit) {
            options.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(trainTreebank, options.trainOptions.tagSelectiveSplit, 0, options.trainOptions.selectiveSplitCutOff, options.trainOptions.tagSelectiveSplitCutOff, options.tlpParams.treebankLanguagePack());
            if (options.trainOptions.deleteSplitters != null) {
                ArrayList deletedSplitters = new ArrayList();
                for (String toDelete : options.trainOptions.deleteSplitters) {
                    String baseCategory = tlp.basicCategory(toDelete);
                    // A bare basic category removes every splitter sharing that category;
                    // anything more specific removes only the exact match.
                    boolean isBasicCategory = toDelete.equals(baseCategory);
                    Iterator<String> splitterIter = options.trainOptions.splitters.iterator();
                    while (splitterIter.hasNext()) {
                        String splitter = splitterIter.next();
                        if ((isBasicCategory && tlp.basicCategory(splitter).equals(baseCategory)) || splitter.equals(toDelete)) {
                            splitterIter.remove();
                            deletedSplitters.add(splitter);
                        }
                    }
                }
                System.err.println("Removed from vertical splitters: " + deletedSplitters);
            }
        }
        if (options.trainOptions.selectivePostSplit) {
            options.trainOptions.postSplitters = ParentAnnotationStats.getSplitCategories(trainTreebank.transform(new TreeAnnotator(options.tlpParams.headFinder(), options.tlpParams, options)), true, 0, options.trainOptions.selectivePostSplitCutOff, options.trainOptions.tagSelectivePostSplitCutOff, options.tlpParams.treebankLanguagePack());
        }
        if (options.trainOptions.hSelSplit) {
            // Preliminary pass with selective splitting switched off; the transformed trees are
            // discarded. NOTE(review): presumably the binarizer collects statistics here that
            // it uses once splitting is switched back on - confirm in TreeAnnotatorAndBinarizer.
            binarizer.setDoSelectiveSplit(false);
            for (Tree trainTree : trainTreebank) {
                Tree prepared = options.trainOptions.collinsPunc ? collinsPuncTransformer.transformTree(trainTree) : trainTree;
                binarizer.transformTree(prepared);
            }
            binarizer.setDoSelectiveSplit(true);
        }
        for (Tree trainTree : trainTreebank) {
            Tree prepared = options.trainOptions.collinsPunc ? collinsPuncTransformer.transformTree(trainTree) : trainTree;
            binaryTrainTrees.add(binarizer.transformTree(prepared));
        }
        if (options.testOptions.verbose) {
            binarizer.dumpStats();
        }
        ArrayList binaryTestTrees = new ArrayList();
        for (Tree testTree : testTreebank) {
            Tree prepared = options.trainOptions.collinsPunc ? collinsPuncTransformer.transformTree(testTree) : testTree;
            binaryTestTrees.add(binarizer.transformTree(prepared));
        }
        Timing.tick("done.");

        // ---- Extract grammars and lexicon ----
        BinaryGrammar binaryGrammar = null;
        UnaryGrammar unaryGrammar = null;
        DependencyGrammar dependencyGrammar = null;
        HashIndex stateIndex = new HashIndex();
        BinaryGrammarExtractor binaryGrammarExtractor = new BinaryGrammarExtractor(options, stateIndex);
        if (options.doPCFG) {
            System.err.print("Extracting PCFG...");
            Pair grammars;
            if (options.trainOptions.cheatPCFG) {
                // "Cheating" mode: the PCFG also sees the test trees.
                ArrayList trainAndTestTrees = new ArrayList(binaryTrainTrees);
                trainAndTestTrees.addAll(binaryTestTrees);
                grammars = (Pair) binaryGrammarExtractor.extract(trainAndTestTrees);
            } else {
                grammars = (Pair) binaryGrammarExtractor.extract(binaryTrainTrees);
            }
            binaryGrammar = (BinaryGrammar) grammars.second;
            binaryGrammar.splitRules();
            unaryGrammar = (UnaryGrammar) grammars.first;
            unaryGrammar.purgeRules();
            Timing.tick("done.");
        }
        System.err.print("Extracting Lexicon...");
        HashIndex wordIndex = new HashIndex();
        HashIndex tagIndex = new HashIndex();
        Lexicon lex = options.tlpParams.lex(options, wordIndex, tagIndex);
        lex.initializeTraining(binaryTrainTrees.size());
        lex.train(binaryTrainTrees);
        lex.finishTraining();
        Timing.tick("done.");
        if (options.doDep) {
            System.err.print("Extracting Dependencies...");
            // The training trees used to be cleared right here, immediately before being
            // handed to the extractor, so the dependency grammar was built from an empty
            // collection. They must still be populated at this point.
            dependencyGrammar = new MLEDependencyGrammarExtractor(options, wordIndex, tagIndex).extract(binaryTrainTrees);
            Timing.tick("done.");
            System.out.print("Tuning Dependency Model...");
            dependencyGrammar.tune(binaryTestTrees);
            Timing.tick("done.");
        }
        NullGrammarProjection grammarProjection = new NullGrammarProjection(binaryGrammar, unaryGrammar);
        if (serializeFile != null) {
            System.err.print("Serializing parser...");
            new LexicalizedParser(lex, binaryGrammar, unaryGrammar, dependencyGrammar, stateIndex, wordIndex, tagIndex, options).saveParserToSerialized(serializeFile);
            Timing.tick("done.");
        }

        // ---- Build parsers ----
        ExhaustivePCFGParser pcfgParser = options.doPCFG ? new ExhaustivePCFGParser(binaryGrammar, unaryGrammar, lex, options, stateIndex, wordIndex, tagIndex) : null;
        // NOTE(review): when doDep and useFastFactored are both set this stays null, yet the
        // loop below still calls depParser.parse(...) whenever doDep is set - confirm intent.
        ExhaustiveDependencyParser depParser = (options.doDep && !options.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dependencyGrammar, lex, options, wordIndex, tagIndex) : null;
        TwinScorer twinScorer = options.doPCFG ? new TwinScorer(new ProjectionScorer(pcfgParser, grammarProjection, options), depParser) : null;
        ViterbiParser combinedParser = null;
        if (options.doPCFG && options.doDep) {
            if (options.testOptions.useN5) {
                combinedParser = new BiLexPCFGParser.N5BiLexPCFGParser(twinScorer, pcfgParser, depParser, binaryGrammar, unaryGrammar, dependencyGrammar, lex, options, grammarProjection, stateIndex, wordIndex, tagIndex);
            } else {
                combinedParser = new BiLexPCFGParser(twinScorer, pcfgParser, depParser, binaryGrammar, unaryGrammar, dependencyGrammar, lex, options, grammarProjection, stateIndex, wordIndex, tagIndex);
            }
        }

        // ---- Evaluation metrics ----
        Evalb pcfgPE = new Evalb("pcfg  PE", true);
        Evalb comboPE = new Evalb("combo PE", true);
        Evalb.CBEval pcfgCB = new Evalb.CBEval("pcfg  CB", true);
        TaggingEval pcfgTE = new TaggingEval("pcfg  TE");
        TaggingEval comboTE = new TaggingEval("combo TE");
        TaggingEval pcfgNoPunctTE = new TaggingEval("pcfg nopunct TE");
        TaggingEval comboNoPunctTE = new TaggingEval("combo nopunct TE");
        TaggingEval depTE = new TaggingEval("depnd TE");
        UnlabeledAttachmentEval depDE = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.punctuationWordRejectFilter());
        UnlabeledAttachmentEval comboDE = new UnlabeledAttachmentEval("combo DE", true, null, tlp.punctuationWordRejectFilter());
        if (options.testOptions.evalb) {
            EvalbFormatWriter.initEVALBfiles(options.tlpParams);
        }

        // The tagger is loaded reflectively; failure to load it is reported and pre-tagging
        // is skipped rather than aborting the run.
        Function tagger = null;
        if (options.testOptions.preTag) {
            try {
                tagger = (Function) Class.forName("edu.stanford.nlp.tagger.maxent.MaxentTagger").getConstructor(String.class).newInstance(options.testOptions.taggerSerializedFile);
            } catch (Exception e) {
                System.err.println(e);
                System.err.println("Warning: No pretagging of sentences will be done.");
            }
        }

        // ---- Parse and score each test sentence ----
        int testSize = testTreebank.size();
        for (int treeNum = 0; treeNum < testSize; treeNum++) {
            Tree goldTree = testTreebank.get(treeNum);
            int sentenceLength = goldTree.yield().size();
            if (sentenceLength > options.testOptions.maxLength) {
                continue;
            }
            Tree binaryGoldTree = (Tree) binaryTestTrees.get(treeNum);
            System.out.println("-------------------------------------");
            System.out.println("Number: " + (treeNum + 1));
            System.out.println("Length: " + sentenceLength);
            long startMillis = System.currentTimeMillis();
            Timing.tick("Starting parse.");
            if (options.doPCFG) {
                if (!options.testOptions.forceTags) {
                    pcfgParser.parse(binaryGoldTree.yieldHasWord());
                } else if (tagger != null) {
                    // Drop the final token before tagging and append ".$." again afterwards.
                    pcfgParser.parse(addLast((ArrayList) tagger.apply(cutLast(wordify(binaryGoldTree.yield())))));
                } else {
                    pcfgParser.parse(cleanTags(binaryGoldTree.taggedYield(), tlp));
                }
            }
            if (options.doDep) {
                depParser.parse(binaryGoldTree.yieldHasWord());
            }
            boolean comboParsed = false;
            if (options.doPCFG && options.doDep) {
                comboParsed = combinedParser.parse(binaryGoldTree.yieldHasWord());
            }
            // Elapsed time in seconds, truncated to one decimal place.
            System.err.println("Time: " + (((int) ((System.currentTimeMillis() - startMillis) / 100)) / 10.0d) + " sec.");

            Tree pcfgBinaryParse = null;
            Tree pcfgParse = null;
            if (options.doPCFG) {
                pcfgBinaryParse = pcfgParser.getBestParse();
                pcfgParse = debinarizer.transformTree(pcfgBinaryParse);
            }
            Tree depBinaryParse = null;
            Tree depParse = null;
            if (options.doDep) {
                depBinaryParse = depParser.getBestParse();
                depParse = debinarizer.transformTree(depBinaryParse);
                depBinaryParse.pennPrint(pw);
            }
            Tree comboParse = null;
            if (options.doPCFG && options.doDep) {
                try {
                    comboParse = combinedParser.getBestParse();
                    if (comboParse == null) {
                        comboParse = pcfgBinaryParse;
                    }
                } catch (NullPointerException e) {
                    // Existing best-effort fallback: use the PCFG parse if the combined
                    // parser cannot produce one.
                    System.err.println("Blocked, using PCFG parse!");
                    comboParse = pcfgBinaryParse;
                }
            }
            if (options.doPCFG && !comboParsed) {
                comboParse = pcfgBinaryParse;
            }
            if (options.doDep) {
                depDE.evaluate(depBinaryParse, binaryGoldTree, pw);
                depTE.evaluate(depParse, goldTree, pw);
            }
            TreeTransformer collinizer = options.tlpParams.collinizer();
            TreeTransformer collinizerEvalb = options.tlpParams.collinizerEvalb();
            if (options.doPCFG) {
                pcfgPE.evaluate(collinizer.transformTree(pcfgParse), collinizer.transformTree(goldTree), pw);
                pcfgCB.evaluate(collinizer.transformTree(pcfgParse), collinizer.transformTree(goldTree), pw);
                Tree comboBinaryParse = null;
                if (options.doDep) {
                    comboDE.evaluate(comboParsed ? comboParse : depBinaryParse, binaryGoldTree, pw);
                    // Keep the binarized form for scoring; comboParse becomes debinarized.
                    comboBinaryParse = comboParse;
                    comboParse = debinarizer.transformTree(comboParse);
                    if (options.nodePrune) {
                        comboParse = new NodePruner(pcfgParser, debinarizer).prune(comboParse);
                    }
                    comboPE.evaluate(collinizer.transformTree(comboParse), collinizer.transformTree(goldTree), pw);
                }
                pcfgTE.evaluate(collinizerEvalb.transformTree(pcfgParse), collinizerEvalb.transformTree(goldTree), pw);
                pcfgNoPunctTE.evaluate(collinizer.transformTree(pcfgParse), collinizer.transformTree(goldTree), pw);
                if (options.doDep) {
                    comboTE.evaluate(collinizerEvalb.transformTree(comboParse), collinizerEvalb.transformTree(goldTree), pw);
                    comboNoPunctTE.evaluate(collinizer.transformTree(comboParse), collinizer.transformTree(goldTree), pw);
                }
                System.out.println("PCFG only: " + pcfgParser.scoreBinarizedTree(pcfgBinaryParse, 0));
                pcfgParse.pennPrint(pw);
                if (options.doDep) {
                    System.out.println("Combo: " + pcfgParser.scoreBinarizedTree(comboBinaryParse, 0));
                    comboParse.pennPrint(pw);
                }
                System.out.println("Correct:" + pcfgParser.scoreBinarizedTree(binaryGoldTree, 0));
                goldTree.pennPrint(pw);
            }
            if (options.testOptions.evalb) {
                if (options.doPCFG && options.doDep) {
                    EvalbFormatWriter.writeEVALBline(collinizerEvalb.transformTree(goldTree), collinizerEvalb.transformTree(comboParse));
                } else if (options.doPCFG) {
                    EvalbFormatWriter.writeEVALBline(collinizerEvalb.transformTree(goldTree), collinizerEvalb.transformTree(pcfgParse));
                } else if (options.doDep) {
                    EvalbFormatWriter.writeEVALBline(collinizerEvalb.transformTree(goldTree), collinizerEvalb.transformTree(depParse));
                }
            }
        }

        // ---- Summary ----
        if (options.testOptions.evalb) {
            EvalbFormatWriter.closeEVALBfiles();
        }
        if (options.doPCFG) {
            pcfgPE.display(false, pw);
            System.out.println("Grammar size: " + stateIndex.size());
            pcfgCB.display(false, pw);
            if (options.doDep) {
                comboPE.display(false, pw);
            }
            pcfgTE.display(false, pw);
            pcfgNoPunctTE.display(false, pw);
            if (options.doDep) {
                comboTE.display(false, pw);
                comboNoPunctTE.display(false, pw);
            }
        }
        if (options.doDep) {
            depTE.display(false, pw);
            depDE.display(false, pw);
        }
        if (options.doPCFG && options.doDep) {
            comboDE.display(false, pw);
        }
    }

    /**
     * Builds a new list of tagged words in which every tag has been replaced by the result
     * of {@code treebankLanguagePack.basicCategory(tag)}; the words themselves are kept.
     *
     * @param list the tagged words to convert; not modified
     * @param treebankLanguagePack supplies {@code basicCategory} for each tag
     * @return a new list with one entry per input entry, in the same order
     */
    private static List<TaggedWord> cleanTags(List<TaggedWord> list, TreebankLanguagePack treebankLanguagePack) {
        List<TaggedWord> cleaned = new ArrayList<>(list.size());
        for (TaggedWord original : list) {
            String word = original.word();
            String basicTag = treebankLanguagePack.basicCategory(original.tag());
            cleaned.add(new TaggedWord(word, basicTag));
        }
        return cleaned;
    }

    /**
     * Converts each element of {@code list} into a {@link Word} built from the element's
     * {@code toString()} value.
     *
     * @param list the elements to convert; not modified
     * @return a new list of words in the same order as the input
     */
    private static ArrayList<Word> wordify(List list) {
        ArrayList<Word> words = new ArrayList<>();
        for (Object element : list) {
            String text = element.toString();
            words.add(new Word(text));
        }
        return words;
    }

    /**
     * Returns a copy of {@code arrayList} without its final element.
     *
     * @param arrayList the source list; not modified
     * @return a new list holding every element except the last
     */
    private static ArrayList<Word> cutLast(ArrayList<Word> arrayList) {
        int lastIndex = arrayList.size() - 1;
        List<Word> allButLast = arrayList.subList(0, lastIndex);
        return new ArrayList<>(allButLast);
    }

    /**
     * Returns a copy of {@code arrayList} with a {@code ".$."} word appended at the end.
     *
     * @param arrayList the source list; not modified
     * @return a new list containing the input elements followed by {@code ".$."}
     */
    private static ArrayList<Word> addLast(ArrayList<? extends Word> arrayList) {
        ArrayList<Word> extended = new ArrayList<>(arrayList.size() + 1);
        extended.addAll(arrayList);
        extended.add(new Word(".$."));
        return extended;
    }

    /** Private, empty constructor: the class cannot be instantiated from outside; every other member visible here is static. */
    private FactoredParser() {
    }
}
