package cc.mallet.grmm.learning;

import bsh.EvalError;
import cc.mallet.grmm.inference.Inferencer;
import cc.mallet.grmm.learning.ACRF;
import cc.mallet.grmm.learning.DefaultAcrfTrainer;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.types.InstanceList;
import cc.mallet.util.BshInterpreter;
import cc.mallet.util.CommandOption;
import cc.mallet.util.FileUtils;
import cc.mallet.util.Timing;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.xalan.templates.Constants;

/* loaded from: input_file:WEB-INF/lib/mallet-2.0.7.jar:cc/mallet/grmm/learning/GenericAcrfTui.class */
/**
 * Command-line driver that trains an {@link ACRF} from text files.
 *
 * <p>{@link #main(String[])} processes the command-line options declared below, reads the
 * training (and optionally testing) data through a pipe, builds the ACRF from the templates
 * listed in the model file, trains it with the trainer selected by the training flags, and
 * serializes the trained model to {@code acrf.ser.gz} in the working directory.
 */
public class GenericAcrfTui {
    private static CommandOption.File modelFile = new CommandOption.File(GenericAcrfTui.class, "model-file", "FILENAME", true, null, "Text file describing model structure.", null);
    private static CommandOption.File trainFile = new CommandOption.File(GenericAcrfTui.class, "training", "FILENAME", true, null, "File containing training data.", null);
    private static CommandOption.File testFile = new CommandOption.File(GenericAcrfTui.class, "testing", "FILENAME", true, null, "File containing testing data.", null);
    private static CommandOption.Integer numLabelsOption = new CommandOption.Integer(GenericAcrfTui.class, "num-labels", "INT", true, -1, "If supplied, number of labels on each line of input file.  Otherwise, the token ---- must separate labels from features.", null);
    // NOTE(review): Expressions.TYPE_STRING comes from an unrelated library and is used here only as
    // the argument-type label of string options; presumably a decompiler substitution - confirm its
    // value and replace it with a plain literal.
    private static CommandOption.String inferencerOption = new CommandOption.String(GenericAcrfTui.class, "inferencer", Expressions.TYPE_STRING, true, "TRP", "Specification of inferencer.", null);
    private static CommandOption.String maxInferencerOption = new CommandOption.String(GenericAcrfTui.class, "max-inferencer", Expressions.TYPE_STRING, true, "TRP.createForMaxProduct()", "Specification of inferencer.", null);
    // The option name is spelled out as "eval" (the name the error messages below refer to as
    // --eval) instead of borrowing an XSLT constant that merely happens to hold the same text.
    private static CommandOption.String evalOption = new CommandOption.String(GenericAcrfTui.class, "eval", Expressions.TYPE_STRING, true, "LOG", "Evaluator to use.  Java code grokking performed.", null);
    private static CommandOption.Boolean usePiecewiseTraining = new CommandOption.Boolean(GenericAcrfTui.class, "piecewise", "true|false", true, false, "Whether to use piecewise training.", null);
    private static CommandOption.Boolean usePwplTraining = new CommandOption.Boolean(GenericAcrfTui.class, "pwpl", "true|false", true, false, "Whether to use pwpl training.", null);
    private static CommandOption.Boolean usePlTraining = new CommandOption.Boolean(GenericAcrfTui.class, "pl", "true|false", true, false, "Whether to use Besag pseudolikelihood.", null);
    // The three options below are declared here; cacheUnrolledGraph and randomSeedOption are not
    // read anywhere in this class.
    static CommandOption.Boolean cacheUnrolledGraph = new CommandOption.Boolean(GenericAcrfTui.class, "cache-graphs", "true|false", true, false, "Whether to use memory-intensive caching.", null);
    static CommandOption.Boolean useTokenText = new CommandOption.Boolean(GenericAcrfTui.class, "use-token-text", "true|false", true, false, "Set this to true if first feature in every list is should be considered the text of the current token.  This is used for NLP-specific debugging and error analysis.", null);
    static CommandOption.Integer randomSeedOption = new CommandOption.Integer(GenericAcrfTui.class, "random-seed", "INTEGER", true, 0, "The random seed for randomly selecting a proportion of the instance list for training", null);
    /** Shared BeanShell interpreter used to evaluate evaluator, inferencer and template specs. */
    private static BshInterpreter interpreter = setupInterpreter();

    /**
     * Picks the trainer implementation from the training flags.
     *
     * <p>The flags are checked in the order piecewise, pwpl, pl; the first one that is set wins.
     * With none set, a {@link DefaultAcrfTrainer} is used.
     *
     * @return a new trainer instance
     */
    private static ACRFTrainer createTrainer() {
        if (usePiecewiseTraining.value) {
            return new PiecewiseACRFTrainer();
        }
        if (usePwplTraining.value) {
            return new PwplACRFTrainer();
        }
        if (usePlTraining.value) {
            return new PseudolikelihoodACRFTrainer();
        }
        return new DefaultAcrfTrainer();
    }

    /**
     * Entry point: trains an ACRF and writes it to {@code acrf.ser.gz}.
     *
     * @param strArr command-line arguments, handed to {@link #doProcessOptions(Class, String[])}
     * @throws IOException if a data or model file cannot be read
     * @throws EvalError if an evaluator, inferencer or template spec fails to evaluate
     */
    public static void main(String[] strArr) throws IOException, EvalError {
        doProcessOptions(GenericAcrfTui.class, strArr);
        Timing timing = new Timing();

        GenericAcrfData2TokenSequence basePipe;
        if (numLabelsOption.wasInvoked()) {
            basePipe = new GenericAcrfData2TokenSequence(numLabelsOption.value);
        } else {
            basePipe = new GenericAcrfData2TokenSequence();
        }
        basePipe.setFeaturesIncludeToken(useTokenText.value);
        basePipe.setIncludeTokenText(useTokenText.value);
        SerialPipes pipe = new SerialPipes(new Pipe[]{basePipe, new TokenSequence2FeatureVectorSequence(true, true)});

        InstanceList training = new InstanceList(pipe);
        addInstancesFromFile(training, trainFile.value);

        // --testing is optional. Previously a null iterator was piped in when it was omitted;
        // now the testing list is simply left empty in that case.
        InstanceList testing = new InstanceList(pipe);
        if (testFile.wasInvoked()) {
            addInstancesFromFile(testing, testFile.value);
        }

        ACRF.Template[] templates = parseModelFile(modelFile.value);
        ACRFEvaluator evaluator = createEvaluator(evalOption.value);
        Inferencer inferencer = createInferencer(inferencerOption.value);
        Inferencer maxInferencer = createInferencer(maxInferencerOption.value);

        ACRF acrf = new ACRF(pipe, templates);
        acrf.setInferencer(inferencer);
        acrf.setViterbiInferencer(maxInferencer);

        ACRFTrainer trainer = createTrainer();
        System.err.println("ACRF Trainer = " + trainer);
        trainer.train(acrf, training, null, testing, evaluator, 9999);
        timing.tick("Training");

        FileUtils.writeGzippedObject(new File("acrf.ser.gz"), acrf);
        timing.tick("Serializing");
        System.err.println("Total time (ms) = " + timing.elapsedTime());
    }

    /**
     * Reads {@code file} as groups of lines separated by blank lines, pipes every group into
     * {@code instances}, and closes the file afterwards.
     *
     * @param instances list that receives the piped instances
     * @param file data file to read
     * @throws IOException if the file cannot be opened or closed
     */
    private static void addInstancesFromFile(InstanceList instances, File file) throws IOException {
        FileReader reader = new FileReader(file);
        try {
            instances.addThruPipe(new LineGroupIterator(reader, Pattern.compile("^\\s*$"), true));
        } finally {
            reader.close();
        }
    }

    /**
     * Fetches the interpreter from {@link CommandOption} and pre-imports the packages that
     * specs given on the command line may refer to.
     *
     * @return the prepared interpreter
     * @throws RuntimeException wrapping the {@link EvalError} if an import statement fails
     */
    private static BshInterpreter setupInterpreter() {
        BshInterpreter interpreter2 = CommandOption.getInterpreter();
        try {
            interpreter2.eval("import cc.mallet.base.extract.*");
            interpreter2.eval("import cc.mallet.grmm.inference.*");
            interpreter2.eval("import cc.mallet.grmm.learning.*");
            interpreter2.eval("import cc.mallet.grmm.learning.templates.*");
            return interpreter2;
        } catch (EvalError e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds an evaluator from its textual specification.
     *
     * <p>A spec containing {@code '('} is evaluated as code by the interpreter and the result is
     * cast to {@link ACRFEvaluator}. Any other spec is split on whitespace and parsed as one of
     * {@code LOG}, {@code SEGMENT <slice> (<start> <continue>)*} or {@code SERIAL <spec>...}.
     *
     * @param str evaluator specification
     * @return the evaluator described by {@code str}
     * @throws EvalError if the interpreter fails to evaluate a code spec
     * @throws RuntimeException if a keyword spec is malformed
     */
    public static ACRFEvaluator createEvaluator(String str) throws EvalError {
        if (str.indexOf('(') >= 0) {
            return (ACRFEvaluator) interpreter.eval(str);
        }
        return createEvaluator(new LinkedList<String>(Arrays.asList(str.split("\\s+"))));
    }

    /**
     * Parses one evaluator from the front of {@code linkedList}, consuming the tokens it uses.
     *
     * @param linkedList remaining whitespace-separated tokens of the spec; modified in place
     * @return the parsed evaluator
     * @throws RuntimeException if the tokens do not form a valid spec
     */
    private static ACRFEvaluator createEvaluator(LinkedList<String> linkedList) {
        // A whitespace-only spec splits into zero tokens; report it like the other spec errors
        // rather than letting removeFirst() fail with a bare NoSuchElementException.
        if (linkedList.isEmpty()) {
            throw new RuntimeException("Error in --eval " + evalOption.value + ": missing evaluator name");
        }
        String keyword = linkedList.removeFirst();

        if (keyword.equalsIgnoreCase("LOG")) {
            return new DefaultAcrfTrainer.LogEvaluator();
        }

        if (keyword.equalsIgnoreCase("SERIAL")) {
            // Every remaining token belongs to the nested evaluators, parsed one after another.
            ArrayList<ACRFEvaluator> evaluators = new ArrayList<ACRFEvaluator>();
            while (!linkedList.isEmpty()) {
                evaluators.add(createEvaluator(linkedList));
            }
            return new AcrfSerialEvaluator(evaluators);
        }

        if (!keyword.equalsIgnoreCase("SEGMENT")) {
            throw new RuntimeException("Error in --eval " + evalOption.value + ": illegal evaluator " + keyword);
        }

        if (linkedList.isEmpty()) {
            throw new RuntimeException("Error in --eval " + evalOption.value + ": SEGMENT requires a slice index.");
        }
        int slice = Integer.parseInt(linkedList.removeFirst());
        if (linkedList.size() % 2 != 0) {
            throw new RuntimeException("Error in --eval " + evalOption.value + ": Every start tag must have a continue.");
        }
        // The rest of the tokens are (start tag, continue tag) pairs.
        int pairCount = linkedList.size() / 2;
        String[] startTags = new String[pairCount];
        String[] continueTags = new String[pairCount];
        for (int i = 0; i < pairCount; i++) {
            startTags[i] = linkedList.removeFirst();
            continueTags[i] = linkedList.removeFirst();
        }
        return new MultiSegmentationEvaluatorACRF(startTags, continueTags, slice);
    }

    /**
     * Builds an inferencer from its textual specification.
     *
     * <p>A spec containing {@code '('} is evaluated verbatim; otherwise it is treated as a class
     * name and evaluated as {@code new <spec>()}.
     *
     * @param str inferencer specification
     * @return the resulting inferencer
     * @throws EvalError if the interpreter fails to evaluate the spec
     * @throws RuntimeException if the evaluated object is not an {@link Inferencer}
     */
    private static Inferencer createInferencer(String str) throws EvalError {
        String code = str.indexOf('(') >= 0 ? str : "new " + str + "()";
        Object eval = interpreter.eval(code);
        if (eval instanceof Inferencer) {
            return (Inferencer) eval;
        }
        throw new RuntimeException("Don't know what to do with inferencer " + eval);
    }

    /**
     * Processes the command line against the options registered for {@code cls} and logs the
     * resulting option values to the root logger.
     *
     * @param cls class whose command options should be processed
     * @param strArr command-line arguments
     */
    public static void doProcessOptions(Class cls, String[] strArr) {
        CommandOption.List list = new CommandOption.List("", new CommandOption[0]);
        list.add(cls);
        list.process(strArr);
        list.logOptions(Logger.getLogger(""));
    }

    /**
     * Reads the model file, evaluating each line with the interpreter; every line must yield an
     * {@link ACRF.Template}.
     *
     * @param file model description file
     * @return the templates in file order
     * @throws IOException if the file cannot be read
     * @throws EvalError if a line fails to evaluate
     * @throws RuntimeException if a line evaluates to something that is not a template; the
     *         message names the file and the 1-based line number
     */
    private static ACRF.Template[] parseModelFile(File file) throws IOException, EvalError {
        ArrayList<ACRF.Template> templates = new ArrayList<ACRF.Template>();
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                Object eval = interpreter.eval(line);
                if (!(eval instanceof ACRF.Template)) {
                    // Report the offending line number (the reader's toString() was printed before).
                    throw new RuntimeException("Error in " + file + " line " + lineNumber + ":\n  Object " + eval + " not a template");
                }
                templates.add((ACRF.Template) eval);
            }
        } finally {
            reader.close();
        }
        return templates.toArray(new ACRF.Template[0]);
    }
}
