package cc.mallet.examples;

import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
import cc.mallet.fst.PerClassAccuracyEvaluator;
import cc.mallet.fst.TokenAccuracyEvaluator;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.SimpleTaggerSentence2TokenSequence;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.pipe.tsf.RegexMatches;
import cc.mallet.pipe.tsf.TokenFirstPosition;
import cc.mallet.pipe.tsf.TokenTextCharSuffix;
import cc.mallet.types.InstanceList;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

/* loaded from: input_file:WEB-INF/lib/mallet-2.0.7.jar:cc/mallet/examples/TrainCRF.class */
/**
 * Example program that trains a linear-chain CRF on a gzip-compressed training file and
 * reports accuracy on a gzip-compressed testing file after each evaluation round.
 *
 * <p>Both files are split into instances at blank (whitespace-only) lines; each group of
 * lines is handed to {@link SimpleTaggerSentence2TokenSequence} and then through the
 * feature pipes assembled in {@link #buildFeaturePipe()}.
 */
public class TrainCRF {
    /** Matches the blank (empty or whitespace-only) lines that separate instances in the input. */
    private static final Pattern INSTANCE_BOUNDARY = Pattern.compile("^\\s*$");

    /**
     * Builds the feature pipeline, loads both data sets, and trains the CRF.
     *
     * <p>All work happens in the constructor; the trained model is not retained on the
     * instance.
     *
     * @param trainingFilename path to the gzip-compressed training data
     * @param testingFilename path to the gzip-compressed data handed to the evaluators
     * @throws IOException if either file cannot be opened or is not valid gzip data
     */
    public TrainCRF(String trainingFilename, String testingFilename) throws IOException {
        Pipe pipe = buildFeaturePipe();

        // Both lists share the same pipe instance, so testing data is processed by exactly
        // the same pipe objects as the training data.
        InstanceList trainingInstances = new InstanceList(pipe);
        InstanceList testingInstances = new InstanceList(pipe);
        loadGzippedInstances(trainingInstances, trainingFilename);
        loadGzippedInstances(testingInstances, testingFilename);

        // The cast selects the (Pipe, Pipe) constructor; a bare null would be ambiguous
        // if CRF declares other two-argument constructors.
        CRF crf = new CRF(pipe, (Pipe) null);
        crf.addStatesForThreeQuarterLabelsConnectedAsIn(trainingInstances);
        crf.addStartState();

        CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
        trainer.setGaussianPriorVariance(10.0d);
        // Both evaluators are given the testing instances, under the description "testing".
        trainer.addEvaluator(new PerClassAccuracyEvaluator(testingInstances, "testing"));
        trainer.addEvaluator(new TokenAccuracyEvaluator(testingInstances, "testing"));
        trainer.train(trainingInstances);
    }

    /**
     * Assembles the pipes that turn a group of input lines into a feature vector sequence.
     *
     * @return a {@link SerialPipes} that applies the pipes in the order they are added below
     */
    private static Pipe buildFeaturePipe() {
        ArrayList<Pipe> pipes = new ArrayList<Pipe>();
        pipes.add(new SimpleTaggerSentence2TokenSequence());
        // Two conjunction specs, each holding a single token offset: -1 and +1.
        pipes.add(new OffsetConjunctions(new int[][] {{-1}, {1}}));
        pipes.add(new TokenTextCharSuffix("C1=", 1));
        pipes.add(new TokenTextCharSuffix("C2=", 2));
        pipes.add(new TokenTextCharSuffix("C3=", 3));
        pipes.add(new RegexMatches("CAPITALIZED", Pattern.compile("^\\p{Lu}.*")));
        pipes.add(new RegexMatches("STARTSNUMBER", Pattern.compile("^[0-9].*")));
        pipes.add(new RegexMatches("HYPHENATED", Pattern.compile(".*\\-.*")));
        pipes.add(new RegexMatches("DOLLARSIGN", Pattern.compile(".*\\$.*")));
        pipes.add(new TokenFirstPosition("FIRSTTOKEN"));
        pipes.add(new TokenSequence2FeatureVectorSequence());
        return new SerialPipes(pipes);
    }

    /**
     * Reads a gzip-compressed file, splits it into line groups at blank lines, and adds
     * every group to {@code instances} through that list's pipe.
     *
     * @param instances the list to populate
     * @param filename path to the gzip-compressed input file
     * @throws IOException if the file cannot be opened, is not valid gzip data, or fails to close
     */
    private static void loadGzippedInstances(InstanceList instances, String filename)
            throws IOException {
        // The file stream is its own resource so that it is still closed when the
        // GZIPInputStream constructor rejects the header and the reader is never built.
        // NOTE(review): the reader uses the platform default charset, as the original did.
        try (FileInputStream fileIn = new FileInputStream(filename);
                BufferedReader reader =
                        new BufferedReader(new InputStreamReader(new GZIPInputStream(fileIn)))) {
            // Assumes addThruPipe drains the iterator before returning, so the reader is
            // not needed once this call completes -- confirm against InstanceList.
            instances.addThruPipe(new LineGroupIterator(reader, INSTANCE_BOUNDARY, true));
        }
    }

    /**
     * Command-line entry point.
     *
     * @param strArr {@code strArr[0]} is the training file and {@code strArr[1]} the testing
     *     file, both gzip-compressed; any further arguments are ignored
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws Exception if loading or training fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: TrainCRF <training-file.gz> <testing-file.gz>");
        }
        new TrainCRF(strArr[0], strArr[1]);
    }
}
