package cc.mallet.grmm.learning;

import cc.mallet.extract.StringSpan;
import cc.mallet.extract.StringTokenization;
import cc.mallet.grmm.util.LabelsAssignment;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.Labels;
import cc.mallet.types.LabelsSequence;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:WEB-INF/lib/mallet-2.0.7.jar:cc/mallet/grmm/learning/GenericAcrfData2TokenSequence.class */
/**
 * Pipe that converts a newline-separated block of text into a
 * {@link StringTokenization} (instance data) and a {@link LabelsAssignment}
 * wrapping a {@link LabelsSequence} (instance target).
 *
 * <p>Each input line is split on whitespace and describes one token. Two
 * layouts are supported:
 * <ul>
 *   <li><b>Labels first</b> (default): {@code LABEL... [----] [TOKEN] FEATURE...}.
 *       When the pipe was built with a positive label count, exactly that many
 *       leading fields are labels and a following {@code ----} is optional and
 *       skipped; otherwise labels run up to the mandatory {@code ----}
 *       separator.</li>
 *   <li><b>Labels at end</b> ({@link #setLabelsAtEnd(boolean)}):
 *       {@code [TOKEN] FEATURE... LABEL...}, where the last {@code numLabels}
 *       fields are labels. This layout needs a label count supplied through
 *       {@link #GenericAcrfData2TokenSequence(int)}.</li>
 * </ul>
 *
 * <p>The k-th label on a line is looked up in the k-th label alphabet; extra
 * alphabets are created on demand, so {@link #numLevels()} can grow as data is
 * piped.
 */
public class GenericAcrfData2TokenSequence extends Pipe {
    private static final long serialVersionUID = 1;
    private static final int CURRENT_SERIAL_VERSION = 2;

    /** Field that ends the leading labels when no fixed label count is configured. */
    private static final String LABEL_SEPARATOR = "----";

    /** Token text used when a line carries no explicit token field. */
    private static final String UNKNOWN_TOKEN = "*???*";

    // One alphabet per label level; index 0 is this pipe's target alphabet.
    // Kept as ArrayList (same erased type as before) so the default serialized
    // form is untouched. Elements are cast to LabelAlphabet on read, as before.
    private ArrayList<Alphabet> labelDicts = new ArrayList<Alphabet>();
    // Number of labels per line; values <= 0 mean "read labels up to the separator".
    private int numLabels;
    private boolean includeTokenText = true;
    private String textFeaturePrefix = "WORD=";
    private boolean featuresIncludeToken = true;
    private boolean labelsAtEnd = false;

    /**
     * Creates a pipe with no fixed label count: leading labels are terminated
     * by the {@code ----} separator.
     */
    public GenericAcrfData2TokenSequence() {
        this(-1);
    }

    /**
     * Creates a pipe that expects a fixed number of labels per line.
     *
     * @param i number of labels per line; a value {@code <= 0} falls back to
     *          separator-terminated labels
     */
    public GenericAcrfData2TokenSequence(int i) {
        super(new Alphabet(), new LabelAlphabet());
        this.numLabels = i;
        this.labelDicts.add(getTargetAlphabet());
    }

    /**
     * @param z whether each token gets an extra feature named
     *          {@code textFeaturePrefix + tokenText}
     */
    public void setIncludeTokenText(boolean z) {
        this.includeTokenText = z;
    }

    /**
     * @param z whether the first non-label field of a line is the token text
     *          (otherwise every non-label field is a feature)
     */
    public void setFeaturesIncludeToken(boolean z) {
        this.featuresIncludeToken = z;
    }

    /** @return whether the first non-label field of a line is read as the token text */
    public boolean getFeaturesIncludeToken() {
        return this.featuresIncludeToken;
    }

    /** @param str prefix prepended to the token text to form the token-text feature name */
    public void setTextFeaturePrefix(String str) {
        this.textFeaturePrefix = str;
    }

    /**
     * @param i label level (position of the label within a line)
     * @return the alphabet used for labels at that level
     */
    public LabelAlphabet getLabelAlphabet(int i) {
        return (LabelAlphabet) this.labelDicts.get(i);
    }

    /** @return the number of label alphabets currently held by this pipe */
    public int numLevels() {
        return this.labelDicts.size();
    }

    /**
     * Parses the instance's character data line by line, replacing its data
     * with a {@link StringTokenization} and its target with a
     * {@link LabelsAssignment}.
     *
     * @param instance instance whose data is a {@link CharSequence}
     * @return the same instance, with data and target replaced
     * @throws ClassCastException if the instance data is not a {@code CharSequence}
     * @throws IllegalArgumentException if a labels-first line ends before its
     *         labels are complete (missing {@code ----} separator, or fewer
     *         fields than the configured label count)
     * @throws IllegalStateException if labels-at-end mode is enabled while the
     *         label count is negative
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        if (!(instance.getData() instanceof CharSequence)) {
            throw new ClassCastException("Needed a String; got " + instance.getData());
        }
        String[] lines = String.valueOf(instance.getData()).split("\n");
        StringSpan[] spans = new StringSpan[lines.length];
        Labels[] labelsPerLine = new Labels[lines.length];
        StringBuffer document = new StringBuffer();
        // Result unused; the call is retained from the original in case
        // Pipe.getDataAlphabet() has side effects - TODO confirm and drop.
        getDataAlphabet();

        for (int lineNo = 0; lineNo < lines.length; lineNo++) {
            String[] fields = lines[lineNo].split("\\s+");
            ArrayList<Label> lineLabels = new ArrayList<Label>();
            int pos = 0;

            if (!this.labelsAtEnd) {
                pos = readLeadingLabels(fields, lineNo, lines[lineNo], lineLabels);
                labelsPerLine[lineNo] = toLabels(lineLabels);
            }

            int featureEnd = featureEndIndex(fields);

            String tokenText = UNKNOWN_TOKEN;
            if (this.featuresIncludeToken && pos < featureEnd) {
                tokenText = fields[pos];
                pos++;
            }

            // The span covers the token text only; the trailing space merely
            // separates consecutive tokens in the reconstructed document.
            int spanStart = document.length();
            document.append(tokenText);
            int spanEnd = document.length();
            document.append(" ");
            StringSpan span = new StringSpan(document, spanStart, spanEnd);

            while (pos < featureEnd) {
                span.setFeatureValue(fields[pos].intern(), 1.0d);
                pos++;
            }
            if (this.includeTokenText) {
                span.setFeatureValue((this.textFeaturePrefix + tokenText).intern(), 1.0d);
            }

            if (this.labelsAtEnd) {
                // Label level is counted from the first trailing label field.
                int firstLabel = pos;
                while (pos < fields.length) {
                    lineLabels.add(labelForTok(fields[pos], pos - firstLabel));
                    pos++;
                }
                labelsPerLine[lineNo] = toLabels(lineLabels);
            }
            spans[lineNo] = span;
        }

        StringTokenization tokenization = new StringTokenization(document);
        tokenization.addAll(spans);
        instance.setData(tokenization);
        instance.setTarget(new LabelsAssignment(new LabelsSequence(labelsPerLine)));
        return instance;
    }

    /**
     * Reads the leading labels of a labels-first line into {@code out} and
     * skips a following {@code ----} separator if present.
     *
     * @return index of the first field after the labels (and separator)
     * @throws IllegalArgumentException if the line runs out of fields before
     *         the labels are complete
     */
    private int readLeadingLabels(String[] fields, int lineNo, String line, ArrayList<Label> out) {
        int pos = 0;
        while (!isLabelSeparator(fields, pos)) {
            if (pos >= fields.length) {
                // Previously surfaced as a bare ArrayIndexOutOfBoundsException.
                String problem = this.numLabels > 0
                        ? "expected " + this.numLabels + " leading labels but found only "
                                + fields.length + " field(s)"
                        : "no \"" + LABEL_SEPARATOR + "\" label separator found";
                throw new IllegalArgumentException(
                        "Malformed line " + lineNo + " (" + problem + "): \"" + line + "\"");
            }
            out.add(labelForTok(fields[pos], pos));
            pos++;
        }
        if (pos < fields.length && fields[pos].equals(LABEL_SEPARATOR)) {
            pos++;
        }
        return pos;
    }

    /**
     * Returns the exclusive end index of the token/feature fields of a line.
     * May be negative in labels-at-end mode when the line has fewer fields
     * than labels; callers then read no token or features.
     *
     * @throws IllegalStateException if labels-at-end mode is enabled with a
     *         negative label count
     */
    private int featureEndIndex(String[] fields) {
        if (!this.labelsAtEnd) {
            return fields.length;
        }
        if (this.numLabels < 0) {
            // A negative count would place the end past the array and overrun it.
            throw new IllegalStateException(
                    "labelsAtEnd requires a non-negative number of labels, but numLabels is "
                            + this.numLabels
                            + "; construct the pipe with GenericAcrfData2TokenSequence(int)");
        }
        return fields.length - this.numLabels;
    }

    /** Wraps the collected labels of one line in a {@link Labels} object. */
    private static Labels toLabels(ArrayList<Label> labels) {
        return new Labels(labels.toArray(new Label[labels.size()]));
    }

    /**
     * Looks up {@code str} in the alphabet for label level {@code i}, creating
     * alphabets for any levels not seen before.
     */
    private Label labelForTok(String str, int i) {
        while (this.labelDicts.size() <= i) {
            this.labelDicts.add(new LabelAlphabet());
        }
        return ((LabelAlphabet) this.labelDicts.get(i)).lookupLabel(str);
    }

    /**
     * Tells whether position {@code i} marks the end of the leading labels:
     * with a positive label count, once {@code i} reaches that count;
     * otherwise when field {@code i} is the {@code ----} separator. Never
     * indexes past the end of {@code strArr}.
     */
    private boolean isLabelSeparator(String[] strArr, int i) {
        if (this.numLabels > 0) {
            return i >= this.numLabels;
        }
        return i < strArr.length && strArr[i].equals(LABEL_SEPARATOR);
    }

    /** Writes the default fields followed by the serial version number. */
    private void writeObject(ObjectOutputStream objectOutputStream) throws IOException {
        objectOutputStream.defaultWriteObject();
        objectOutputStream.writeInt(CURRENT_SERIAL_VERSION);
    }

    /**
     * Reads the default fields and the serial version number; streams written
     * with version 1 or earlier get {@code featuresIncludeToken} forced to
     * {@code true}.
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        int version = objectInputStream.readInt();
        if (version <= 1) {
            this.featuresIncludeToken = true;
        }
    }

    /** @return whether labels are read from the end of each line */
    public boolean isLabelsAtEnd() {
        return this.labelsAtEnd;
    }

    /**
     * @param z whether labels are read from the end of each line rather than
     *          the start; requires a label count given at construction
     */
    public void setLabelsAtEnd(boolean z) {
        this.labelsAtEnd = z;
    }
}
