package cc.mallet.share.upenn.ner;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import gnu.trove.THashSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:WEB-INF/lib/mallet-2.0.7.jar:cc/mallet/share/upenn/ner/ListMember.class */
/**
 * Pipe that flags tokens which are part of a lexicon entry.
 *
 * <p>The lexicon is read from a text file, one entry per line (blank lines are skipped).
 * {@link #pipe(Instance)} scans every run of consecutive tokens of the instance's
 * {@link TokenSequence}; when the run's text, concatenated either directly or with single
 * spaces between tokens, equals a lexicon entry, every token of the run gets the feature
 * {@code name} set to {@code 1.0}.
 *
 * <p>NOTE(review): the class declares no {@code serialVersionUID}; only private members were
 * added here so that the default serialized form should stay compatible. Confirm before
 * adding non-private members.
 */
public class ListMember extends Pipe implements Serializable {
    /** Characters that separate tokens when estimating how many tokens a lexicon entry spans. */
    private static final String TOKEN_DELIMITERS = "~`!@#$%^&*()_-+={[}]|\\:;\"',<.>?/ \t\n\r";

    /** Name of the feature set on matching tokens. */
    String name;
    /** Lexicon entries (lower-cased when {@link #ignoreCase} is set). */
    Set<String> lexicon;
    /** Whether entries and candidate spans are lower-cased before comparison. */
    boolean ignoreCase;
    /** Smallest number of tokens a span must have to be looked up. */
    int min;
    /** Largest number of tokens a span may have; bounds the search window in {@link #pipe}. */
    int max;

    /**
     * Loads the lexicon from {@code file}.
     *
     * @param str  name of the feature to set on tokens that belong to a lexicon entry
     * @param file text file containing one lexicon entry per line
     * @param z    if {@code true}, matching ignores case (entries are stored lower-cased)
     * @throws IllegalArgumentException if {@code file} does not exist or cannot be read
     */
    public ListMember(String str, File file, boolean z) {
        this.name = str;
        this.ignoreCase = z;
        if (!file.exists()) {
            throw new IllegalArgumentException("File " + file + " not found.");
        }
        this.lexicon = new THashSet<String>();
        this.min = 99999;
        this.max = -1;
        // try-with-resources closes the reader on every path; the original never closed it.
        // NOTE(review): FileReader decodes with the platform default charset, as before.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            // readLine() returning null is the reliable end-of-file signal; ready() only says
            // whether a read would block and could let a null line through.
            while ((line = reader.readLine()) != null) {
                String entry = line.trim();
                if (entry.isEmpty()) {
                    continue;
                }
                // Lower bound: whitespace between words never becomes a token of its own in
                // the spans built by pipe(), so it must not inflate the minimum span length
                // (otherwise "New York" would demand three tokens and never match two).
                int tokenCount = countTokens(entry, false);
                if (tokenCount < this.min) {
                    this.min = tokenCount;
                }
                // Upper bound: kept as the original, more generous count (whitespace
                // delimiters included) so the search window is never narrower than before.
                int windowSize = countTokens(entry, true);
                if (windowSize > this.max) {
                    this.max = windowSize;
                }
                this.lexicon.add(z ? entry.toLowerCase() : entry);
            }
        } catch (IOException e) {
            // Report the failure to the caller instead of terminating the whole JVM.
            throw new IllegalArgumentException("Problem with " + file + ": " + e, e);
        }
    }

    /**
     * Sets the feature {@link #name} to {@code 1.0} on every token that is part of a span of
     * consecutive tokens matching a lexicon entry.
     *
     * @param instance instance whose data is cast to {@link TokenSequence}
     * @return the same {@code instance}, with its tokens updated in place
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        TokenSequence tokens = (TokenSequence) instance.getData();
        int size = tokens.size();
        boolean[] inLexicon = new boolean[size];
        for (int start = 0; start < size; start++) {
            // Two renderings of the growing span: tokens glued together ("U.S.") and tokens
            // separated by single spaces ("New York").
            StringBuilder joined = new StringBuilder();
            StringBuilder spaced = new StringBuilder();
            int limit = Math.min(start + this.max, size);
            for (int end = start; end < limit; end++) {
                String text = ((Token) tokens.get(end)).getText();
                joined.append(text);
                if (spaced.length() > 0) {
                    spaced.append(' ');
                }
                spaced.append(text);
                if (end - start + 1 < this.min) {
                    // Span is still shorter than the shortest lexicon entry.
                    continue;
                }
                String joinedKey = joined.toString();
                String spacedKey = spaced.toString();
                if (this.ignoreCase) {
                    joinedKey = joinedKey.toLowerCase();
                    spacedKey = spacedKey.toLowerCase();
                }
                if (this.lexicon.contains(joinedKey) || this.lexicon.contains(spacedKey)) {
                    markFrom(start, end, inLexicon);
                }
            }
        }
        for (int i = 0; i < size; i++) {
            if (inLexicon[i]) {
                ((Token) tokens.get(i)).setFeatureValue(this.name, 1.0d);
            }
        }
        return instance;
    }

    /** Sets {@code zArr[i..i2]} (both ends inclusive) to {@code true}. */
    private void markFrom(int i, int i2, boolean[] zArr) {
        for (int k = i; k <= i2; k++) {
            zArr[k] = true;
        }
    }

    /**
     * Counts the tokens of {@code str} when split on {@link #TOKEN_DELIMITERS}, where each
     * delimiter character is itself returned as a token.
     *
     * @param str             lexicon entry to measure
     * @param countWhitespace whether space, tab, newline and carriage-return delimiters are
     *                        included in the count
     * @return the number of tokens counted
     */
    private static int countTokens(String str, boolean countWhitespace) {
        StringTokenizer tokenizer = new StringTokenizer(str, TOKEN_DELIMITERS, true);
        if (countWhitespace) {
            return tokenizer.countTokens();
        }
        int count = 0;
        while (tokenizer.hasMoreTokens()) {
            if (!isWhitespaceDelimiter(tokenizer.nextToken())) {
                count++;
            }
        }
        return count;
    }

    /** Returns whether {@code token} is a single space, tab, newline or carriage return. */
    private static boolean isWhitespaceDelimiter(String token) {
        return token.length() == 1 && " \t\n\r".indexOf(token.charAt(0)) >= 0;
    }
}
