package com.wcohen.secondstring.tokens;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;

/* JADX WARN: Classes with same name are omitted:
  input_file:com/wcohen/secondstring/tokens/NGramTokenizer.class
  input_file:dist/lib/secondstring-20041015.jar:com/wcohen/secondstring/tokens/NGramTokenizer.class
 */
/* loaded from: input_file:dist/lib/secondstring-20050310.jar:com/wcohen/secondstring/tokens/NGramTokenizer.class */
/**
 * Tokenizer that expands each token produced by a wrapped tokenizer into
 * character n-grams.
 *
 * <p>Every inner token value is first padded to {@code "^" + value + "$"}.
 * For each start position in the padded string, and for each size from
 * {@code minNGramSize} to {@code maxNGramSize} inclusive, the substring of
 * that size is emitted as a token. Optionally the padded string itself is
 * emitted as well, ahead of its n-grams.
 *
 * <p>All tokens are interned through the wrapped tokenizer.
 */
public class NGramTokenizer implements Tokenizer, Serializable {
    static final long serialVersionUID = 7722773626730079634L;

    /**
     * Shared default instance: n-gram sizes 3 to 5, padded tokens kept,
     * wrapping {@code SimpleTokenizer.DEFAULT_TOKENIZER}.
     */
    public static NGramTokenizer DEFAULT_TOKENIZER =
            new NGramTokenizer(3, 5, true, SimpleTokenizer.DEFAULT_TOKENIZER);

    /** Smallest n-gram size emitted (inclusive). */
    private final int minNGramSize;
    /** Largest n-gram size emitted (inclusive). */
    private final int maxNGramSize;
    /** Whether the padded form of each inner token is emitted alongside its n-grams. */
    private final boolean keepOldTokens;
    /** Tokenizer that produces the initial tokens and interns every emitted token. */
    private final Tokenizer innerTokenizer;

    /**
     * Creates an n-gram tokenizer.
     *
     * @param i         minimum n-gram size (inclusive)
     * @param i2        maximum n-gram size (inclusive)
     * @param z         if {@code true}, the padded form {@code "^value$"} of each
     *                  inner token is also emitted
     * @param tokenizer tokenizer used to split the input and to intern tokens
     */
    public NGramTokenizer(int i, int i2, boolean z, Tokenizer tokenizer) {
        this.minNGramSize = i;
        this.maxNGramSize = i2;
        this.keepOldTokens = z;
        this.innerTokenizer = tokenizer;
    }

    /**
     * Tokenizes the input with the inner tokenizer and expands each resulting
     * token into character n-grams of its padded form.
     *
     * @param str the string to tokenize
     * @return for each inner token, in order: the padded token (only when
     *         {@code keepOldTokens} is set) followed by its n-grams, ordered
     *         by start position and then by increasing size
     */
    @Override // com.wcohen.secondstring.tokens.Tokenizer
    public Token[] tokenize(String str) {
        Token[] innerTokens = this.innerTokenizer.tokenize(str);
        ArrayList<Token> result = new ArrayList<Token>();
        for (Token innerToken : innerTokens) {
            String padded = "^" + innerToken.getValue() + "$";
            if (this.keepOldTokens) {
                result.add(intern(padded));
            }
            int paddedLength = padded.length();
            for (int start = 0; start < paddedLength; start++) {
                for (int size = this.minNGramSize; size <= this.maxNGramSize; size++) {
                    // NOTE(review): the bound is strict, so an n-gram never reaches
                    // the final character and the trailing "$" is never part of any
                    // emitted n-gram. This looks like an off-by-one, but it is kept
                    // as-is because changing it would alter the token output that
                    // existing callers see - confirm before "fixing".
                    if (start + size < paddedLength) {
                        result.add(this.innerTokenizer.intern(padded.substring(start, start + size)));
                    }
                }
            }
        }
        return result.toArray(new Token[result.size()]);
    }

    /**
     * Describes the configuration of this tokenizer.
     *
     * @return a string of the form
     *         {@code "minNGramSize: M maxNGramSize: N keepOldTokens: true|false"}
     */
    @Override
    public String toString() {
        return "minNGramSize: " + this.minNGramSize
                + " maxNGramSize: " + this.maxNGramSize
                + " keepOldTokens: " + (this.keepOldTokens ? "true" : "false");
    }

    /**
     * Interns a string by delegating to the inner tokenizer.
     *
     * @param str the token text
     * @return the token returned by the inner tokenizer's {@code intern}
     */
    @Override // com.wcohen.secondstring.tokens.Tokenizer
    public Token intern(String str) {
        return this.innerTokenizer.intern(str);
    }

    /**
     * Command-line smoke test. Tokenizes each argument into 1-grams and prints
     * the tokens, then serializes the tokenizer to {@code test.obj}, reads it
     * back, and asserts that the description of the copy matches the original.
     *
     * @param strArr strings to tokenize
     * @throws Exception if tokenizing, writing or reading {@code test.obj} fails
     */
    public static void main(String[] strArr) throws Exception {
        NGramTokenizer tokenizer = new NGramTokenizer(1, 1, false, SimpleTokenizer.DEFAULT_TOKENIZER);
        String before = tokenizer.toString();
        int tokenCount = 0; // running count across all arguments
        for (int argIndex = 0; argIndex < strArr.length; argIndex++) {
            System.out.println("argument " + argIndex + ": '" + strArr[argIndex] + "'");
            Token[] tokens = tokenizer.tokenize(strArr[argIndex]);
            for (int t = 0; t < tokens.length; t++) {
                tokenCount++;
                System.out.println("token " + tokenCount + ":" + " id=" + tokens[t].getIndex()
                        + " value: '" + tokens[t].getValue() + "'");
            }
        }

        // Write, flush and close before reading back so the file is complete
        // and no file handle is leaked.
        FileOutputStream fileOut = new FileOutputStream("test.obj");
        try {
            ObjectOutputStream objectOut = new ObjectOutputStream(fileOut);
            objectOut.writeObject(tokenizer);
            objectOut.flush();
        } finally {
            fileOut.close();
        }

        String after;
        FileInputStream fileIn = new FileInputStream("test.obj");
        try {
            ObjectInputStream objectIn = new ObjectInputStream(fileIn);
            after = ((NGramTokenizer) objectIn.readObject()).toString();
        } finally {
            fileIn.close();
        }

        assert before.equals(after) : "s1: " + before + " s2: " + after;
    }
}
