package dfki.km.tweekreco.ner;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.FSTUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.lucene.util.automaton.UTF32ToUTF8;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;

/* loaded from: input_file:WEB-INF/lib/tweekreco-ner-0.1-SNAPSHOT.jar:dfki/km/tweekreco/ner/ExactMatchFuzzySuggester.class */
/**
 * Fuzzy variant of {@link ExactMatchAnalyzingSuggester}: the lookup automaton is expanded into a
 * union of Levenshtein automata before it is intersected with the suggestion FST, so that terms
 * within {@code maxEdits} edits of the query are matched as well.
 *
 * <p>The {@link #main(String[])} method is a small console demo that documents the observed
 * behaviour for a handful of misspelled queries.
 */
public class ExactMatchFuzzySuggester extends ExactMatchAnalyzingSuggester {
    public static final boolean DEFAULT_UNICODE_AWARE = false;
    public static final int DEFAULT_MIN_FUZZY_LENGTH = 3;
    public static final int DEFAULT_NON_FUZZY_PREFIX = 1;
    public static final int DEFAULT_MAX_EDITS = 1;
    public static final boolean DEFAULT_TRANSPOSITIONS = true;

    /**
     * Upper bound on the number of results requested by the demo in {@link #main(String[])}.
     * NOTE(review): this borrows an unrelated char-filter constant purely for its numeric value
     * (most likely a decompiler artefact); kept as-is so the demo's behaviour does not change.
     */
    private static final int DEMO_MAX_RESULTS = PatternReplaceCharFilter.DEFAULT_MAX_BLOCK_CHARS;

    /** Maximum number of edits allowed for a fuzzy match (validated to be 0..2). */
    private final int maxEdits;
    /** Passed to {@link LevenshteinAutomata} as its transpositions flag. */
    private final boolean transpositions;
    /** Number of leading symbols of each query path that must match exactly. */
    private final int nonFuzzyPrefix;
    /** Query paths shorter than this are matched exactly, without any edits. */
    private final int minFuzzyLength;
    /** Selects code-point arcs / alphabet instead of byte ones; always {@link #DEFAULT_UNICODE_AWARE} here. */
    private final boolean unicodeAware;

    /**
     * Console demo: builds a suggester over a three-word dictionary and prints the lookup
     * results for several misspellings of "screen".
     *
     * @param strArr ignored
     * @throws Exception if building the suggester or a lookup fails
     */
    public static void main(String[] strArr) throws Exception {
        System.out.println("For named entity extraction, the 'exact' results are the only ones of interest.\nFor fuzzy match, the goal is to get all Levensthein-matching terms as 'exact' results too.");
        KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
        ExactMatchFuzzySuggester suggester = new ExactMatchFuzzySuggester(keywordAnalyzer, keywordAnalyzer, 2, 256, -1, false, 2, false, 0, 0);
        List<String> dictionary = Arrays.asList("screen", "screensaver", "mouse");
        System.out.println("\nDictionary: " + dictionary);
        final Iterator<String> terms = dictionary.iterator();
        suggester.build(new InputIterator.InputIteratorWrapper(new BytesRefIterator() {
            @Override
            public BytesRef next() throws IOException {
                // A null return signals the end of the input to the consumer.
                if (terms.hasNext()) {
                    return new BytesRef(terms.next());
                }
                return null;
            }

            @Override
            public Comparator<BytesRef> getComparator() {
                return null;
            }
        }));

        runDemoQuery(suggester, "screan",
                "\nquery: 'screan' - exact result as expected (correct). But not in any case! This is when one letter is changed, which is not the first or last one.",
                "All results: - double entry of 'screen'?");
        runDemoQuery(suggester, "screew",
                "\nquery: 'screew' - last letter changed: exact result empty (incorrect).",
                "All results:");
        runDemoQuery(suggester, "wcreen",
                "\nquery: 'wcreen' - first letter changed: nothing found at all.",
                "All results:");
        runDemoQuery(suggester, "scree",
                "\nquery: 'scree' - last letter removed.",
                "All results:");
        runDemoQuery(suggester, "scren",
                "\nquery: 'scren' - 5th letter removed. Same as with last removed letter.",
                "All results:");
        runDemoQuery(suggester, "sreen",
                "\nquery: 'sreen' - 2th letter removed. Why different?",
                "All results: - double entry of 'screen'?");
        runDemoQuery(suggester, "screen",
                "\nquery: 'screen' - correct query: screen not found at all?",
                "All results:");
    }

    /**
     * Prints {@code description}, runs one lookup for {@code query}, then prints
     * {@code resultsHeader} followed by every result on its own indented line.
     */
    private static void runDemoQuery(ExactMatchFuzzySuggester suggester, String query, String description, String resultsHeader) throws Exception {
        System.out.println(description);
        List<Lookup.LookupResult> results = suggester.lookup(query, false, DEMO_MAX_RESULTS);
        System.out.println(resultsHeader);
        for (Lookup.LookupResult result : results) {
            System.out.println("  " + result);
        }
    }

    /**
     * Creates a suggester that uses the same analyzer for indexing and querying.
     *
     * @param analyzer analyzer used for both roles
     */
    public ExactMatchFuzzySuggester(Analyzer analyzer) {
        this(analyzer, analyzer);
    }

    /**
     * Creates a suggester with this class's fixed settings: max edits 1, no transpositions,
     * no exact prefix and no minimum fuzzy length.
     *
     * <p>Note that these values are NOT taken from the {@code DEFAULT_*} constants declared on
     * this class (transpositions, non-fuzzy prefix and minimum fuzzy length all differ).
     *
     * @param analyzer first analyzer, forwarded to the full constructor
     * @param analyzer2 second analyzer, forwarded to the full constructor
     */
    public ExactMatchFuzzySuggester(Analyzer analyzer, Analyzer analyzer2) {
        this(analyzer, analyzer2, 0, 256, -1, false, 1, false, 0, 0);
    }

    /**
     * Creates a fully configured suggester. The first six arguments are forwarded unchanged to
     * the superclass constructor; their names below follow Lucene's {@code AnalyzingSuggester}
     * and should be confirmed against {@link ExactMatchAnalyzingSuggester}.
     *
     * @param indexAnalyzer forwarded to the superclass (presumably the index-time analyzer)
     * @param queryAnalyzer forwarded to the superclass (presumably the query-time analyzer)
     * @param options forwarded to the superclass
     * @param maxSurfaceFormsPerAnalyzedForm forwarded to the superclass
     * @param maxGraphExpansions forwarded to the superclass
     * @param preservePositionIncrements forwarded to the superclass
     * @param maxEdits maximum number of edits, must be between 0 and 2 inclusive
     * @param transpositions transpositions flag handed to {@link LevenshteinAutomata}
     * @param nonFuzzyPrefix number of leading symbols that must match exactly, must be &gt;= 0
     * @param minFuzzyLength minimum path length before edits are allowed, must be &gt;= 0
     * @throws IllegalArgumentException if {@code maxEdits}, {@code nonFuzzyPrefix} or
     *     {@code minFuzzyLength} is out of range
     */
    public ExactMatchFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean preservePositionIncrements, int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength) {
        super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements);
        if (maxEdits < 0 || maxEdits > 2) {
            throw new IllegalArgumentException("maxEdits must be between 0 and 2");
        }
        if (nonFuzzyPrefix < 0) {
            throw new IllegalArgumentException("nonFuzzyPrefix must be >= 0 (got " + nonFuzzyPrefix + ")");
        }
        if (minFuzzyLength < 0) {
            throw new IllegalArgumentException("minFuzzyLength must be >= 0 (got " + minFuzzyLength + ")");
        }
        this.maxEdits = maxEdits;
        this.transpositions = transpositions;
        this.nonFuzzyPrefix = nonFuzzyPrefix;
        this.minFuzzyLength = minFuzzyLength;
        this.unicodeAware = DEFAULT_UNICODE_AWARE;
    }

    /**
     * Computes the FST prefix paths for a lookup by intersecting the FST with the fuzzy
     * (Levenshtein-expanded) version of the lookup automaton. The incoming {@code list} of
     * already computed prefix paths is not used.
     *
     * <p>Decompiler note: the access modifier was originally {@code protected}.
     *
     * @param list prefix paths computed by the caller; ignored
     * @param automaton lookup automaton for the query
     * @param fst suggestion FST to intersect with
     * @return the prefix paths of {@code fst} accepted by the fuzzy automaton
     * @throws IOException if the intersection fails
     */
    @Override // org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester
    public List<FSTUtil.Path<PairOutputs.Pair<Long, BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long, BytesRef>>> list, Automaton automaton, FST<PairOutputs.Pair<Long, BytesRef>> fst) throws IOException {
        Automaton fuzzyAutomaton = convertAutomaton(toLevenshteinAutomata(automaton));
        return FSTUtil.intersectPrefixPaths(fuzzyAutomaton, fst);
    }

    /**
     * Returns {@code automaton} unchanged unless the suggester is unicode aware, in which case
     * the automaton is converted with {@link UTF32ToUTF8} and determinized.
     *
     * <p>Decompiler note: the access modifier was originally {@code protected}.
     *
     * @param automaton automaton to convert
     * @return the automaton to intersect with the FST
     */
    @Override // org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester
    public Automaton convertAutomaton(Automaton automaton) {
        if (!this.unicodeAware) {
            return automaton;
        }
        Automaton utf8Automaton = new UTF32ToUTF8().convert(automaton);
        BasicOperations.determinize(utf8Automaton);
        return utf8Automaton;
    }

    /**
     * Creates a fresh {@link TokenStreamToAutomaton} whose unicode-arcs setting mirrors this
     * suggester's {@code unicodeAware} flag.
     */
    TokenStreamToAutomaton getTokenStreamToAutomaton() {
        TokenStreamToAutomaton tokenStreamToAutomaton = new TokenStreamToAutomaton();
        tokenStreamToAutomaton.setUnicodeArcs(this.unicodeAware);
        return tokenStreamToAutomaton;
    }

    /**
     * Expands every finite string accepted by {@code automaton} into an automaton that also
     * accepts its fuzzy variants, and returns the union of those automata.
     *
     * <p>Strings no longer than {@code nonFuzzyPrefix}, or shorter than {@code minFuzzyLength},
     * are matched exactly. For all others the first {@code nonFuzzyPrefix} symbols must match
     * exactly and the remainder may differ by up to {@code maxEdits} edits.
     *
     * @param automaton lookup automaton with a finite language
     * @return an empty automaton if there are no strings, the single automaton if there is
     *     exactly one, otherwise the determinized union
     */
    Automaton toLevenshteinAutomata(Automaton automaton) {
        Set<IntsRef> paths = SpecialOperations.getFiniteStrings(automaton, -1);
        Automaton[] perPath = new Automaton[paths.size()];
        int next = 0;
        for (IntsRef path : paths) {
            boolean matchExactly = path.length <= this.nonFuzzyPrefix || path.length < this.minFuzzyLength;
            if (matchExactly) {
                perPath[next++] = BasicAutomata.makeString(path.ints, path.offset, path.length);
                continue;
            }
            // Exact part: the first nonFuzzyPrefix symbols of the path.
            Automaton exactPrefix = BasicAutomata.makeString(path.ints, path.offset, this.nonFuzzyPrefix);
            // Fuzzy part: everything after the exact prefix, copied into its own array.
            int[] fuzzySuffix = new int[path.length - this.nonFuzzyPrefix];
            System.arraycopy(path.ints, path.offset + this.nonFuzzyPrefix, fuzzySuffix, 0, fuzzySuffix.length);
            // Largest symbol of the alphabet: any code point when unicode aware, otherwise a byte.
            int alphabetMax = this.unicodeAware ? Character.MAX_CODE_POINT : 255;
            Automaton fuzzyPart = new LevenshteinAutomata(fuzzySuffix, alphabetMax, this.transpositions).toAutomaton(this.maxEdits);
            Automaton combined = BasicOperations.concatenate(Arrays.asList(exactPrefix, fuzzyPart));
            combined.setDeterministic(true);
            perPath[next++] = combined;
        }
        if (perPath.length == 0) {
            return BasicAutomata.makeEmpty();
        }
        if (perPath.length == 1) {
            return perPath[0];
        }
        Automaton union = BasicOperations.union(Arrays.asList(perPath));
        BasicOperations.determinize(union);
        return union;
    }
}
