package de.dfki.km.exact.lucene.meta;

import de.dfki.km.exact.lucene.LUQueryFactory;
import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.voc.DEFAULT;
import de.dfki.km.exact.lucene.voc.FIELD;
import de.dfki.km.exact.misc.EUString;
import de.dfki.km.exact.nlp.EUDigit;
import de.dfki.km.exact.nlp.EUStopWord;
import de.dfki.km.exact.nlp.NGram;
import de.dfki.km.exact.nlp.NLP;
import de.dfki.km.exact.nlp.similarity.Levenshtein;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;

/* loaded from: input_file:de/dfki/km/exact/lucene/meta/LUMetaSearcher.class */
/**
 * Searcher over a term "meta" index. It reads per-term statistics (frequency,
 * document frequency, frequency class) and finds indexed terms that are similar
 * to a given word or phrase.
 *
 * <p>Similar terms are found in two steps: candidates are retrieved through an
 * n-gram field of the index and then filtered with a Levenshtein based measure.
 * Results for single words are cached per (word, hit limit, threshold).
 *
 * <p>NOTE(review): the cache is a plain {@link HashMap}; nothing in this class
 * synchronizes access to it, so instances should be confined to one thread
 * unless the caller synchronizes externally.
 */
public class LUMetaSearcher extends LUSearcher {
    private static final Logger sLogger = Logger.getLogger(LUMetaSearcher.class.getName());

    /** Default minimum similarity used by {@link #getSimiliarTerms(String)}. */
    private static final double DEFAULT_SIMILARITY = 0.7d;
    /** Default hit limit used by {@link #getSimiliarTerms(String)}. */
    private static final int DEFAULT_SIMILARITY_SEARCH_NUMBER = 50;
    /** Default highest frequency class that is treated as stop-word class. */
    private static final int DEFAULT_MAX_STOP_WORD_FREQUENCY_CLASS = 4;
    /** Separates the parts of a similarity-cache key (NUL character). */
    private static final char CACHE_KEY_SEPARATOR = '\0';

    private int mNGramLength;
    private String mNGramField;
    private double mSimilarity;
    private boolean mExtendNGrams;
    private int mSimilaritySearchNumber;
    private String[] mStopWordFrequencies;
    /** Single-word results, keyed by word, hit limit and threshold (see {@link #cacheKey}). */
    private final Map<String, Set<String>> mSimilarityCache = new HashMap<String, Set<String>>();

    /**
     * Creates a searcher from the given string, which is passed unchanged to the
     * superclass constructor, and applies the default settings.
     *
     * @param str argument forwarded to {@code LUSearcher(String)}
     * @throws Exception if the superclass constructor fails
     */
    public LUMetaSearcher(String str) throws Exception {
        super(str);
        init();
    }

    /**
     * Creates a searcher on the given Lucene directory and applies the default settings.
     *
     * @param directory directory forwarded to {@code LUSearcher(Directory)}
     * @throws Exception if the superclass constructor fails
     */
    public LUMetaSearcher(Directory directory) throws Exception {
        super(directory);
        init();
    }

    /** Applies the default configuration shared by all constructors. */
    private void init() {
        setNGramType(NGRAM_TYPE);
        this.mSimilarity = DEFAULT_SIMILARITY;
        this.mExtendNGrams = true;
        this.mSimilaritySearchNumber = DEFAULT_SIMILARITY_SEARCH_NUMBER;
        setMaxStopWordFrequencyClass(DEFAULT_MAX_STOP_WORD_FREQUENCY_CLASS);
    }

    /**
     * Selects the frequency classes {@code 1..i} as the classes queried by
     * {@link #getStopWords()}.
     *
     * @param i highest frequency class to include; {@code 0} selects no class
     * @throws IllegalArgumentException if {@code i} is negative
     */
    public final void setMaxStopWordFrequencyClass(int i) {
        if (i < 0) {
            throw new IllegalArgumentException("Maximum stop word frequency class must not be negative: " + i);
        }
        String[] classes = new String[i];
        for (int index = 0; index < i; index++) {
            // Classes are 1-based and stored as strings because they are used as query terms.
            classes[index] = String.valueOf(index + 1);
        }
        this.mStopWordFrequencies = classes;
    }

    /**
     * Sets the default minimum similarity used by {@link #getSimiliarTerms(String)}.
     *
     * @param d minimum similarity a candidate must reach to be returned
     */
    public final void setSimilarity(double d) {
        this.mSimilarity = d;
    }

    /**
     * Sets the default hit limit used by {@link #getSimiliarTerms(String)}.
     *
     * @param i maximum number of hits requested from the index per search
     */
    public final void setSimilaritySearchNumber(int i) {
        this.mSimilaritySearchNumber = i;
    }

    /**
     * Returns the terms whose frequency class is one of the configured stop-word
     * classes, leaving out every term for which {@code EUDigit.hasDigit} is true.
     *
     * <p>If the search fails the error is logged and the terms collected so far
     * (possibly none) are returned; no exception is propagated.
     *
     * @return the stop words found, never {@code null}
     */
    public final List<String> getStopWords() {
        List<String> stopWords = new LinkedList<String>();
        try {
            ScoreDoc[] hits = search(DEFAULT.MAX_HIT_NUMBER,
                    LUQueryFactory.getBooleanQuery(FIELD.FREQUENCY_CLASS, BooleanClause.Occur.SHOULD, this.mStopWordFrequencies));
            for (ScoreDoc hit : hits) {
                String term = getValue(hit.doc, FIELD.TERM);
                if (!EUDigit.hasDigit(term)) {
                    stopWords.add(term);
                }
            }
        } catch (Exception e) {
            // Pass the throwable along so the stack trace is not lost.
            sLogger.log(Level.SEVERE, "Failed to collect stop words", e);
        }
        return stopWords;
    }

    /**
     * Returns the stored frequency of a term, looked up via {@code FIELD.TERM_NA}.
     *
     * @param str term to look up
     * @return the value of {@code FIELD.FREQUENCY}, or {@code -1} if the term is
     *         not indexed or the lookup fails (failures are logged)
     */
    public final int getFrequency(String str) {
        try {
            int doc = findFirstDoc(FIELD.TERM_NA, str);
            return doc < 0 ? -1 : Integer.parseInt(getValue(doc, FIELD.FREQUENCY));
        } catch (Exception e) {
            sLogger.log(Level.WARNING, "Failed to read frequency of term '" + str + "'", e);
            return -1;
        }
    }

    /**
     * Returns the stored document frequency of a term, looked up via {@code FIELD.TERM_NA}.
     *
     * @param str term to look up
     * @return the value of {@code FIELD.DOC_FREQUENCY}, or {@code -1} if the term
     *         is not indexed or the lookup fails (failures are logged)
     */
    public final int getDocFrequency(String str) {
        try {
            int doc = findFirstDoc(FIELD.TERM_NA, str);
            return doc < 0 ? -1 : Integer.parseInt(getValue(doc, FIELD.DOC_FREQUENCY));
        } catch (Exception e) {
            sLogger.log(Level.WARNING, "Failed to read document frequency of term '" + str + "'", e);
            return -1;
        }
    }

    /**
     * Returns the frequency class of a term, looked up via {@code FIELD.TERM_NA}.
     * Equivalent to {@code getFrequencyClass(str, false)}.
     *
     * @param str term to look up
     * @return the frequency class, or {@code -1} if unknown or on failure
     */
    public final int getFrequencyClass(String str) {
        return getFrequencyClass(str, false);
    }

    /**
     * Returns the maximum frequency stored in the index.
     *
     * @return the value of {@code FIELD.MAX_FREQUENCY}, or {@code -1} if the
     *         index holds no matching document
     * @throws Exception if the search fails or the stored value is not an integer
     */
    public final int getMaxFrequency() throws Exception {
        int doc = findFirstDoc(FIELD.MAX_FREQUENT_TERM, DEFAULT.MAX_FREQUENT_TERM);
        return doc < 0 ? -1 : Integer.parseInt(getValue(doc, FIELD.MAX_FREQUENCY));
    }

    /**
     * Returns the frequency class of a term.
     *
     * @param str term to look up
     * @param z   if {@code true} the term is matched against {@code FIELD.TERM},
     *            otherwise against {@code FIELD.TERM_NA}
     *            (presumably the analyzed vs. not-analyzed variant of the term
     *            field - TODO confirm against the indexer)
     * @return the value of {@code FIELD.FREQUENCY_CLASS}, or {@code -1} if the
     *         term is not indexed or the lookup fails (failures are logged)
     */
    public final int getFrequencyClass(String str, boolean z) {
        String searchField = z ? FIELD.TERM : FIELD.TERM_NA;
        try {
            int doc = findFirstDoc(searchField, str);
            return doc < 0 ? -1 : Integer.parseInt(getValue(doc, FIELD.FREQUENCY_CLASS));
        } catch (Exception e) {
            sLogger.log(Level.WARNING, "Failed to read frequency class of term '" + str + "'", e);
            return -1;
        }
    }

    /**
     * Runs a single-hit term query and returns the id of the best matching document.
     *
     * @param field index field to search
     * @param text  exact term text to match
     * @return the document id, or {@code -1} if nothing matched
     * @throws Exception if the search fails
     */
    private int findFirstDoc(String field, String text) throws Exception {
        ScoreDoc[] hits = search(1, new TermQuery(new Term(field, text)));
        return hits.length > 0 ? hits[0].doc : -1;
    }

    /**
     * Finds indexed terms similar to a multi-word phrase. Every word is expanded
     * to its similar single-word terms, the index is searched for documents
     * containing any of them, and each hit is kept if
     * {@code 1 - distance / phrase.length()} reaches the threshold.
     *
     * @param phrase        the complete phrase; callers pass a non-empty string
     * @param words         the words of {@code phrase}
     * @param maxHits       hit limit for every index search
     * @param minSimilarity minimum similarity a term must reach
     * @return a new, mutable set with the accepted terms
     * @throws Exception if the candidate search fails
     */
    private Set<String> getSimiliarMultiWordTerms(String phrase, String[] words, int maxHits, double minSimilarity) throws Exception {
        BooleanQuery candidateQuery = new BooleanQuery();
        for (String word : words) {
            for (String similarWord : getSimiliarSingleWordTerms(word, maxHits, minSimilarity)) {
                candidateQuery.add(new TermQuery(new Term(FIELD.TERM, similarWord)), BooleanClause.Occur.SHOULD);
            }
        }
        Set<String> accepted = new HashSet<String>();
        for (ScoreDoc hit : search(maxHits, candidateQuery)) {
            String candidate = getValue(hit.doc, FIELD.TERM);
            // Widen to double BEFORE dividing: with an integral distance the former
            // "distance / length" truncated to 0 for every distance below the phrase
            // length, which made nearly every candidate pass the threshold.
            double distance = Levenshtein.compute(candidate, phrase);
            if (1.0d - distance / phrase.length() >= minSimilarity) {
                accepted.add(candidate);
            }
        }
        return accepted;
    }

    /**
     * Finds indexed terms similar to a single word. Candidates are retrieved via
     * the configured n-gram field and kept if {@code Levenshtein.compare}
     * reaches the threshold. A stop word is returned as its own only match.
     *
     * <p>Successful results are cached per (word, hit limit, threshold). If the
     * search fails the error is logged, the terms collected so far are returned
     * and nothing is cached, so a later call retries the search.
     *
     * @param word          the word to find similar terms for
     * @param maxHits       hit limit for the n-gram search
     * @param minSimilarity minimum similarity a term must reach
     * @return the similar terms; this may be the cached instance and must not be
     *         modified or handed out to callers without copying
     */
    private Set<String> getSimiliarSingleWordTerms(String word, int maxHits, double minSimilarity) {
        String key = cacheKey(word, maxHits, minSimilarity);
        Set<String> cached = this.mSimilarityCache.get(key);
        if (cached != null) {
            return cached;
        }
        Set<String> similar = new HashSet<String>();
        if (EUStopWord.isStopWord(word, NLP.LANGUAGE.any)) {
            similar.add(word);
            this.mSimilarityCache.put(key, similar);
            return similar;
        }
        try {
            BooleanQuery nGramQuery = new BooleanQuery();
            for (String nGram : NGram.getNGrams(this.mNGramLength, this.mExtendNGrams, word)) {
                nGramQuery.add(new TermQuery(new Term(this.mNGramField, nGram)), BooleanClause.Occur.SHOULD);
            }
            for (ScoreDoc hit : search(maxHits, nGramQuery)) {
                String candidate = getValue(hit.doc, FIELD.TERM);
                if (Levenshtein.compare(word, candidate) >= minSimilarity) {
                    similar.add(candidate);
                }
            }
            // Cache only complete results; a failed search must not be remembered as "no match".
            this.mSimilarityCache.put(key, similar);
        } catch (Exception e) {
            sLogger.log(Level.WARNING, "Failed to search similar terms for '" + word + "'", e);
        }
        return similar;
    }

    /**
     * Builds the cache key for a single-word similarity lookup. The hit limit and
     * the threshold are part of the key because both influence the result.
     */
    private static String cacheKey(String word, int maxHits, double minSimilarity) {
        return word + CACHE_KEY_SEPARATOR + maxHits + CACHE_KEY_SEPARATOR + minSimilarity;
    }

    /**
     * Selects the n-gram field and length used to retrieve similarity candidates.
     * Supported types are {@code TRIGRAM}, {@code TETRAGRAM} and {@code PENTAGRAM};
     * any other value (including {@code null}) is reported in the log and leaves
     * the current settings untouched.
     *
     * @param nGramType the n-gram type to use
     */
    public final void setNGramType(NLP.NGramType nGramType) {
        if (nGramType == NLP.NGramType.TRIGRAM) {
            this.mNGramField = FIELD.TRIGRAM;
            this.mNGramLength = 3;
        } else if (nGramType == NLP.NGramType.TETRAGRAM) {
            this.mNGramField = FIELD.TETRAGRAM;
            this.mNGramLength = 4;
        } else if (nGramType == NLP.NGramType.PENTAGRAM) {
            this.mNGramField = FIELD.PENTAGRAM;
            this.mNGramLength = 5;
        } else {
            sLogger.log(Level.WARNING, "Unsupported n-gram type " + nGramType + "; keeping the current n-gram settings");
            return;
        }
        // Cached results were computed with the previous n-gram configuration.
        this.mSimilarityCache.clear();
    }

    /**
     * Finds indexed terms similar to the given word or phrase, using the
     * configured default hit limit and similarity threshold.
     *
     * @param str word or phrase to find similar terms for
     * @return a mutable set owned by the caller
     * @throws Exception if a multi-word search fails
     */
    public final Set<String> getSimiliarTerms(String str) throws Exception {
        return getSimiliarTerms(str, this.mSimilaritySearchNumber, this.mSimilarity);
    }

    /**
     * Finds indexed terms similar to the given word or phrase. The input is split
     * with {@code EUString.split}; more than one part triggers the multi-word
     * search, otherwise the single-word search is used.
     *
     * @param str word or phrase to find similar terms for
     * @param i   hit limit for every index search
     * @param d   minimum similarity a term must reach
     * @return a mutable set owned by the caller
     * @throws Exception if a multi-word search fails
     */
    public final Set<String> getSimiliarTerms(String str, int i, double d) throws Exception {
        String[] words = EUString.split(str);
        if (words.length > 1) {
            return getSimiliarMultiWordTerms(str, words, i, d);
        }
        // Copy so that callers cannot modify the cached instance.
        return new HashSet<String>(getSimiliarSingleWordTerms(str, i, d));
    }
}
