package de.dfki.km.leech.lucene.basic;

import de.dfki.inquisitor.collections.MultiValueTreeMap;
import de.dfki.inquisitor.text.Levenshtein;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.ClassicSimilarity;

/* loaded from: input_file:de/dfki/km/leech/lucene/basic/Buzzwords.class */
/**
 * Static helpers that derive "buzzwords" for a single indexed document: terms taken from the
 * document's term vectors and ranked by a tf * idf^2 score computed with
 * {@link ClassicSimilarity}.
 */
public class Buzzwords {
    /** Similarity that supplies the tf and idf factors of the buzzword score. */
    protected static ClassicSimilarity m_defaultSimilarity = new ClassicSimilarity();

    /** Accepts only terms without any digit; compiled once because it is applied to every candidate term. */
    private static final Pattern NON_DIGIT_TERM = Pattern.compile("\\D+");

    /** Levenshtein distance up to which two terms are treated as similar. */
    private static final int SIMILAR_TERM_DISTANCE = 3;

    /** Maximum number of frequent terms collected from a single field's term vector. */
    private static final int MAX_TERMS_PER_FIELD = 1234;

    /** Upper bound for the initial queue allocation; the queue still grows on demand. */
    private static final int INITIAL_QUEUE_CAPACITY_LIMIT = 1024;

    /** Orders entries by ascending frequency, so the head of a priority queue is the rarest term. */
    private static final Comparator<Term2FrequencyEntry> ASCENDING_FREQUENCY = new Comparator<Term2FrequencyEntry>() {
        @Override
        public int compare(Term2FrequencyEntry left, Term2FrequencyEntry right) {
            return left.getFrequency().compareTo(right.getFrequency());
        }
    };

    /**
     * Returns the string value of the named field.
     *
     * @param document the document to read from
     * @param str the field name
     * @return the field's string value, or {@code null} if the document has no such field
     */
    private static String getAttributeValue(Document document, String str) {
        IndexableField field = document.getField(str);
        return field == null ? null : field.stringValue();
    }

    /**
     * Computes the buzzwords of a document and stores them, space separated, in the field
     * {@code str} of {@code document}. Existing fields with that name are removed first. Only the
     * passed {@code document} object is changed; nothing is written to an index here.
     *
     * @param i document number used to look up the term vectors in {@code indexReader}
     * @param document the document object that receives the buzzword field
     * @param str name of the field the buzzwords are written to
     * @param set names of the fields whose term vectors are used for the calculation
     * @param i2 maximum number of buzzwords to write
     * @param z if {@code true}, terms within a small Levenshtein distance of a better scored term
     *        are skipped
     * @param indexReader reader providing term vectors and document frequencies
     * @return {@code true} if the document was modified
     * @throws Exception if the underlying index access fails
     */
    public static boolean addBuzzwords(int i, Document document, String str, Set<String> set, int i2, boolean z, IndexReader indexReader) throws Exception {
        List<String> buzzwords = getBuzzwords(i, document, set, i2, z, indexReader);
        // NOTE(review): getBuzzwords never returns null in this class; the guard is kept defensively.
        if (buzzwords == null) {
            return false;
        }

        // Iterate instead of calling get(index): the list is a LinkedList, so indexed access is linear.
        StringBuilder joined = new StringBuilder();
        int remaining = i2;
        for (String buzzword : buzzwords) {
            if (remaining-- <= 0) {
                break;
            }
            joined.append(buzzword).append(' ');
        }

        document.removeFields(str);
        document.add(new Field(str, joined.toString(), new DynamicFieldType().setIndexOptionS(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS).setStoreD(true).setStoreTermVectorS(true).setStoreTermVectorOffsetS(true).setTokenizeD(true).freezE()));
        return true;
    }

    /**
     * Resolves the document number of the first document whose field {@code str} contains the
     * term {@code str2}.
     *
     * @param str name of the field that is searched
     * @param str2 the term value to look for
     * @param indexReader reader of the index to search
     * @return the document number of the first hit
     * @throws Exception if no document matches, or if the search fails
     */
    protected static int docID2DocNo(String str, String str2, IndexReader indexReader) throws Exception {
        TermQuery idQuery = new TermQuery(new Term(str, str2));
        TopDocs hits = new IndexSearcher(indexReader).search(idQuery, 1);
        if (hits.totalHits == 0) {
            throw new Exception("no lucene document found with id '" + str2 + "'");
        }
        return hits.scoreDocs[0].doc;
    }

    /**
     * Returns the buzzwords of a document without their scores.
     *
     * @param i document number used to look up the term vectors in {@code indexReader}
     * @param document the document the buzzwords are calculated for
     * @param set names of the fields whose term vectors are used for the calculation
     * @param i2 maximum number of buzzwords
     * @param z if {@code true}, terms similar to a better scored term are skipped
     * @param indexReader reader providing term vectors and document frequencies
     * @return the buzzwords in the iteration order of {@link #getBuzzwordsWithTfIdf}; never {@code null}
     * @throws Exception if the underlying index access fails
     */
    public static List<String> getBuzzwords(int i, Document document, Set<String> set, int i2, boolean z, IndexReader indexReader) throws Exception {
        return new LinkedList<>(getBuzzwordsWithTfIdf(i, document, set, i2, z, indexReader).keySet());
    }

    /**
     * Returns the buzzwords of a document together with their tf * idf^2 scores.
     *
     * <p>Terms are first collected with a minimum document frequency of 2. If that yields fewer
     * than {@code i2} terms, the result is filled up from a second pass with a minimum document
     * frequency of 1.
     *
     * @param i document number used to look up the term vectors in {@code indexReader}
     * @param document the document the buzzwords are calculated for
     * @param set names of the fields whose term vectors are used for the calculation
     * @param i2 maximum number of buzzwords
     * @param z if {@code true}, terms similar to a better scored term are skipped
     * @param indexReader reader providing term vectors and document frequencies
     * @return term to score, in the order delivered by the score map's entry list
     *         (presumably ascending score - confirm against MultiValueTreeMap)
     * @throws Exception if the underlying index access fails
     */
    public static LinkedHashMap<String, Float> getBuzzwordsWithTfIdf(int i, Document document, Set<String> set, int i2, boolean z, IndexReader indexReader) throws Exception {
        MultiValueTreeMap<Float, String> scoredTerms = retrieveInterestingTerms(i, document, set, i2, 2, 1, 2, z, indexReader);

        if (scoredTerms.valueSize() < i2) {
            MultiValueTreeMap<Float, String> relaxedTerms = retrieveInterestingTerms(i, document, set, i2, 1, 1, 2, z, indexReader);
            // NOTE(review): this consumes the relaxed candidates starting at firstKey(), i.e. the
            // lowest score first, if the map sorts ascending - confirm that this order is intended.
            while (relaxedTerms.keySize() > 0 && scoredTerms.valueSize() < i2) {
                Float score = relaxedTerms.firstKey();
                String term = relaxedTerms.getFirst(score, new String[0]);
                relaxedTerms.remove(score, term);
                if (!scoredTerms.containsValue(term)) {
                    scoredTerms.add(score, term);
                }
            }
        }

        LinkedHashMap<String, Float> termToScore = new LinkedHashMap<>();
        for (Map.Entry<Float, String> scoreAndTerm : scoredTerms.entryList()) {
            termToScore.put(scoreAndTerm.getValue(), scoreAndTerm.getKey());
        }
        return termToScore;
    }

    /**
     * Reads the term vector of one field and returns its most frequent terms.
     *
     * @param i document number used to look up the term vector in {@code indexReader}
     * @param document the document; only used to check that the field {@code str} exists
     * @param str name of the field whose term vector is read
     * @param i2 minimum frequency a term needs to be considered
     * @param i3 minimum length (in chars) a term needs to be considered
     * @param i4 maximum number of terms to return; a value below 1 yields an empty list
     * @param indexReader reader providing the term vector
     * @return at most {@code i4} entries, most frequent first; empty if the field or its term
     *         vector is missing
     * @throws Exception if the underlying index access fails
     */
    public static List<Term2FrequencyEntry> getTopFrequentTerms(int i, Document document, String str, int i2, int i3, int i4, IndexReader indexReader) throws Exception {
        final int minFrequency = i2;
        final int minTermLength = i3;
        final int maxNumberOfTerms = i4;

        LinkedList<Term2FrequencyEntry> topTerms = new LinkedList<>();
        if (maxNumberOfTerms <= 0 || document.getField(str) == null) {
            return topTerms;
        }
        Terms termVector = indexReader.getTermVector(i, str);
        if (termVector == null) {
            return topTerms;
        }

        // Min-heap on frequency: whenever the limit is exceeded the rarest collected term is dropped.
        // The requested limit is only a sizing hint here, so it is capped to avoid a huge allocation.
        PriorityQueue<Term2FrequencyEntry> rarestFirst =
                new PriorityQueue<>(Math.min(maxNumberOfTerms, INITIAL_QUEUE_CAPACITY_LIMIT), ASCENDING_FREQUENCY);

        TermsEnum termsEnum = termVector.iterator();
        while (termsEnum.next() != null) {
            String term = termsEnum.term().utf8ToString();
            long frequency = termsEnum.totalTermFreq();
            if (frequency < minFrequency || term.length() < minTermLength) {
                continue;
            }
            // Clamp instead of truncating so an oversized count cannot wrap into a wrong value.
            int clampedFrequency = (int) Math.min(frequency, (long) Integer.MAX_VALUE);
            rarestFirst.add(new Term2FrequencyEntry(term, Integer.valueOf(clampedFrequency)));
            if (rarestFirst.size() > maxNumberOfTerms) {
                rarestFirst.poll();
            }
        }

        // PriorityQueue.iterator() has no defined order; poll() delivers ascending frequency,
        // so prepending each element produces a list sorted from most to least frequent.
        while (!rarestFirst.isEmpty()) {
            topTerms.addFirst(rarestFirst.poll());
        }
        return topTerms;
    }

    /**
     * Collects the best scored terms of a document over several fields.
     *
     * <p>The frequencies of a term are summed over all fields in {@code set}; its document
     * frequency is the maximum over those fields. Terms containing a digit are ignored. The score
     * is {@code tf(frequency) * idf^2}. At most {@code i2} terms are kept: once that many are
     * stored, a new term only enters if it scores higher than the current lowest score, and the
     * entries stored under that lowest score are dropped to make room.
     *
     * @param i document number used to look up the term vectors in {@code indexReader}
     * @param document the document the terms are calculated for
     * @param set names of the fields to evaluate
     * @param i2 maximum number of terms to keep; a value below 1 yields an empty map
     * @param i3 minimum document frequency; values below 1 disable the check
     * @param i4 minimum summed term frequency; values below 1 disable the check
     * @param i5 minimum term length
     * @param z if {@code true}, of several terms within Levenshtein distance 3 only the best
     *        scored one is kept
     * @param indexReader reader providing term vectors and document frequencies
     * @return score to term(s)
     * @throws Exception if the underlying index access fails
     */
    static MultiValueTreeMap<Float, String> retrieveInterestingTerms(int i, Document document, Set<String> set, int i2, int i3, int i4, int i5, boolean z, IndexReader indexReader) throws Exception {
        final int maxNumberOfTerms = i2;
        final int minDocFrequency = i3;
        final int minTermFrequency = i4;
        final int minTermLength = i5;

        MultiValueTreeMap<Float, String> scoredTerms = new MultiValueTreeMap<>(HashSet.class);
        if (maxNumberOfTerms <= 0) {
            // Nothing may be kept; this also avoids asking an empty map for its first key below.
            return scoredTerms;
        }

        int numDocs = indexReader.numDocs();

        // Sum each term's frequency over all requested fields.
        Map<String, Integer> termFrequencies = new HashMap<>();
        for (String fieldName : set) {
            for (Term2FrequencyEntry entry : getTopFrequentTerms(i, document, fieldName, minTermFrequency, minTermLength, MAX_TERMS_PER_FIELD, indexReader)) {
                Integer known = termFrequencies.get(entry.getTerm());
                Integer summed = known == null
                        ? entry.getFrequency()
                        : Integer.valueOf(known.intValue() + entry.getFrequency().intValue());
                termFrequencies.put(entry.getTerm(), summed);
            }
        }

        for (Map.Entry<String, Integer> candidate : termFrequencies.entrySet()) {
            String term = candidate.getKey();
            int frequency = candidate.getValue().intValue();

            if (minTermFrequency > 0 && frequency < minTermFrequency) {
                continue;
            }
            if (!NON_DIGIT_TERM.matcher(term).matches()) {
                continue;
            }

            // The highest document frequency over all fields is used.
            int docFrequency = 0;
            for (String fieldName : set) {
                docFrequency = Math.max(docFrequency, indexReader.docFreq(new Term(fieldName, term)));
            }
            if (docFrequency == 0 || (minDocFrequency > 0 && docFrequency < minDocFrequency)) {
                continue;
            }

            float idf = m_defaultSimilarity.idf(docFrequency, numDocs);
            float score = m_defaultSimilarity.tf(frequency) * idf * idf;

            if (z && isSupersededBySimilarTerm(scoredTerms, term, score)) {
                continue;
            }

            // Capacity is checked after the similar-term pass, because that pass may already have
            // freed a slot. A full map only accepts terms that beat its current lowest score.
            if (scoredTerms.valueSize() >= maxNumberOfTerms) {
                if (scoredTerms.firstKey().floatValue() >= score) {
                    continue;
                }
                scoredTerms.remove(scoredTerms.firstKey());
            }
            scoredTerms.add(Float.valueOf(score), term);
        }
        return scoredTerms;
    }

    /**
     * Checks a new term against the already stored similar terms. Stored similar terms with a
     * lower score are removed from {@code scoredTerms} as they are encountered.
     *
     * @return {@code true} if a stored similar term scores at least as high, meaning the new term
     *         must not be added
     */
    private static boolean isSupersededBySimilarTerm(MultiValueTreeMap<Float, String> scoredTerms, String term, float score) throws Exception {
        // NOTE(review): entries are removed from the map while its entry list is iterated; this
        // relies on entryList() returning a snapshot - confirm against MultiValueTreeMap.
        for (Map.Entry<Float, String> stored : scoredTerms.entryList()) {
            if (!Levenshtein.isInDistance(stored.getValue(), term, SIMILAR_TERM_DISTANCE)) {
                continue;
            }
            if (stored.getKey().floatValue() >= score) {
                return true;
            }
            scoredTerms.remove(stored.getKey(), stored.getValue());
        }
        return false;
    }
}
