package de.dfki.km.exact.lucene.misc;

import de.dfki.km.exact.lucene.LUDefaultTermFilter;
import de.dfki.km.exact.lucene.LUDefaultWeighter;
import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.LUTermInfo;
import de.dfki.km.exact.lucene.LUWeightedTerm;
import de.dfki.km.exact.lucene.LUWriter;
import de.dfki.km.exact.lucene.api.LUTermFilter;
import de.dfki.km.exact.lucene.api.LUTermWeighter;
import de.dfki.km.exact.lucene.file.LUDcoumentFactory;
import de.dfki.km.exact.lucene.voc.FIELD;
import de.dfki.km.exact.misc.EUPrinter;
import de.dfki.km.exact.nlp.NLP;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/* loaded from: input_file:de/dfki/km/exact/lucene/misc/LUTermExtractor.class */
/**
 * Extracts weighted terms from a Lucene index.
 *
 * <p>Candidate terms are screened with an {@link LUTermFilter} (a term is dropped when
 * {@code filter(term)} returns {@code true}) and scored with an {@link LUTermWeighter}.
 * The weighter is configured through setters before each {@code calculate()} call, so a
 * single extractor instance must not be used from several threads at the same time.
 */
public class LUTermExtractor {
    private final LUTermFilter mFilter;
    private final LUSearcher mSearcher;
    private final LUTermWeighter mWeighter;

    /** Per-term statistics buffered while a {@link TermsEnum} is walked exactly once. */
    private static final class TermStats {
        final String text;
        final int freq;
        final int docFreq;

        TermStats(String text, int freq, int docFreq) {
            this.text = text;
            this.freq = freq;
            this.docFreq = docFreq;
        }
    }

    /**
     * Creates an extractor with the default filter (language {@code any}) and the default
     * weighter bound to the given searcher.
     *
     * @param lUSearcher searcher giving access to the index
     */
    public LUTermExtractor(LUSearcher lUSearcher) {
        this(new LUDefaultTermFilter(NLP.LANGUAGE.any), new LUDefaultWeighter(lUSearcher), lUSearcher);
    }

    /**
     * Creates an extractor with an explicit filter and weighter. The weighter is told the
     * {@code maxDoc()} of the searcher's index reader.
     *
     * @param lUTermFilter   filter deciding which terms are skipped
     * @param lUTermWeighter weighter used to score the remaining terms
     * @param lUSearcher     searcher giving access to the index
     */
    public LUTermExtractor(LUTermFilter lUTermFilter, LUTermWeighter lUTermWeighter, LUSearcher lUSearcher) {
        this.mFilter = lUTermFilter;
        this.mWeighter = lUTermWeighter;
        this.mSearcher = lUSearcher;
        this.mWeighter.setMaxDocNumber(lUSearcher.getIndexSearcher().getIndexReader().maxDoc());
    }

    /**
     * Weighs the terms stored in the term vector of one document.
     *
     * @param str field name
     * @param i   document id
     * @return the weighted, unfiltered terms; empty when the document has no term vector
     *         for the field
     * @throws Exception if the index cannot be read
     */
    public final Set<LUWeightedTerm> getRelevantSingleWordTerms(String str, int i) throws Exception {
        Terms termVector = this.mSearcher.getIndexSearcher().getIndexReader().getTermVector(i, str);
        if (termVector == null) {
            // No term vector stored for this document/field: nothing to weigh.
            return new TreeSet<LUWeightedTerm>();
        }
        return getRelevantSingleWordTerms(str, termVector.iterator((TermsEnum) null));
    }

    /**
     * Weighs every unfiltered term delivered by the given enumeration.
     *
     * <p>The enumeration is consumed in a single pass: the maximum frequency over all terms
     * (filtered or not) and the statistics of the unfiltered terms are collected first, and
     * only then is the weighter configured and run. A {@code TermsEnum} cannot be rewound,
     * so computing the maximum in a separate pass would exhaust it before weighting.
     *
     * @param str       field name (not used by this overload; kept for API compatibility)
     * @param termsEnum enumeration positioned before its first term
     * @return the weighted terms
     * @throws Exception if the index cannot be read
     */
    public final Set<LUWeightedTerm> getRelevantSingleWordTerms(String str, TermsEnum termsEnum) throws Exception {
        List<TermStats> candidates = new ArrayList<TermStats>();
        int maxFreq = -1; // stays -1 for an empty enumeration
        for (BytesRef ref = termsEnum.next(); ref != null; ref = termsEnum.next()) {
            // Lucene terms are UTF-8; decode explicitly instead of using the platform charset.
            String text = ref.utf8ToString();
            int freq = currentFrequency(termsEnum);
            if (freq > maxFreq) {
                maxFreq = freq;
            }
            if (!this.mFilter.filter(text)) {
                candidates.add(new TermStats(text, freq, termsEnum.docFreq()));
            }
        }

        this.mWeighter.setMaxFreq(maxFreq);
        Set<LUWeightedTerm> result = new TreeSet<LUWeightedTerm>();
        for (TermStats candidate : candidates) {
            this.mWeighter.setFreq(candidate.freq);
            this.mWeighter.setDocFreq(candidate.docFreq);
            result.add(new LUWeightedTerm(candidate.text, this.mWeighter.calculate()));
        }
        return result;
    }

    /**
     * Weighs every unfiltered term of a field, using frequencies summed over all documents
     * that contain the term.
     *
     * <p>NOTE(review): unlike the term-vector overloads this method never calls
     * {@code setMaxFreq} on the weighter; confirm that this is intended.
     *
     * @param str field name
     * @return the weighted terms; empty when the searcher yields no term enumeration
     * @throws Exception if the index cannot be read
     */
    public final Set<LUWeightedTerm> getRelevantSingleWordTerms(String str) throws Exception {
        Set<LUWeightedTerm> result = new TreeSet<LUWeightedTerm>();
        TermsEnum termsEnum = this.mSearcher.getTermsEnum(str);
        if (termsEnum == null) {
            return result;
        }
        AtomicReader reader = SlowCompositeReaderWrapper.wrap(this.mSearcher.getIndexSearcher().getIndexReader());
        for (BytesRef ref = termsEnum.next(); ref != null; ref = termsEnum.next()) {
            String text = ref.utf8ToString();
            if (this.mFilter.filter(text)) {
                continue;
            }
            Term term = new Term(str, text);
            // Positions may not be indexed for the field; plain postings still carry frequencies.
            DocsEnum postings = reader.termPositionsEnum(term);
            if (postings == null) {
                postings = reader.termDocsEnum(term);
            }
            if (postings == null) {
                continue;
            }
            LUTermInfo info = new LUTermInfo(text);
            while (postings.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                info.increaseFrequency(postings.freq());
                info.increaseDocFrequency(1);
            }
            this.mWeighter.setDocFreq(info.getDocFrequency());
            this.mWeighter.setFreq(info.getFrequency());
            result.add(new LUWeightedTerm(text, this.mWeighter.calculate()));
        }
        return result;
    }

    /**
     * Weighs the multi-word terms reported by the searcher for a field. Terms for which the
     * searcher has no term info are reported on standard output and skipped.
     *
     * @param i   passed through to {@code LUSearcher.getMultiWordTerms}
     *            (presumably the number of words per term; verify against LUSearcher)
     * @param str field name
     * @return the weighted multi-word terms
     * @throws Exception if the index cannot be read
     */
    public final Set<LUWeightedTerm> getRelevantMultiWordTerms(int i, String str) throws Exception {
        Set<LUWeightedTerm> result = new TreeSet<LUWeightedTerm>();
        for (String multiWordTerm : this.mSearcher.getMultiWordTerms(i, str)) {
            LUTermInfo info = this.mSearcher.getMultiWordTermInfo(multiWordTerm, str);
            if (info == null) {
                System.out.println(multiWordTerm + " has no term info!");
                continue;
            }
            this.mWeighter.setDocFreq(info.getDocFrequency());
            this.mWeighter.setFreq(info.getFrequency());
            result.add(new LUWeightedTerm(multiWordTerm, this.mWeighter.calculate()));
        }
        return result;
    }

    /**
     * Returns the frequency of the enumeration's current term in the first document of its
     * postings, or 0 when no postings are available.
     *
     * <p>{@code freq()} is only defined after {@code nextDoc()} has been called, so the
     * postings are advanced first. When positions are not stored, the plain postings are
     * used instead.
     */
    private static int currentFrequency(TermsEnum termsEnum) throws Exception {
        DocsEnum postings = termsEnum.docsAndPositions((Bits) null, (DocsAndPositionsEnum) null);
        if (postings == null) {
            postings = termsEnum.docs((Bits) null, (DocsEnum) null);
        }
        if (postings == null || postings.nextDoc() == DocsEnum.NO_MORE_DOCS) {
            return 0;
        }
        return postings.freq();
    }

    /**
     * Small demo: indexes one German sentence in memory and prints the weighted
     * multi-word terms obtained with parameter 3 for the content field.
     */
    public static void main(String[] strArr) throws Exception {
        Directory directory = new RAMDirectory();
        LUWriter writer = new LUWriter(directory);
        try {
            writer.create();
            writer.add(LUDcoumentFactory.getDocument("a", "b", "dies ist das unternehmen einer"));
        } finally {
            // Close the writer even when indexing fails.
            writer.close();
        }
        LUTermExtractor extractor = new LUTermExtractor(new LUSearcher(directory));
        EUPrinter.printLines(extractor.getRelevantMultiWordTerms(3, FIELD.CONTENT));
    }
}
