package de.dfki.km.exact.lucene.meta;

import de.dfki.km.exact.lucene.LUDefaultSimilarity;
import de.dfki.km.exact.lucene.LUDefaultTermFilter;
import de.dfki.km.exact.lucene.LURecycler;
import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.LUTermInfo;
import de.dfki.km.exact.lucene.LUWeightedTerm;
import de.dfki.km.exact.lucene.api.LUSimilarity;
import de.dfki.km.exact.lucene.api.LUTermFilter;
import de.dfki.km.exact.lucene.voc.FIELD;
import de.dfki.km.exact.math.Average;
import de.dfki.km.exact.nlp.NLP;
import java.io.StringReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/* loaded from: input_file:de/dfki/km/exact/lucene/meta/LUTermSearcher.class */
/**
 * Extracts weighted terms from a text or from an already indexed document.
 *
 * <p>Single-word terms are taken from a Lucene term frequency vector and scored with an
 * {@link LUSimilarity}, using document frequencies looked up in the main index. Multi-word
 * terms (sequences of up to {@link #getMaxWordNumber()} adjacent tokens) are only produced
 * when a {@link LUMetaSearcher} is configured; a sequence is kept only if the meta searcher
 * reports a document frequency greater than zero for it.
 *
 * <p>NOTE(review): the similarity instance is mutable state shared by all calls, so
 * instances of this class do not look safe for concurrent use - confirm before sharing.
 */
public class LUTermSearcher {
    private static final Logger sLogger = Logger.getLogger(LUTermSearcher.class.getName());

    private final LUTermFilter mTermFilter;
    private final LUSearcher mIndexSearcher;
    private final LUSimilarity mSimilarity = new LUDefaultSimilarity();
    private LUMetaSearcher mMetaSearcher;
    private int mWordNumber = 1;

    /**
     * Creates a term searcher without a meta searcher (single-word terms only until
     * {@link #setMetaSearcher(String)} is called).
     *
     * @param str argument handed to the {@link LUSearcher} constructor
     *            (presumably the location of the main index - verify against LUSearcher)
     * @param language language used to configure the default term filter
     * @throws Exception if the index searcher or the term filter cannot be created
     */
    public LUTermSearcher(String str, NLP.LANGUAGE language) throws Exception {
        this.mIndexSearcher = new LUSearcher(str);
        this.mTermFilter = new LUDefaultTermFilter(language);
        // Keep both constructors consistent: the similarity is given the size of the
        // main index here as well, exactly as the three-argument constructor does.
        this.mSimilarity.setNumDocs(this.mIndexSearcher.getIndexSearcher().maxDoc());
    }

    /**
     * Creates a term searcher with both a main index and a meta searcher.
     *
     * @param str argument handed to the {@link LUSearcher} constructor
     * @param str2 argument handed to the {@link LUMetaSearcher} constructor
     * @param language language used to configure the default term filter
     * @throws Exception if any of the searchers or the term filter cannot be created
     */
    public LUTermSearcher(String str, String str2, NLP.LANGUAGE language) throws Exception {
        this.mMetaSearcher = new LUMetaSearcher(str2);
        this.mIndexSearcher = new LUSearcher(str);
        this.mTermFilter = new LUDefaultTermFilter(language);
        this.mSimilarity.setNumDocs(this.mIndexSearcher.getIndexSearcher().maxDoc());
    }

    /**
     * Replaces the meta searcher. On failure the meta searcher is reset to {@code null}
     * (which disables multi-word terms) and the failure is logged instead of thrown.
     *
     * @param str argument handed to the {@link LUMetaSearcher} constructor
     */
    public final void setMetaSearcher(String str) {
        try {
            this.mMetaSearcher = new LUMetaSearcher(str);
        } catch (Exception e) {
            this.mMetaSearcher = null;
            // Log the exception itself so the cause and stack trace are not lost.
            sLogger.log(Level.WARNING, "Unable to create meta searcher for: " + str, e);
        }
    }

    /**
     * Sets the maximum number of words per term. Values of 1 or less disable
     * multi-word terms in {@code getRelevantTerms}.
     *
     * @param i maximum number of adjacent tokens joined into one term
     */
    public void setMaxWordNumber(int i) {
        this.mWordNumber = i;
    }

    /**
     * Returns the weighted terms of a free text: single-word terms, plus multi-word terms
     * when the maximum word number is greater than one and a meta searcher is available.
     *
     * @param str the text to analyze
     * @return weighted terms; empty if the text produced no term vector
     * @throws Exception if the temporary index cannot be built or a lookup fails
     */
    public final Set<LUWeightedTerm> getRelevantTerms(String str) throws Exception {
        // Analyze the text once and reuse the vector for both passes.
        TermFreqVector vector = getTermFreqVector(str);
        Set<LUWeightedTerm> terms = getRelevantSingleWordTerms(FIELD.CONTENT, vector);
        if (this.mWordNumber > 1 && this.mMetaSearcher != null) {
            terms.addAll(getRelevantMultiWordTerms(FIELD.CONTENT, vector));
        }
        return terms;
    }

    /**
     * Returns the weighted terms of a document stored in the main index.
     *
     * @param str name of the field whose term vector is read
     * @param i document number in the main index
     * @return weighted terms; empty if the document has no term vector for the field
     * @throws Exception if reading the index fails
     */
    public final Set<LUWeightedTerm> getRelevantTerms(String str, int i) throws Exception {
        // Read the stored vector once and reuse it for both passes.
        TermFreqVector vector = readTermFreqVector(str, i);
        Set<LUWeightedTerm> terms = getRelevantSingleWordTerms(str, vector);
        if (this.mWordNumber > 1 && this.mMetaSearcher != null) {
            terms.addAll(getRelevantMultiWordTerms(str, vector));
        }
        return terms;
    }

    /**
     * Returns the weighted single-word terms of a free text.
     *
     * @param str the text to analyze
     * @return weighted terms; empty if the text produced no term vector
     * @throws Exception if the temporary index cannot be built or a lookup fails
     */
    public final Set<LUWeightedTerm> getRelevantSingleWordTerms(String str) throws Exception {
        return getRelevantSingleWordTerms(FIELD.CONTENT, getTermFreqVector(str));
    }

    /**
     * Returns the weighted multi-word terms of a free text.
     *
     * @param str the text to analyze
     * @return weighted terms; empty if no meta searcher is set or no term vector exists
     * @throws Exception if the temporary index cannot be built or a lookup fails
     */
    public final Set<LUWeightedTerm> getRelevantMultiWordTerms(String str) throws Exception {
        return getRelevantMultiWordTerms(FIELD.CONTENT, getTermFreqVector(str));
    }

    /**
     * Returns the weighted single-word terms of a document stored in the main index.
     *
     * @param str name of the field whose term vector is read
     * @param i document number in the main index
     * @return weighted terms; empty if the document has no term vector for the field
     * @throws Exception if reading the index fails
     */
    public final Set<LUWeightedTerm> getRelevantSingleWordTerms(String str, int i) throws Exception {
        return getRelevantSingleWordTerms(str, readTermFreqVector(str, i));
    }

    /**
     * Returns the weighted multi-word terms of a document stored in the main index.
     *
     * @param str name of the field whose term vector is read
     * @param i document number in the main index
     * @return weighted terms; empty if no meta searcher is set or the field has no
     *         positional term vector
     * @throws Exception if reading the index fails
     */
    public final Set<LUWeightedTerm> getRelevantMultiWordTerms(String str, int i) throws Exception {
        return getRelevantMultiWordTerms(str, readTermFreqVector(str, i));
    }

    /** Reads the stored term vector of document {@code i} for field {@code str} from the main index. */
    private TermFreqVector readTermFreqVector(String str, int i) throws Exception {
        return this.mIndexSearcher.getIndexSearcher().getIndexReader().getTermFreqVector(i, str);
    }

    /**
     * Scores every term of the vector that the term filter does not reject.
     *
     * @param str field name used for the document frequency lookup in the main index
     * @param termFreqVector term vector to score; may be {@code null}
     * @return sorted set of weighted terms, empty when the vector is {@code null}
     */
    private Set<LUWeightedTerm> getRelevantSingleWordTerms(String str, TermFreqVector termFreqVector) throws Exception {
        Set<LUWeightedTerm> weightedTerms = new TreeSet<LUWeightedTerm>();
        if (termFreqVector == null) {
            // Lucene returns null when no term vector is stored (e.g. empty text).
            return weightedTerms;
        }
        String[] terms = termFreqVector.getTerms();
        int[] frequencies = termFreqVector.getTermFrequencies();
        this.mSimilarity.setMaxFreq(Average.getIntMax(frequencies));
        for (int index = 0; index < terms.length; index++) {
            String term = terms[index];
            if (this.mTermFilter.filterInside(term)) {
                continue;
            }
            this.mSimilarity.setFreq(frequencies[index]);
            this.mSimilarity.setDocFreq(this.mIndexSearcher.getIndexSearcher().docFreq(new Term(str, term)));
            weightedTerms.add(new LUWeightedTerm(term, this.mSimilarity.calculate()));
        }
        return weightedTerms;
    }

    /**
     * Analyzes a text by indexing it as the only document of a temporary in-memory index
     * and reading back the positional term vector of its content field.
     *
     * @param str the text to analyze
     * @return the term vector, or {@code null} if Lucene stored none for the text
     */
    private TermFreqVector getTermFreqVector(String str) throws Exception {
        RAMDirectory directory = new RAMDirectory();
        try {
            // Empty stop word set: the analyzer must not drop any token.
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31,
                    new StandardAnalyzer(Version.LUCENE_31, new HashSet<String>()));
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            IndexWriter writer = new IndexWriter(directory, config);
            try {
                Document document = new Document();
                document.add(new Field(FIELD.CONTENT, new StringReader(str), Field.TermVector.WITH_POSITIONS));
                document.add(new Field(FIELD.URI, FIELD.RAM_DOC, Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.addDocument(document);
            } finally {
                writer.close();
            }
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // Locate the single document through its fixed URI marker.
                TermQuery query = new TermQuery(new Term(FIELD.URI, FIELD.RAM_DOC));
                TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);
                searcher.search(query, collector);
                org.apache.lucene.search.ScoreDoc[] hits = collector.topDocs().scoreDocs;
                if (hits.length == 0) {
                    return null;
                }
                return searcher.getIndexReader().getTermFreqVector(hits[0].doc, FIELD.CONTENT);
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Builds candidate multi-word terms from adjacent tokens and scores those known to the
     * meta searcher.
     *
     * @param str field name; not used by this method, kept for symmetry with the
     *            single-word variant
     * @param termFreqVector term vector that must carry positions; may be {@code null}
     * @return sorted set of weighted multi-word terms; empty when the vector is
     *         {@code null}, carries no positions, or no meta searcher is set
     */
    private Set<LUWeightedTerm> getRelevantMultiWordTerms(String str, TermFreqVector termFreqVector) throws Exception {
        Set<LUWeightedTerm> weightedTerms = new TreeSet<LUWeightedTerm>();
        if (termFreqVector == null || this.mMetaSearcher == null) {
            return weightedTerms;
        }
        if (!(termFreqVector instanceof TermPositionVector)) {
            sLogger.warning("Term vector carries no positions; multi-word terms are skipped.");
            return weightedTerms;
        }
        this.mSimilarity.setMaxFreq(Average.getIntMax(termFreqVector.getTermFrequencies()));

        // Token sequence produced by LURecycler from the positional vector
        // (presumably the tokens in text order - verify against LURecycler).
        String[] tokens = LURecycler.recycle((TermPositionVector) termFreqVector);
        Map<String, LUTermInfo> candidates = new HashMap<String, LUTermInfo>();
        StringBuilder phrase = new StringBuilder();
        for (int start = 0; start < tokens.length; start++) {
            phrase.setLength(0);
            phrase.append(tokens[start]);
            // Grow the phrase one token at a time: 2 words, 3 words, ... up to mWordNumber.
            for (int offset = 1; offset < this.mWordNumber && start + offset < tokens.length; offset++) {
                phrase.append(" ");
                phrase.append(tokens[start + offset]);
                String candidate = phrase.toString();
                LUTermInfo known = candidates.get(candidate);
                if (known != null) {
                    known.increaseFrequency(1);
                    continue;
                }
                // Only phrases the meta searcher knows are kept as terms.
                int docFrequency = this.mMetaSearcher.getDocFrequency(candidate);
                if (docFrequency > 0) {
                    LUTermInfo created = new LUTermInfo(candidate);
                    created.increaseFrequency(1);
                    created.increaseDocFrequency(docFrequency);
                    candidates.put(candidate, created);
                }
            }
        }

        for (LUTermInfo info : candidates.values()) {
            this.mSimilarity.setDocFreq(info.getDocFrequency());
            this.mSimilarity.setFreq(info.getFrequency());
            weightedTerms.add(new LUWeightedTerm(info.getTerm(), this.mSimilarity.calculate()));
        }
        return weightedTerms;
    }

    /**
     * @return the current meta searcher, or {@code null} if none is set
     */
    public LUMetaSearcher getMetaSearcher() {
        return this.mMetaSearcher;
    }

    /**
     * @return the searcher over the main index
     */
    public LUSearcher getIndexSearcher() {
        return this.mIndexSearcher;
    }

    /**
     * @return the maximum number of words per term
     */
    public int getMaxWordNumber() {
        return this.mWordNumber;
    }
}
