package de.dfki.km.exact.lucene.util;

import de.dfki.km.exact.lucene.LUCooccurrence;
import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.meta.LUMetaSearcher;
import de.dfki.km.exact.lucene.voc.FIELD;
import de.dfki.km.exact.math.EUMath;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.nlp.NLP;
import de.dfki.km.exact.nlp.analyser.EUAnalyserFactory;
import java.util.SortedSet;
import java.util.TreeSet;

/* loaded from: input_file:WEB-INF/lib/lucene-util-17-20140430.114905-2.jar:de/dfki/km/exact/lucene/util/LUContextExtraktor.class */
/**
 * Collects the co-occurrences of a term from a {@link LUSearcher} and assigns each of them a
 * significance value computed from the co-occurrence frequency, the individual term frequencies
 * (taken from a {@link LUMetaSearcher}) and the window number reported by the searcher.
 *
 * <p>NOTE(review): the two significance formulas look like the Poisson-based co-occurrence
 * measure (with a Stirling-style approximation for larger frequencies) - confirm against the
 * project documentation.
 */
public class LUContextExtraktor {
    /**
     * Co-occurrence frequencies at or above this value are scored with {@link #getSignificance};
     * smaller ones are scored with {@link #getSignificance2}, which uses the factorial directly.
     */
    private static final int APPROXIMATION_THRESHOLD = 10;

    /** Value passed to the searcher as the second argument of {@code getCoocurences}. */
    public int size;
    /** Derived from {@link #size} as {@code 2 * size + 1}. */
    public int windowSize;
    /** Minimum value {@code meta.getFrequency(term)} must reach for a co-occurring term to be kept. */
    public int minFreqClass;
    /** Result of {@code searcher.getWindowNumber(windowSize)}; used as the normaliser in the formulas. */
    public int windowNumber;
    public LUSearcher searcher;
    public LUMetaSearcher meta;
    /** Co-occurrences for which this filter returns {@code true} are skipped. */
    public LUCoocurrencyFilter mFilter;

    /**
     * Creates an extractor and logs the window number obtained from the searcher.
     *
     * @param i              stored as {@link #size}; the window size becomes {@code 2 * i + 1}
     * @param i2             first argument of the {@link LUCoocurrencyFilter} that is created
     *                       (meaning defined by that class)
     * @param i3             stored as {@link #minFreqClass}
     * @param lUSearcher     searcher that supplies co-occurrences and the window number
     * @param lUMetaSearcher source of the individual term frequencies
     */
    public LUContextExtraktor(int i, int i2, int i3, LUSearcher lUSearcher, LUMetaSearcher lUMetaSearcher) {
        this.size = i;
        this.meta = lUMetaSearcher;
        this.searcher = lUSearcher;
        this.minFreqClass = i3;
        this.windowSize = (2 * i) + 1;
        this.mFilter = getAnalyser(i2);
        this.windowNumber = lUSearcher.getWindowNumber(this.windowSize);
        EULogger.info("window number: " + this.windowNumber);
    }

    /**
     * Returns the co-occurrences of {@code str} that pass the filter and whose own frequency is
     * at least {@link #minFreqClass}, each with its significance set.
     *
     * <p>The result is a {@link TreeSet} without an explicit comparator, so ordering (and which
     * elements count as duplicates) follows the natural ordering of {@link LUCooccurrence}.
     *
     * @param str the term whose context is requested
     * @return the scored co-occurrences; empty if none qualifies
     */
    public SortedSet<LUCooccurrence> getContext(String str) {
        int termFrequency = this.meta.getFrequency(str);
        SortedSet<LUCooccurrence> context = new TreeSet<LUCooccurrence>();
        for (LUCooccurrence candidate : this.searcher.getCoocurences(1, this.size, FIELD.CONTENT, str)) {
            if (this.mFilter.filter(candidate)) {
                continue;
            }
            int jointFrequency = candidate.getFrequency();
            int neighbourFrequency = this.meta.getFrequency(candidate.getTerm());
            if (neighbourFrequency < this.minFreqClass) {
                continue;
            }
            // Both formulas take the same inputs; only the frequency decides which one applies.
            double significance = jointFrequency >= APPROXIMATION_THRESHOLD
                    ? getSignificance(jointFrequency, termFrequency, neighbourFrequency, this.windowNumber)
                    : getSignificance2(jointFrequency, termFrequency, neighbourFrequency, this.windowNumber);
            candidate.setSignificance(significance);
            context.add(candidate);
        }
        return context;
    }

    /**
     * Manual smoke run: logs every co-occurrence of "depression" whose term contains "pres",
     * using the local German Wikipedia searchers provided by {@code LULocal}.
     */
    public static void main(String[] strArr) throws Exception {
        for (LUCooccurrence lUCooccurrence : new LUContextExtraktor(7, 2, 7, LULocal.getSearcherWikipediaDE(), LULocal.getMetaSearcherWikipediaDE()).getContext("depression")) {
            if (lUCooccurrence.getTerm().contains("pres")) {
                EULogger.info(lUCooccurrence.toString());
            }
        }
    }

    /**
     * Significance used for co-occurrence frequencies of at least {@link #APPROXIMATION_THRESHOLD}:
     * {@code k * (ln k - ln lambda - 1) / ln n}, where {@code lambda = freqA * freqB / n}.
     *
     * @param joint   co-occurrence frequency {@code k}
     * @param freqA   frequency of the queried term
     * @param freqB   frequency of the co-occurring term
     * @param windows window number {@code n}
     */
    private static double getSignificance(int joint, int freqA, int freqB, int windows) {
        double expected = expectedCooccurrences(freqA, freqB, windows);
        return (joint * ((Math.log(joint) - Math.log(expected)) - 1.0d)) / Math.log(windows);
    }

    /**
     * Significance used for co-occurrence frequencies below {@link #APPROXIMATION_THRESHOLD}:
     * {@code (lambda - k * ln lambda + ln k!) / ln n}, where {@code lambda = freqA * freqB / n}.
     *
     * @param joint   co-occurrence frequency {@code k}
     * @param freqA   frequency of the queried term
     * @param freqB   frequency of the co-occurring term
     * @param windows window number {@code n}
     */
    private static double getSignificance2(int joint, int freqA, int freqB, int windows) {
        double expected = expectedCooccurrences(freqA, freqB, windows);
        return ((expected - (joint * Math.log(expected))) + Math.log(EUMath.factorial(joint))) / Math.log(windows);
    }

    /**
     * Computes {@code freqA * freqB / windows} in floating point.
     *
     * <p>Doing this in {@code int} arithmetic truncated every value below 1 to 0 (whose logarithm
     * is negative infinity) and could overflow in the multiplication.
     *
     * @throws ArithmeticException if {@code windows} is zero, as the former integer division did
     */
    private static double expectedCooccurrences(int freqA, int freqB, int windows) {
        if (windows == 0) {
            throw new ArithmeticException("window number must not be zero");
        }
        return ((double) freqA * freqB) / windows;
    }

    /**
     * Builds the co-occurrence filter around an analyser obtained from {@link EUAnalyserFactory}
     * for German with the common and special delimiters.
     *
     * @param i first constructor argument of {@link LUCoocurrencyFilter}
     */
    private static LUCoocurrencyFilter getAnalyser(int i) {
        return new LUCoocurrencyFilter(i, EUAnalyserFactory.getAnalyser(4, NLP.LANGUAGE.de, NLP.CommonDelimeter, NLP.SpecialDelimeter));
    }
}
