package de.dfki.km.exact.nlp.sfc;

import de.dfk.km.exact.nlp.wkt2j.api.Lexem;
import de.dfk.km.exact.nlp.wkt2j.api.Lexicon;
import de.dfk.km.exact.nlp.wkt2j.impl.WiktionaryFactory;
import de.dfki.km.exact.file.EUFileReader;
import de.dfki.km.exact.lucene.LUFieldFactory;
import de.dfki.km.exact.lucene.LUQueryFactory;
import de.dfki.km.exact.lucene.LURAMWriter;
import de.dfki.km.exact.lucene.LURecycler;
import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.meta.LUMetaSearcher;
import de.dfki.km.exact.lucene.misc.LUContextVerifier;
import de.dfki.km.exact.lucene.misc.LULabelAnalyser;
import de.dfki.km.exact.lucene.misc.LULocal;
import de.dfki.km.exact.math.Average;
import de.dfki.km.exact.math.VMATH;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.misc.EUString;
import de.dfki.km.exact.nlp.EUTerm;
import de.dfki.km.exact.nlp.FrequencyClass;
import de.dfki.km.exact.time.Watch;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.RAMDirectory;

/* loaded from: input_file:de/dfki/km/exact/nlp/sfc/XSemFreqClass.class */
/**
 * Computes a context-restricted frequency class for words and multi-word expressions
 * (reported as "sfc" by {@link #main(String[])}).
 *
 * <p>For a given expression the class collects its surface forms, searches the main index for
 * documents containing any of them, keeps only the documents accepted by the
 * {@link LUContextVerifier}, re-indexes those documents in memory, sums the frequencies of all
 * forms in that in-memory index and passes the sum, together with the maximum frequency of the
 * meta searcher, to {@link FrequencyClass#calculate}.
 *
 * <p>All value-returning methods yield {@code -1.0} when no value could be determined.
 */
public class XSemFreqClass {
    /** Index field holding the document text. */
    private static final String CONTENT_FIELD = "content";
    /** Index field holding the document label. */
    private static final String LABEL_FIELD = "label";
    /** Result reported when no value could be computed. */
    private static final double NO_VALUE = -1.0d;

    /** Optional lexicon used to expand a term into additional forms; may be {@code null}. */
    private Lexicon mLexicon;
    private final LUSearcher mIndex;
    private final int mMaxFrequency;
    private final VMATH.AVGTYPE mAverageType = VMATH.AVGTYPE.ARITH;
    private final LUMetaSearcher mMeta;
    private final LULabelAnalyser mAnalyser;
    private final LUContextVerifier mVerifier;

    /**
     * Creates a calculator bound to the given index and context description.
     *
     * @param list            context entries handed to the {@link LUContextVerifier}
     * @param lULabelAnalyser analyser used to split input strings into terms
     * @param lUSearcher      searcher over the main document index
     * @param lUMetaSearcher  meta searcher providing the maximum frequency and similar terms
     * @throws Exception if the context verifier cannot be created
     */
    public XSemFreqClass(List<String> list, LULabelAnalyser lULabelAnalyser, LUSearcher lUSearcher, LUMetaSearcher lUMetaSearcher) throws Exception {
        this.mMeta = lUMetaSearcher;
        this.mIndex = lUSearcher;
        this.mAnalyser = lULabelAnalyser;
        this.mMaxFrequency = lUMetaSearcher.getMaxFrequency();
        this.mVerifier = new LUContextVerifier(lUSearcher, lUMetaSearcher, list);
    }

    /**
     * Analyses the given string into terms and computes its value.
     *
     * @param str the text to analyse
     * @return the value of the single term, the average (per the configured average type) over
     *         all terms when there are several, or {@code -1.0} when the analyser yields no term
     */
    public double compute(String str) {
        List<?> terms = this.mAnalyser.analyse(str).getTerms();
        if (terms.isEmpty()) {
            return NO_VALUE;
        }
        if (terms.size() == 1) {
            return compute((EUTerm) terms.get(0));
        }
        // Declared as LinkedList to stay assignable to whatever list type Average.getAverage accepts.
        LinkedList<Double> values = new LinkedList<>();
        for (Object term : terms) {
            values.add(compute((EUTerm) term));
        }
        return Average.getAverage(this.mAverageType, values);
    }

    /**
     * Sets the lexicon used to look up additional forms of a term.
     *
     * @param lexicon the lexicon to use, or {@code null} to rely on the meta searcher only
     */
    public void setLexicon(Lexicon lexicon) {
        this.mLexicon = lexicon;
    }

    /**
     * Computes the value of a single term.
     *
     * @param eUTerm the term to evaluate
     * @return the single-word value when the term consists of exactly one word, otherwise the
     *         multi-word value; {@code -1.0} if the computation failed
     */
    public double compute(EUTerm eUTerm) {
        String[] words = eUTerm.getWords();
        if (words.length == 1) {
            // The single-word result must be returned; falling through would discard it and
            // evaluate the word a second time as a multi-word expression.
            return getSingleWordExpressionValue(words[0]);
        }
        return getMultiWordExpressionValue(words);
    }

    /**
     * Computes the value of a multi-word expression by matching each of its forms as a phrase.
     *
     * @param words the words of the expression
     * @return the computed value, or {@code -1.0} if any step failed (the failure is logged)
     */
    private double getMultiWordExpressionValue(String[] words) {
        Set<String> forms = getForms(EUString.append(words));
        try {
            // One phrase query per form; a document matching any form is a candidate.
            BooleanQuery anyForm = new BooleanQuery();
            for (String form : forms) {
                PhraseQuery phrase = new PhraseQuery();
                for (String token : EUString.split(form)) {
                    phrase.add(new Term(CONTENT_FIELD, token));
                }
                anyForm.add(phrase, BooleanClause.Occur.SHOULD);
            }

            RAMDirectory directory = new RAMDirectory();
            LURAMWriter writer = new LURAMWriter(directory);
            writer.create();
            try {
                for (ScoreDoc hit : this.mIndex.search(anyForm)) {
                    addIfInContext(writer, hit);
                }
            } finally {
                // The writer has to be closed before the directory is searched, and also when
                // indexing fails part-way through.
                writer.close();
            }

            LUSearcher contextIndex = new LUSearcher(directory);
            int frequency = 0;
            for (String form : forms) {
                frequency += contextIndex.getMultiWordTermInfo(CONTENT_FIELD, form).getFrequency();
            }
            return FrequencyClass.calculate(this.mMaxFrequency, frequency);
        } catch (Exception e) {
            EULogger.warn(e);
            return NO_VALUE;
        }
    }

    /**
     * Collects the forms of an expression: the similar terms reported by the meta searcher plus,
     * when a lexicon is set, the lower-cased forms of every lexicon entry for the expression.
     *
     * <p>Lookup failures are logged and the forms gathered so far are returned.
     *
     * @param expression the expression to expand
     * @return a mutable set of forms, possibly empty, never {@code null}
     */
    private Set<String> getForms(String expression) {
        Set<String> forms = new HashSet<>();
        try {
            forms.addAll(this.mMeta.getSimiliarTerms(expression));
            if (this.mLexicon != null) {
                for (Object entry : this.mLexicon.getEntrySet(expression)) {
                    for (Object form : ((Lexem) entry).getForms()) {
                        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
                        forms.add(((String) form).toLowerCase(Locale.ROOT));
                    }
                }
            }
        } catch (Exception e) {
            EULogger.warn(e);
        }
        return forms;
    }

    /**
     * Computes the value of a single word by matching any of its forms.
     *
     * @param word the word to evaluate
     * @return the computed value, or {@code -1.0} if any step failed (the failure is logged)
     */
    private double getSingleWordExpressionValue(String word) {
        Set<String> forms = getForms(word);
        try {
            RAMDirectory directory = new RAMDirectory();
            LURAMWriter writer = new LURAMWriter(directory);
            writer.create();
            try {
                for (ScoreDoc hit : this.mIndex.search(LUQueryFactory.getBooleanQuery(CONTENT_FIELD, BooleanClause.Occur.SHOULD, forms))) {
                    addIfInContext(writer, hit);
                }
            } finally {
                // The writer has to be closed before the directory is searched, and also when
                // indexing fails part-way through.
                writer.close();
            }

            LUSearcher contextIndex = new LUSearcher(directory);
            int frequency = 0;
            for (String form : forms) {
                frequency += contextIndex.getSingleWordTermInfo(form, new String[]{CONTENT_FIELD}).getFrequency();
            }
            return FrequencyClass.calculate(this.mMaxFrequency, frequency);
        } catch (Exception e) {
            EULogger.warn(e);
            return NO_VALUE;
        }
    }

    /**
     * Demo entry point: builds a calculator from local resources and logs the plain and the
     * context-restricted frequency class of the word "atlas" together with the elapsed time.
     *
     * @param strArr command-line arguments (unused)
     * @throws Exception if any resource cannot be loaded
     */
    public static void main(String[] strArr) throws Exception {
        Watch watch = new Watch();
        watch.start();
        EULogger.info("get context...");
        List<String> lines = EUFileReader.getLines("resource/example/medicine-context.txt");
        EULogger.info("init...");
        XSemFreqClass xSemFreqClass = new XSemFreqClass(lines, new LULabelAnalyser(LULocal.getIndexWikipediaTitlesDE()), LULocal.getSearcherSpiegel(), LULocal.getMetaSearcherSpiegel());
        xSemFreqClass.setLexicon(WiktionaryFactory.getLocalWiktionaryDE().getLexicon(Lexicon.Language.de));
        EULogger.info("get sfc..");
        watch.stop();
        watch.logDurationInS();
        watch.start();
        double singleWordExpressionValue = xSemFreqClass.getSingleWordExpressionValue("atlas");
        EULogger.info("fc('atlas') = " + xSemFreqClass.mMeta.getFrequencyClass("atlas"));
        EULogger.info("sfc('atlas') = " + singleWordExpressionValue);
        watch.stop();
        watch.logDurationInS();
    }

    /**
     * Re-indexes a search hit into the given writer if its content is accepted by the context
     * verifier; other hits are skipped.
     *
     * @param writer the in-memory index writer receiving accepted documents
     * @param hit    the search hit from the main index
     * @throws Exception if the term vector cannot be read or the document cannot be added
     */
    private void addIfInContext(LURAMWriter writer, ScoreDoc hit) throws Exception {
        String[] tokens = LURecycler.recycle(this.mIndex.getIndexSearcher().getIndexReader().getTermVector(hit.doc, CONTENT_FIELD));
        if (this.mVerifier.isContext(tokens)) {
            writer.add(toContextDocument(hit, tokens));
        }
    }

    /**
     * Builds the document stored in the in-memory index for a hit: the recycled tokens joined
     * into the content field, plus the label copied from the main index.
     *
     * @param hit    the search hit from the main index
     * @param tokens the tokens recovered from the hit's content term vector
     * @return the new document
     * @throws Exception if the label cannot be read from the main index
     */
    private Document toContextDocument(ScoreDoc hit, String[] tokens) throws Exception {
        Document document = new Document();
        String label = this.mIndex.getValue(hit.doc, LABEL_FIELD);
        document.add(LUFieldFactory.getStoredAnalyzedField(CONTENT_FIELD, EUString.append(tokens)));
        document.add(LUFieldFactory.getStoredAnalyzedField(LABEL_FIELD, label));
        return document;
    }
}
