package de.dfki.km.exact.annotation;

import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.meta.LUMetaSearcher;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.nlp.EUStopWord;
import de.dfki.km.exact.nlp.NLP;
import de.dfki.km.exact.sesame.EUTripleStoreWriter;
import de.dfki.km.exact.web.lucene.LUTripleIndexSearcher;
import de.dfki.km.exact.web.lucene.LUTripleResult;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.store.Directory;

/* loaded from: input_file:de/dfki/km/exact/annotation/EUAnnotator.class */
/**
 * Annotates the documents of a scenario index with concepts looked up in a triple index.
 *
 * <p>For every document the multi-word and single-word terms of its {@code "content"} field
 * are matched (fuzzily) against the object literals of the triple index. Each matching concept
 * is recorded in an {@link EUAnnotationStore} together with the term that produced it.
 * An optional {@link LUMetaSearcher} can be configured to discard terms whose frequency class
 * is below a configurable minimum.
 */
public class EUAnnotator implements ANNOTATION {
    /** Progress is logged, and the rejected-term caches are size-checked, every this many documents. */
    private static final int PROGRESS_INTERVAL = 100;

    /** A rejected-term cache holding more entries than this is emptied at the next progress check. */
    private static final int MAX_REJECTED_CACHE_SIZE = 1250000;

    /** Optional; stays {@code null} until {@link #setMetaSearcher(String)} succeeds. */
    private LUMetaSearcher mMetaSearcher;
    private final LUSearcher mScenarioIndexSearcher;
    private final LUTripleIndexSearcher mTripleIndexSearcher;
    private int mMinFC = 10;
    private NLP.LANGUAGE mLanguage = LANG;
    private final int mTermNumber = 1;
    private double mMinSimilarity = 0.75d;
    private int mMultiWordLength = 3;
    /** Maps a term to a concept that was found for it, so repeated terms skip the fuzzy search. */
    private final HashMap<String, String> mConceptMap = new HashMap<>();
    private final EUAnnotationStore mAnnotationStore = new EUAnnotationStore();

    /**
     * Creates an annotator from two index locations.
     *
     * @param str  argument handed to the {@link LUSearcher} used as scenario index
     * @param str2 argument handed to the {@link LUTripleIndexSearcher} used as triple index
     * @throws Exception if either searcher cannot be created
     */
    public EUAnnotator(String str, String str2) throws Exception {
        this.mScenarioIndexSearcher = new LUSearcher(str);
        this.mTripleIndexSearcher = new LUTripleIndexSearcher(str2);
    }

    /**
     * Creates an annotator from two already opened Lucene directories.
     *
     * @param directory  directory of the scenario index
     * @param directory2 directory of the triple index
     * @throws Exception if either searcher cannot be created
     */
    public EUAnnotator(Directory directory, Directory directory2) throws Exception {
        this.mScenarioIndexSearcher = new LUSearcher(directory);
        this.mTripleIndexSearcher = new LUTripleIndexSearcher(directory2);
    }

    /**
     * Sets the similarity threshold passed to the fuzzy triple lookup (default {@code 0.75}).
     *
     * @param d the new minimum similarity
     */
    public void setMinSimilarity(double d) {
        this.mMinSimilarity = d;
    }

    /**
     * Sets the minimum frequency class (default {@code 10}). Only has an effect once a meta
     * searcher has been configured.
     *
     * @param i the new minimum frequency class
     */
    public void setMinFC(int i) {
        this.mMinFC = i;
    }

    /**
     * @return the searcher on the triple index used for concept lookup
     */
    public LUTripleIndexSearcher getTripleIndexSearcher() {
        return this.mTripleIndexSearcher;
    }

    /**
     * @return the store that {@link #build()} fills with annotations and labels
     */
    public EUAnnotationStore getAnnotationStore() {
        return this.mAnnotationStore;
    }

    /**
     * Sets the language used for the stop-word check on single-word terms.
     *
     * @param language the language to use
     */
    public void setLanguage(NLP.LANGUAGE language) {
        this.mLanguage = language;
    }

    /**
     * Sets the value passed as first argument to the multi-word term extraction
     * (default {@code 3}); presumably the maximum number of words per term.
     *
     * @param i the new multi-word length
     */
    public void setMultiWordNumber(int i) {
        this.mMultiWordLength = i;
    }

    /**
     * Annotates every document of the scenario index.
     *
     * <p>Documents are visited by id from {@code 0} up to the reader's {@code maxDoc()}.
     * For each document the multi-word terms are processed first, then the single-word terms.
     * A concept is attached to a document at most once. Terms that yield nothing are remembered
     * in rejected-term caches so they are not searched again; these caches are emptied once they
     * exceed {@link #MAX_REJECTED_CACHE_SIZE} entries. A failure while processing one document is
     * logged and the remaining terms of that document are skipped.
     *
     * @throws Exception if the scenario index cannot be read outside the per-document handling
     */
    public void build() throws Exception {
        int annotationCount = 0;
        int conceptCount = 0;
        // Last term looked at; only used to give context in the per-document warning.
        String currentTerm = null;
        HashSet<String> docConcepts = new HashSet<>();
        HashSet<String> rejectedSingles = new HashSet<>();
        HashSet<String> rejectedMultis = new HashSet<>();
        int maxDoc = this.mScenarioIndexSearcher.getIndexSearcher().getIndexReader().maxDoc();
        for (int doc = 0; doc < maxDoc; doc++) {
            if (doc % PROGRESS_INTERVAL == 0) {
                EULogger.info("Doc: " + doc + " --- Concepts: " + conceptCount + " --- NonSingles: " + rejectedSingles.size() + " --- NonMultis: " + rejectedMultis.size());
                if (rejectedSingles.size() > MAX_REJECTED_CACHE_SIZE) {
                    rejectedSingles.clear();
                }
                if (rejectedMultis.size() > MAX_REJECTED_CACHE_SIZE) {
                    rejectedMultis.clear();
                }
            }
            String uri = this.mScenarioIndexSearcher.getValue(doc, "uri");
            docConcepts.clear();
            try {
                for (String term : this.mScenarioIndexSearcher.getMultiWordTerms(this.mMultiWordLength, doc, "content")) {
                    currentTerm = term;
                    if (term == null || rejectedMultis.contains(term)) {
                        continue;
                    }
                    if (this.mMetaSearcher != null && this.mMetaSearcher.getFrequencyClass(term, false) < this.mMinFC) {
                        rejectedMultis.add(term);
                    } else if (this.mConceptMap.containsKey(term)) {
                        if (annotate(uri, this.mConceptMap.get(term), term, docConcepts)) {
                            annotationCount++;
                        }
                    } else {
                        List<LUTripleResult> matches = this.mTripleIndexSearcher.getFuzzyObjectTriples(false, this.mTermNumber, term, this.mMinSimilarity);
                        if (matches.isEmpty()) {
                            rejectedMultis.add(term);
                        } else {
                            HashSet<String> concepts = new HashSet<>();
                            for (LUTripleResult match : matches) {
                                concepts.add(match.getConcept());
                            }
                            for (String concept : concepts) {
                                if (annotate(uri, concept, term, docConcepts)) {
                                    // NOTE(review): with several concepts per term only the last
                                    // one stays cached, so later cache hits yield a single concept.
                                    this.mConceptMap.put(term, concept);
                                    annotationCount++;
                                    conceptCount++;
                                }
                            }
                        }
                    }
                }
                for (String term : this.mScenarioIndexSearcher.getSingleWordTerms(doc, "content")) {
                    currentTerm = term;
                    // Null terms are skipped here exactly as in the multi-word loop, so a single
                    // null entry cannot abort the remaining terms of the document.
                    if (term == null || rejectedSingles.contains(term)) {
                        continue;
                    }
                    if (this.mMetaSearcher != null && this.mMetaSearcher.getFrequencyClass(term, true) < this.mMinFC) {
                        rejectedSingles.add(term);
                    } else if (this.mConceptMap.containsKey(term)) {
                        if (annotate(uri, this.mConceptMap.get(term), term, docConcepts)) {
                            annotationCount++;
                        }
                    } else if (EUStopWord.isStopWord(term, this.mLanguage)) {
                        rejectedSingles.add(term);
                    } else {
                        List<LUTripleResult> matches = this.mTripleIndexSearcher.getFuzzyObjectTriples(false, this.mTermNumber, term, this.mMinSimilarity);
                        if (matches.isEmpty()) {
                            rejectedSingles.add(term);
                        } else {
                            HashSet<String> concepts = new HashSet<>();
                            for (LUTripleResult match : matches) {
                                // The matched literal itself must also pass the frequency-class filter.
                                if (this.mMetaSearcher == null || this.mMetaSearcher.getFrequencyClass(match.getLiteral().toLowerCase(), true) >= this.mMinFC) {
                                    concepts.add(match.getConcept());
                                }
                            }
                            for (String concept : concepts) {
                                if (annotate(uri, concept, term, docConcepts)) {
                                    this.mConceptMap.put(term, concept);
                                    conceptCount++;
                                    annotationCount++;
                                }
                            }
                        }
                    }
                }
            } catch (Exception e) {
                EULogger.warn(getClass(), "Doc " + doc + " --- " + e + " --- " + currentTerm);
            }
        }
        EULogger.info("Number of annotated concepts " + annotationCount);
        EULogger.info("Number of different concepts: " + conceptCount);
    }

    /**
     * Attaches {@code concept} to the document unless it was already attached to it.
     *
     * @param docUri      value of the document's {@code "uri"} field
     * @param concept     concept to attach
     * @param label       term that led to the concept; stored as label of the concept
     * @param docConcepts concepts already attached to the current document; updated in place
     * @return {@code true} if the annotation was newly added, {@code false} if it was a duplicate
     * @throws Exception if the annotation store rejects the annotation or label
     */
    private boolean annotate(String docUri, String concept, String label, HashSet<String> docConcepts) throws Exception {
        // Set.add reports whether the element was new, which doubles as the duplicate check.
        if (!docConcepts.add(concept)) {
            return false;
        }
        this.mAnnotationStore.addAnnotation(docUri, concept);
        this.mAnnotationStore.addLabel(concept, label);
        return true;
    }

    /**
     * Writes the content of the annotation store as RDF/XML.
     *
     * @param str target passed to the RDF/XML writer
     * @throws Exception if writing fails
     */
    public void writeToFile(String str) throws Exception {
        EUTripleStoreWriter.writeRDFXML(str, this.mAnnotationStore.getStore());
    }

    /**
     * Logs every cached term together with the concept cached for it.
     */
    public void logAnnotations() {
        for (String term : this.mConceptMap.keySet()) {
            EULogger.info(term + " --- " + this.mConceptMap.get(term));
        }
    }

    /**
     * Configures the optional meta searcher used for frequency-class filtering.
     *
     * <p>If the searcher cannot be created a warning including the cause is logged and the
     * previously configured meta searcher (possibly none) stays in effect.
     *
     * @param str argument handed to the {@link LUMetaSearcher} constructor
     */
    public void setMetaSearcher(String str) {
        try {
            this.mMetaSearcher = new LUMetaSearcher(str);
        } catch (Exception e) {
            EULogger.warn(getClass(), "Could not initialize meta searcher: " + e);
        }
    }
}
