package de.dfki.km.exact.web;

import de.dfk.km.exact.nlp.wkt2j.api.Lexem;
import de.dfk.km.exact.nlp.wkt2j.api.Lexicon;
import de.dfk.km.exact.nlp.wkt2j.api.Wiktionary;
import de.dfk.km.exact.nlp.wkt2j.impl.WiktionaryFactory;
import de.dfki.km.exact.file.EUFileReader;
import de.dfki.km.exact.lucene.LUCooccurrence;
import de.dfki.km.exact.lucene.misc.LUCooccurrenceMeasure;
import de.dfki.km.exact.lucene.misc.LUCooccurrenceMethod;
import de.dfki.km.exact.lucene.misc.LUCoocurrenceSearcher;
import de.dfki.km.exact.lucene.misc.LUCoocurrencyFilter;
import de.dfki.km.exact.lucene.misc.LULocal;
import de.dfki.km.exact.misc.EULocal;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.nlp.NLP;
import de.dfki.km.exact.nlp.analyser.EUCharacterFilter;
import de.dfki.km.exact.nlp.analyser.EUComplexAnalyser;
import de.dfki.km.exact.nlp.analyser.EULengthFilter;
import de.dfki.km.exact.nlp.analyser.EUStopWordFilter;
import de.dfki.km.exact.nlp.analyser.EUWordFilter;
import de.dfki.km.exact.sesame.EUTripleStore;
import de.dfki.km.exact.sesame.EUTripleStoreFactory;
import de.dfki.km.exact.sesame.EUTripleStoreWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.Value;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.repository.RepositoryResult;

/* loaded from: input_file:de/dfki/km/exact/web/XUOntologyExtension.class */
/**
 * Extends an RDF ontology held in an in-memory triple store with additional
 * literals derived from the labels it already contains.
 *
 * <p>Two kinds of literals can be appended:
 * <ul>
 *   <li>word forms looked up in a Wiktionary database
 *       ({@link #appendWordForms(String, String, String)}), and</li>
 *   <li>co-occurrence context terms obtained from the co-occurrence searcher
 *       ({@link #appendWordContext(String, String, String)}).</li>
 * </ul>
 * The extended store can then be serialised with {@link #writeToFile(String)}.
 */
public class XUOntologyExtension {
    /** Delimiter characters handed to the ontology term extractor in {@link #setTerms()}. */
    private static final String TERM_DELIMITERS = " …•‚”“„‘«»<>’,|`.;:?!-_'/()[]{}@§$%&=^°*+~#´\"\\→„“";

    /** Same set as {@link #TERM_DELIMITERS} plus the digits 0-9; used by {@link #getFilter()}. */
    private static final String FILTER_DELIMITERS = " …•‚”“„‘«»<>’,|0123456789`.;:?!-_'/()[]{}@§$%&=^°*+~#´\"\\→„“";

    private static final String RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label";
    private static final String HAS_WORD_FORM = "http://www.dfki.de/km/ontology/forcher/fweb#hasWordForm";
    private static final String HAS_WORD_CONTEXT = "http://www.dfki.de/km/ontology/forcher/fweb#hasWordContext";
    private static final String LANG_DE = "de";

    /** Lower-cased terms already present in the ontology; {@code null} until {@link #setTerms()} is called. */
    private Set<String> mOntologyTerms;

    /** Wiktionary used for word-form lookup; {@code null} until {@link #setWiktionary(String)} is called. */
    private Wiktionary mWiktionary = null;

    private final EUComplexAnalyser mFilter = getFilter();
    private final LUCoocurrenceSearcher mCoocSearcher =
            new LUCoocurrenceSearcher(LULocal.getSearcherSpiegel(), LULocal.getMetaSearcherSpiegel());
    private final EUTripleStore mTripleStore = EUTripleStoreFactory.getMemoryStore();

    /**
     * Selects the Wiktionary used by {@link #appendWordForms(String, String, String)}.
     *
     * @param str argument forwarded unchanged to {@code WiktionaryFactory.getWiktionary}
     */
    public void setWiktionary(String str) {
        this.mWiktionary = WiktionaryFactory.getWiktionary(str);
    }

    /**
     * Loads the given file into the triple store.
     *
     * @param str file name forwarded to the triple store's {@code addFile}
     * @throws Exception if the triple store fails to load the file
     */
    public void addOntologyByFile(String str) throws Exception {
        this.mTripleStore.addFile(str);
    }

    /**
     * For every literal of the given language found under {@code sourcePredicate},
     * looks up its word forms and attaches each of them to the same subject
     * under {@code targetPredicate}.
     *
     * @param sourcePredicate URI of the predicate whose literal objects are read
     * @param targetPredicate URI of the predicate under which word forms are added
     * @param language        language tag a literal must carry to be processed; also
     *                        used as the tag of the added literals; must not be {@code null}
     * @throws Exception if reading from or writing to the triple store fails
     */
    public void appendWordForms(String sourcePredicate, String targetPredicate, String language) throws Exception {
        EULogger.info("append word forms...");
        URIImpl source = new URIImpl(sourcePredicate);
        URIImpl target = new URIImpl(targetPredicate);
        for (Statement statement : findLiteralStatements(source, language)) {
            String term = this.mFilter.filter(((Literal) statement.getObject()).getLabel());
            if (term == null) {
                continue;
            }
            for (String form : lookupWordForms(term)) {
                this.mTripleStore.addStatement(statement.getSubject(), target, new LiteralImpl(form, language));
            }
        }
    }

    /**
     * For every literal of the given language found under {@code sourcePredicate},
     * queries the co-occurrence searcher with the lower-cased, filtered label and
     * attaches the resulting terms, joined by single spaces, to the same subject
     * under {@code targetPredicate}.
     *
     * <p>Nothing is added when the searcher yields no context or when every
     * context term is dropped by {@link #writeAsString(Collection)}.
     *
     * @param sourcePredicate URI of the predicate whose literal objects are read
     * @param targetPredicate URI of the predicate under which the context is added
     * @param language        language tag a literal must carry to be processed; also
     *                        used as the tag of the added literals; must not be {@code null}
     * @throws Exception if the triple store or the co-occurrence search fails
     */
    public void appendWordContext(String sourcePredicate, String targetPredicate, String language) throws Exception {
        EULogger.info("append word context...");
        URIImpl source = new URIImpl(sourcePredicate);
        URIImpl target = new URIImpl(targetPredicate);
        for (Statement statement : findLiteralStatements(source, language)) {
            String term = this.mFilter.filter(((Literal) statement.getObject()).getLabel().toLowerCase());
            if (term == null) {
                continue;
            }
            EULogger.info("Get windows for " + term + "...");
            SortedSet<LUCooccurrence> context = this.mCoocSearcher.getContext(term);
            if (context == null || context.isEmpty()) {
                continue;
            }
            // The rendered text can be empty even for a non-empty context (all terms
            // filtered out); an empty literal carries no information, so skip it.
            String contextText = writeAsString(context);
            if (contextText.length() > 0) {
                this.mTripleStore.addStatement(statement.getSubject(), target, new LiteralImpl(contextText, language));
            }
        }
    }

    /**
     * Collects the terms already present in the ontology (German {@code rdfs:label}
     * and {@code hasWordForm} literals) so that
     * {@link #appendWordContext(String, String, String)} can leave them out of
     * the generated context.
     */
    public void setTerms() {
        XUOntologyTermExtractor extractor = new XUOntologyTermExtractor(this.mTripleStore);
        extractor.add(RDFS_LABEL, LANG_DE);
        extractor.add(HAS_WORD_FORM, LANG_DE);
        this.mOntologyTerms = extractor.getTerms(TERM_DELIMITERS);
    }

    /**
     * Returns all statements with the given predicate whose object is a literal
     * carrying exactly the given language tag.
     *
     * <p>The matches are copied into a list and the repository result is closed
     * before returning, so callers can add statements to the store while
     * walking the returned list.
     */
    private List<Statement> findLiteralStatements(URIImpl predicate, String language) throws Exception {
        List<Statement> matches = new ArrayList<Statement>();
        RepositoryResult<?> result = this.mTripleStore.getStatements((Resource) null, predicate, (Value) null);
        try {
            while (result.hasNext()) {
                Statement statement = (Statement) result.next();
                Value object = statement.getObject();
                // getLanguage() is null for literals without a language tag, so the
                // comparison is anchored on the (non-null) requested language.
                if (object instanceof Literal && language.equals(((Literal) object).getLanguage())) {
                    matches.add(statement);
                }
            }
        } finally {
            result.close();
        }
        return matches;
    }

    /**
     * Joins the terms of the given co-occurrences with single spaces.
     *
     * <p>A co-occurrence is skipped when its term equals its base term, or when
     * ontology terms have been set and contain the lower-cased term.
     */
    private String writeAsString(Collection<LUCooccurrence> collection) {
        StringBuilder joined = new StringBuilder();
        for (LUCooccurrence cooccurrence : collection) {
            String term = cooccurrence.getTerm();
            if (cooccurrence.getBaseTerm().equals(term)) {
                continue;
            }
            if (this.mOntologyTerms == null || !this.mOntologyTerms.contains(term.toLowerCase())) {
                joined.append(term).append(" ");
            }
        }
        return joined.toString().trim();
    }

    /**
     * Looks up the word forms of {@code term} in the configured Wiktionary.
     *
     * <p>Each form is passed through the analyser; forms that come back
     * {@code null} or empty are dropped, and {@code term} itself is never part
     * of the result. The lookup always uses {@code Lexicon.Language.de}.
     *
     * @return the filtered forms; empty when no Wiktionary is set or it has no
     *         entry set for {@code term}
     */
    private Set<String> lookupWordForms(String term) {
        Set<String> forms = new HashSet<String>();
        if (this.mWiktionary == null) {
            return forms;
        }
        Set<?> entries = this.mWiktionary.getEntrySet(Lexicon.Language.de, term);
        if (entries == null) {
            return forms;
        }
        for (Object entry : entries) {
            for (Object rawForm : ((Lexem) entry).getForms()) {
                String form = this.mFilter.filter((String) rawForm);
                if (form != null && !form.equals("")) {
                    forms.add(form);
                }
            }
        }
        forms.remove(term);
        return forms;
    }

    /**
     * Writes the current content of the triple store as RDF/XML.
     *
     * @param str destination forwarded to {@code EUTripleStoreWriter.writeRDFXML}
     * @throws Exception if writing fails
     */
    public void writeToFile(String str) throws Exception {
        EUTripleStoreWriter.writeRDFXML(str, this.mTripleStore);
    }

    /**
     * Builds the analyser applied to labels and word forms: a character filter
     * over punctuation and digits, a German stop-word filter and a length filter.
     *
     * @return a newly created analyser on every call
     */
    public static EUComplexAnalyser getFilter() {
        EUComplexAnalyser analyser = new EUComplexAnalyser(FILTER_DELIMITERS);
        analyser.add(new EUCharacterFilter(FILTER_DELIMITERS));
        analyser.add(new EUStopWordFilter(NLP.LANGUAGE.de));
        // NOTE(review): presumably "minimum length 3, no upper bound" - confirm against EULengthFilter.
        analyser.add(new EULengthFilter(3, -1));
        return analyser;
    }

    /**
     * Example run: configures the co-occurrence search, loads the PSYNDEX example
     * ontology, appends word forms and word contexts for German labels and
     * writes the extended ontology back to disk. All paths are relative to the
     * working directory.
     *
     * @param strArr ignored
     * @throws Exception if any step fails
     */
    public static void main(String[] strArr) throws Exception {
        XUOntologyExtension extension = new XUOntologyExtension();

        LUCooccurrenceMethod method = extension.mCoocSearcher.getMethod();
        LUCoocurrencyFilter filter = extension.mCoocSearcher.getFilter();
        filter.getBaseAnalyser().add(new EUWordFilter(EUFileReader.getLines("resource/example/cooc-filter.txt")));
        method.setMeasureType(LUCooccurrenceMeasure.TYPE.POI);
        method.setMargin(12);
        method.setWindowNumber(31812430);
        filter.setMaxCooccurrenceNumber(200);
        filter.setMinFreqClass(4);
        filter.setMaxFreqClass(20);
        filter.setMinFrequency(2);

        extension.setWiktionary(EULocal.getValue("database-de-wiktionary"));
        extension.addOntologyByFile("resource/example/psyndex-ontology.rdf");
        extension.appendWordForms(RDFS_LABEL, HAS_WORD_FORM, LANG_DE);
        extension.appendWordContext(RDFS_LABEL, HAS_WORD_CONTEXT, LANG_DE);
        extension.writeToFile("resource/example/extended-psyndex-ontology.rdf");
    }
}
