package de.dfki.km.exact.koios.example.med;

import de.dfki.km.exact.file.CSVWriter;
import de.dfki.km.exact.file.EUFileReader;
import de.dfki.km.exact.file.EUFileWriter;
import de.dfki.km.exact.lucene.LUWeightedTerm;
import de.dfki.km.exact.lucene.wiki.LUWikiSearcher;
import de.dfki.km.exact.misc.EULocal;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.misc.EUString;
import de.dfki.km.exact.misc.LOCAL;
import de.dfki.km.exact.nlp.EUStopWord;
import de.dfki.km.exact.nlp.NLP;
import de.dfki.km.exact.sesame.EUTripleStore;
import de.dfki.km.exact.sesame.EUTripleStoreFactory;
import de.dfki.km.exact.web.lucene.LUTripleIndexSearcher;
import de.dfki.km.exact.web.lucene.LUTripleIndexWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import org.apache.http.HttpHost;
import org.apache.lucene.store.RAMDirectory;
import org.openrdf.model.vocabulary.RDFS;

/* loaded from: input_file:WEB-INF/lib/xkoios-17-20140430.130113-24.jar:de/dfki/km/exact/koios/example/med/MedicineValuatorBuilder.class */
/**
 * Command-line helper that builds the context-term files of the medicine example.
 *
 * <p>{@link #writeContext()} indexes the two medicine ontologies, expands the index terms with
 * similar terms from the German Wikipedia index, weights them and writes the result to
 * {@code MEDICINE.MED_CONTEXT}. {@link #cleanContext()} then reduces that file to its first
 * column and stores it as a plain line-based text file.
 */
public class MedicineValuatorBuilder {
    /** Character set handed to {@code EUString.clean} before an index term is split. */
    private static final String TERM_DELIMITERS = " …•‚”“„‘«»<>’,|`.;:?!-_'/()[]{}@§$%&=^°*+~#´\"\\";

    /** Tokens shorter than this many characters are never used as context terms. */
    private static final int MIN_TERM_LENGTH = 5;

    /** A progress message is logged every time this many index terms have been analysed. */
    private static final int PROGRESS_INTERVAL = 100;

    /** Target of {@link #cleanContext()}. */
    private static final String CLEAN_CONTEXT_FILE = "resource/example/medicine/files/medicine-context.txt";

    /**
     * Runs {@link #writeContext()} followed by {@link #cleanContext()}.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if either step fails
     */
    public static void main(String[] strArr) throws Exception {
        writeContext();
        cleanContext();
    }

    /**
     * Reads {@code MEDICINE.MED_CONTEXT} line by line, keeps only the first {@code ;}-separated
     * cell of every line and writes those cells, one per line, to the clean context file.
     *
     * @throws Exception if reading or writing fails
     */
    public static void cleanContext() throws Exception {
        LinkedList<String> terms = new LinkedList<String>();
        Iterator<String> lines = EUFileReader.getLines(MEDICINE.MED_CONTEXT).iterator();
        while (lines.hasNext()) {
            String[] cells = EUString.split(lines.next(), ";");
            // Guard against lines for which the split yields no cell at all; indexing [0]
            // unconditionally would abort the whole run with an ArrayIndexOutOfBoundsException.
            if (cells.length > 0) {
                terms.add(cells[0]);
            }
        }
        EUFileWriter.writeAsLines(CLEAN_CONTEXT_FILE, terms);
    }

    /**
     * Builds the weighted context-term CSV at {@code MEDICINE.MED_CONTEXT}.
     *
     * <p>Steps: load both ontologies into an in-memory triple store, index them into a
     * {@link RAMDirectory}, collect context terms from the single-word index terms, let the wiki
     * term searcher weight them, and write one {@code term;score} row per weighted term.
     *
     * @throws Exception if loading, indexing, searching or writing fails
     */
    public static void writeContext() throws Exception {
        String wikiMeta = EULocal.getValue(LOCAL.META_WIKIPEDIA_DE);
        String wikiIndex = EULocal.getValue(LOCAL.INDEX_WIKIPEDIA_DE);

        EUTripleStore store = EUTripleStoreFactory.getMemoryStore();
        store.addFile(MEDICINE.XRADLEX_DE);
        store.addFile(MEDICINE.ICD_10_DE);
        EULogger.info("loaded medicine ontologies...");

        // Each progress message is logged right after the step it describes has finished.
        RAMDirectory directory = buildTripleIndex(store);
        EULogger.info("wrote triple store index...");

        LUWikiSearcher wikiSearcher = new LUWikiSearcher(wikiIndex, wikiMeta, NLP.LANGUAGE.de);
        EULogger.info("loaded wiki indices...");

        LUTripleIndexSearcher indexSearcher = new LUTripleIndexSearcher(directory);
        Set<String> indexTerms = indexSearcher.getSingleWordTerms();
        EULogger.info("index terms size: " + indexTerms.size());

        EULogger.info("get forms...");
        HashSet<String> contextTerms = collectContextTerms(indexTerms, wikiSearcher);

        EULogger.info("weight terms...");
        EULogger.info("context terms size: " + contextTerms.size());
        Set<LUWeightedTerm> weightedTerms =
                wikiSearcher.getTermSearcher().getRelevantTerms(EUString.append(contextTerms));
        EULogger.info("weighted context terms size: " + weightedTerms.size());

        writeWeightedTerms(weightedTerms);
    }

    /** Indexes the given store into a fresh in-memory directory and returns that directory. */
    private static RAMDirectory buildTripleIndex(EUTripleStore store) throws Exception {
        RAMDirectory directory = new RAMDirectory();
        String[] indexedProperties = new String[]{MEDICINE.NON_ENGLISH, RDFS.LABEL.toString()};
        LUTripleIndexWriter indexWriter = new LUTripleIndexWriter(directory, indexedProperties, store);
        indexWriter.create();
        try {
            indexWriter.write();
        } finally {
            // Close the writer even when writing fails so that it is not leaked.
            indexWriter.close();
        }
        return directory;
    }

    /**
     * Collects the context terms: every accepted token of every non-URL index term plus the
     * similar terms the wiki meta searcher returns for it.
     */
    private static HashSet<String> collectContextTerms(Set<String> indexTerms, LUWikiSearcher wikiSearcher)
            throws Exception {
        HashSet<String> accepted = new HashSet<String>();
        // Tokens already rejected (too short or stop word); remembered so they are not re-checked.
        HashSet<String> rejected = new HashSet<String>();
        int analysed = 0;
        for (String indexTerm : indexTerms) {
            // HttpHost.DEFAULT_SCHEME_NAME is "http": index terms that look like URLs are skipped
            // and do not count towards the progress counter.
            if (indexTerm.startsWith(HttpHost.DEFAULT_SCHEME_NAME)) {
                continue;
            }
            for (String token : EUString.split(EUString.clean(indexTerm.toLowerCase(), TERM_DELIMITERS))) {
                if (token.length() < MIN_TERM_LENGTH) {
                    rejected.add(token);
                    continue;
                }
                if (accepted.contains(token) || rejected.contains(token)) {
                    continue;
                }
                if (EUStopWord.isStopWord(token, NLP.LANGUAGE.de)) {
                    rejected.add(token);
                    continue;
                }
                accepted.add(token);
                Iterator<String> similar =
                        wikiSearcher.getTermSearcher().getMetaSearcher().getSimiliarTerms(token).iterator();
                while (similar.hasNext()) {
                    accepted.add(similar.next());
                }
            }
            analysed++;
            if (analysed % PROGRESS_INTERVAL == 0) {
                EULogger.info(analysed + " terms were analysed...");
            }
        }
        return accepted;
    }

    /** Writes one row (term, score) per weighted term to {@code MEDICINE.MED_CONTEXT}. */
    private static void writeWeightedTerms(Set<LUWeightedTerm> weightedTerms) throws Exception {
        CSVWriter csv = new CSVWriter(MEDICINE.MED_CONTEXT);
        try {
            for (LUWeightedTerm weighted : weightedTerms) {
                csv.writeCell(weighted.getTerm());
                csv.writeDoubleCell(weighted.getScore().doubleValue());
                csv.nextLine();
                csv.flush();
            }
        } finally {
            // Release the output file even when a row cannot be written.
            csv.close();
        }
    }
}
