package de.dfki.km.exact.lucene.example;

import de.dfki.km.exact.file.EUFileReader;
import de.dfki.km.exact.lucene.LUQueryFactory;
import de.dfki.km.exact.lucene.meta.LUTermSearcher;
import de.dfki.km.exact.lucene.wiki.LUWikiSearcher;
import de.dfki.km.exact.misc.EULocal;
import de.dfki.km.exact.misc.EUString;
import de.dfki.km.exact.misc.LOCAL;
import de.dfki.km.exact.nlp.EUCharacter;
import de.dfki.km.exact.nlp.EULetter;
import de.dfki.km.exact.nlp.EUNumber;
import de.dfki.km.exact.nlp.NLP;
import java.util.List;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.ScoreDoc;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

/* loaded from: input_file:WEB-INF/lib/lucene-util-17-20140430.114905-2.jar:de/dfki/km/exact/lucene/example/XampleNP01.class */
/**
 * Example program: reads a line-based description file, builds a three-level
 * {@link SinnProcess} hierarchy from it, annotates the processes with entities
 * found through the term and wiki searchers, and writes the result to an XML file.
 */
public class XampleNP01 {
    static String pdfFile = "resource/example/np/BA_ERHARD_Druckreduzierventil_DVP_DN50_350_DE.pdf";
    static String xmlFile = "resource/example/np/secret.xml";
    static String textFile = "resource/example/np/Description.txt";
    static String meta = EULocal.getValue(LOCAL.META_WIKIPEDIA_DE);
    static String index = EULocal.getValue(LOCAL.INDEX_WIKIPEDIA_DE);
    static LUTermSearcher termSearcher;
    static LUWikiSearcher wikiSearcher;

    /** Index field queried when looking up similar terms. */
    private static final String LABEL_FIELD = "label";

    /** Only tokens strictly longer than this many characters are looked up. */
    private static final int MIN_TOKEN_LENGTH = 7;

    /**
     * Initialises both searchers, opens the example PDF, parses the description
     * text file and writes the resulting process tree to {@code xmlFile}.
     *
     * @param strArr command line arguments (not used)
     * @throws Exception if a searcher cannot be created, the PDF or text file
     *                   cannot be read, or the XML file cannot be written
     */
    public static void main(String[] strArr) throws Exception {
        termSearcher = new LUTermSearcher(index, meta, NLP.LANGUAGE.de);
        wikiSearcher = new LUWikiSearcher(index, meta, NLP.LANGUAGE.de);
        // The PDF content is not consumed yet; it is still opened so that a missing
        // or unreadable file fails early, and closed right away to release its resources.
        PDDocument.load(pdfFile).close();
        SinnXMLWriter.write2XMLFile(xmlFile, parse(EUFileReader.getLines(textFile)));
    }

    /**
     * Builds a process tree from the given lines. The first character of each
     * line selects the nesting level:
     * <ul>
     *   <li>digit: new top-level process below the root,</li>
     *   <li>English letter: new process below the current top-level process,</li>
     *   <li>special character: new process below the current second-level process.</li>
     * </ul>
     * A line whose expected parent does not exist yet is attached to the nearest
     * existing ancestor (ultimately the root). {@code null} lines, empty lines and
     * lines whose first character matches none of the categories are skipped.
     * Every accepted line is additionally scanned for entities.
     *
     * @param list lines to parse; {@code null} is treated like an empty list
     * @return the root process, never {@code null}
     * @throws IllegalStateException if an entity lookup is required but the
     *                               searchers have not been initialised
     * @throws Exception if an index lookup fails
     */
    public static final SinnProcess parse(List<String> list) throws Exception {
        SinnProcess root = new SinnProcess();
        if (list == null) {
            return root;
        }
        SinnProcess topLevel = null;
        SinnProcess secondLevel = null;
        for (String line : list) {
            if (line == null || line.isEmpty()) {
                // charAt(0) would throw on an empty line; there is nothing to classify.
                continue;
            }
            Character first = Character.valueOf(line.charAt(0));
            SinnProcess current;
            if (EUNumber.isDigit(first)) {
                topLevel = new SinnProcess(line);
                // A new top-level entry ends the previous entry's sub-process scope.
                secondLevel = null;
                root.addProcess(topLevel);
                current = topLevel;
            } else if (EULetter.isEnglishLetter(first)) {
                secondLevel = new SinnProcess(line);
                nearestParent(root, topLevel, null).addProcess(secondLevel);
                current = secondLevel;
            } else if (EUCharacter.isSpecialCharacter(first)) {
                current = new SinnProcess(line);
                nearestParent(root, topLevel, secondLevel).addProcess(current);
            } else {
                // No process is created for this line, so entities found in it
                // would have nowhere to go.
                continue;
            }
            addEntities(current, line);
        }
        return root;
    }

    /** Returns the deepest non-null process among second level, top level and root. */
    private static SinnProcess nearestParent(SinnProcess root, SinnProcess topLevel, SinnProcess secondLevel) {
        if (secondLevel != null) {
            return secondLevel;
        }
        return topLevel != null ? topLevel : root;
    }

    /**
     * Splits the line at the common delimiters and, for every sufficiently long
     * token whose similar-term query yields exactly one hit, adds an entity that
     * links the token to the article URI of that hit.
     */
    private static void addEntities(SinnProcess process, String line) throws Exception {
        for (String token : EUString.split(line, NLP.CommonDelimeter)) {
            if (token.length() <= MIN_TOKEN_LENGTH) {
                continue;
            }
            if (termSearcher == null || wikiSearcher == null) {
                throw new IllegalStateException(
                        "termSearcher and wikiSearcher must be initialised before parse() looks up entities");
            }
            ScoreDoc[] hits = termSearcher.getIndexSearcher().search(
                    LUQueryFactory.getBooleanQuery(
                            LABEL_FIELD,
                            BooleanClause.Occur.SHOULD,
                            termSearcher.getMetaSearcher().getSimiliarTerms(token)));
            // Only an unambiguous match (exactly one hit) is turned into an entity.
            if (hits.length == 1) {
                process.addEntity(new SinnEntity(token, wikiSearcher.getAricleURIByDocID(hits[0].doc)));
            }
        }
    }
}
