package de.dfki.sds.lodex.util;

import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP;
import com.hp.hpl.jena.sparql.engine.http.QueryExceptionHTTP;
import com.hp.hpl.jena.sparql.sse.Tags;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.file.FileUtilz;
import de.dfki.inquisitor.processes.StopWatch;
import de.dfki.sds.lodex.GlobalConstants;
import de.dfki.sds.lodex.NamedEntityLinker;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/lodex-1.4-SNAPSHOT.jar:de/dfki/sds/lodex/util/NamedEntityDataCreator4Dbpedia.class */
public class NamedEntityDataCreator4Dbpedia {
    public static void createNamedEntityDB(String str, InputStream inputStream, String str2, String str3) throws Exception {
        long currentTimeMillis = System.currentTimeMillis();
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Named entity data path: " + str);
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("copy file contents to disk");
        new File(str).mkdirs();
        FileUtilz.string2File(str2, str + "/entityTypes4HighFrqTerms.txt");
        FileUtilz.string2File(str3, str + "/embeddings.conf");
        NamedEntityLinker namedEntityLinker = new NamedEntityLinker(str);
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("\nstart loading named entity types (reading and writing)");
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
        StringBuilder sb = new StringBuilder();
        int i = 0;
        int i2 = 0;
        int i3 = 0;
        new MultiValueHashMap();
        long currentTimeMillis2 = System.currentTimeMillis();
        namedEntityLinker.startWriting(true);
        while (true) {
            String readLine = bufferedReader.readLine();
            if (readLine == null) {
                bufferedReader.close();
                namedEntityLinker.stopWriting();
                namedEntityLinker.close();
                StopWatch.stopAndLogDistance(currentTimeMillis, NamedEntityDataCreator4Dbpedia.class);
                LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("finished");
                return;
            }
            if (!readLine.toLowerCase().contains(".jpg") && !readLine.toLowerCase().contains(":filepath") && !readLine.toLowerCase().contains(".png") && readLine.startsWith(Tags.symLT) && readLine.endsWith(Tags.symGT) && !readLine.substring(1, readLine.length() - 1).contains(Tags.symLT) && !readLine.substring(1, readLine.length() - 1).contains(Tags.symGT) && !readLine.contains("\\") && !readLine.contains("`") && !readLine.contains("^")) {
                sb.append("(").append(readLine).append(") ");
                i++;
                if (i % 1000 != 0) {
                    continue;
                } else {
                    String replace = "PREFIX foaf: <http://xmlns.com/foaf/0.1/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX onto: <http://dbpedia.org/ontology/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX dul: <http://www.loa-cnr.it/ontologies/DUL.owl#> PREFIX schema: <http://schema.org/> PREFIX yago: <http://yago-knowledge.org/resource/> PREFIX dbp: <http://dbpedia.org/property/> PREFIX dct: <http://purl.org/dc/terms/> SELECT (str(?uri) as ?URI) (GROUP_CONCAT( distinct ?name; SEPARATOR = '-- ') as ?Name) (GROUP_CONCAT( distinct ?rdfsType; SEPARATOR = '-- ') as ?Type) (GROUP_CONCAT( distinct ?realType; SEPARATOR = '-- ') as ?TypeURI) (GROUP_CONCAT( distinct ?des; SEPARATOR = '-- ') as ?Description) (GROUP_CONCAT( distinct ?img; SEPARATOR = '-- ') as ?Img) FROM <http://dbpedia.org> WHERE { VALUES (?uri) { %% }. ?uri rdfs:label ?name. FILTER ( lang(?name) = 'en' || lang(?name) = 'de' ). OPTIONAL { ?uri rdf:type ?realType. OPTIONAL { ?realType rdfs:label ?rdfsType. FILTER ( lang(?rdfsType) = 'en' || lang(?rdfsType) = 'de' ). } } OPTIONAL { ?uri dbo:abstract ?des . FILTER ( lang(?des) = 'en' || lang(?des) = 'de' ). } OPTIONAL { ?uri dbp:image ?img . } } GROUP BY ?uri".replace("%%", sb.toString());
                    sb = new StringBuilder();
                    try {
                        QueryEngineHTTP queryEngineHTTP = new QueryEngineHTTP("http://serv-4101.kl.dfki.de:8890/sparql", replace);
                        try {
                            ResultSet execSelect = queryEngineHTTP.execSelect();
                            while (execSelect.hasNext()) {
                                i3++;
                                QuerySolution nextSolution = execSelect.nextSolution();
                                Literal literal = nextSolution.getLiteral("URI");
                                Literal literal2 = nextSolution.getLiteral("Description");
                                Literal literal3 = nextSolution.getLiteral("TypeURI");
                                Literal literal4 = nextSolution.getLiteral("Type");
                                Literal literal5 = nextSolution.getLiteral(SchemaSymbols.ATTVAL_NAME);
                                Literal literal6 = nextSolution.getLiteral("Img");
                                if (literal == null || literal5 == null) {
                                    break;
                                }
                                MultiValueHashMap<String, String> multiValueHashMap = new MultiValueHashMap<>();
                                String obj = literal.toString();
                                String str4 = "";
                                LinkedList linkedList = new LinkedList();
                                String obj2 = literal5.toString();
                                if (!obj2.equals("")) {
                                    String[] split = obj2.split("-- ");
                                    str4 = split[0];
                                    multiValueHashMap.add("labelTokenized", split[0]);
                                    for (int i4 = 1; i4 < split.length; i4++) {
                                        linkedList.add(split[i4]);
                                        multiValueHashMap.add("labelTokenized", split[i4]);
                                    }
                                }
                                String obj3 = literal2 != null ? literal2.toString() : "";
                                LinkedList linkedList2 = new LinkedList();
                                if (literal3 != null) {
                                    String obj4 = literal3.toString();
                                    if (!obj4.equals("")) {
                                        Collections.addAll(linkedList2, obj4.split("-- "));
                                    }
                                }
                                if (literal4 != null) {
                                    String obj5 = literal4.toString();
                                    if (!obj5.equals("")) {
                                        for (String str5 : obj5.split("-- ")) {
                                            multiValueHashMap.add("typeName", str5);
                                        }
                                    }
                                }
                                if (literal6 != null) {
                                    String obj6 = literal6.toString();
                                    if (!obj6.equals("")) {
                                        for (String str6 : obj6.split("-- ")) {
                                            multiValueHashMap.add("image", str6);
                                        }
                                    }
                                }
                                namedEntityLinker.addEntity2Index(obj, str4, linkedList, obj3, linkedList2, multiValueHashMap, new NamedEntityLinker.Field2Vector[0]);
                            }
                            queryEngineHTTP.close();
                        } catch (Throwable th) {
                            try {
                                queryEngineHTTP.close();
                            } catch (Throwable th2) {
                                th.addSuppressed(th2);
                            }
                            throw th;
                            break;
                        }
                    } catch (QueryExceptionHTTP e) {
                        System.out.println("\nQueryExceptionHTTP occurred" + e.getMessage() + "\nfor query " + replace);
                        i2++;
                    } catch (Exception e2) {
                        System.out.println("\nException occurred " + e2.getMessage() + "\nfor query " + replace);
                        i2++;
                    }
                    Logger logger = LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class);
                    logger.info("added " + i + " entities in " + TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - currentTimeMillis2) + " seconds with success " + logger + " and fail " + i3);
                }
            }
        }
    }

    public static void createNamedEntityDB(String str, String str2, String str3, String str4) throws Exception {
        if (str2 == null || str3 == null || str4 == null) {
            System.err.println("Error: You have to specify all input files");
            return;
        }
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Output file path: \n" + str);
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Input file path:\n" + str2 + "\n" + str3 + "\n" + str4 + "\n");
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Current working path: " + new File(".").getAbsolutePath());
        createNamedEntityDB(str, str2.endsWith(".gz") ? new GZIPInputStream(new FileInputStream(str2)) : new FileInputStream(str2), FileUtilz.file2String(str3), FileUtilz.file2String(str4));
    }

    protected static boolean isNiceSynonym(String str, String str2) {
        boolean contains = str.contains(" ");
        boolean contains2 = str2.contains(" ");
        boolean z = ((float) str2.length()) / ((float) str.length()) <= 0.4f;
        if (!contains || contains2 || !z) {
            return true;
        }
        if (str2.length() < 3) {
            return false;
        }
        boolean z2 = false;
        for (int i = 1; i < 5 && str2.length() > i; i++) {
            z2 = z2 || Character.isUpperCase(str2.charAt(i)) || str2.charAt(i) == '.';
        }
        return z2;
    }

    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 1 || strArr.length > 8) {
            System.out.println("NamedEntityDataCreator4Dbpedia -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n Options:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n                     plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term. Types are defined with -t\n  -em <embeddingsConfFileName> the path to the text embeddings configuration\n\n Example entity json file:\n\n [\n {\n     \"id\":\"bla\",\n     \"label\":\"Kanne\",\n     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n     \"material\":\"ein bleliebiges Material\",\n     \"durchsichtig\": \"kommt drauf an\"\n },\n {\n     \"id\":\"bla1\",\n     \"label\":\"Fenster\",\n     \"synonyms\":[\"Guckloch\"],\n     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n     \"material\":\"Glas\",\n     \"durchsichtig\": \"immer\"\n }\n ]\n");
            return;
        }
        String str = null;
        String str2 = null;
        String str3 = null;
        String str4 = null;
        System.out.println("before loop");
        int i = 0;
        while (i < strArr.length) {
            if ("-h".equals(strArr[i]) || "--help".equals(strArr[i])) {
                System.out.println("NamedEntityDataCreator4Dbpedia -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n Options:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n                     plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term. Types are defined with -t\n  -em <embeddingsConfFileName> the path to the text embeddings configuration\n\n Example entity json file:\n\n [\n {\n     \"id\":\"bla\",\n     \"label\":\"Kanne\",\n     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n     \"material\":\"ein bleliebiges Material\",\n     \"durchsichtig\": \"kommt drauf an\"\n },\n {\n     \"id\":\"bla1\",\n     \"label\":\"Fenster\",\n     \"synonyms\":[\"Guckloch\"],\n     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n     \"material\":\"Glas\",\n     \"durchsichtig\": \"immer\"\n }\n ]\n");
                return;
            }
            if ("-o".equals(strArr[i])) {
                if (strArr.length < i + 1 + 1) {
                    System.out.println("you must specify a path for parameter '-o'" + "NamedEntityDataCreator4Dbpedia -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n Options:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n                     plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term. Types are defined with -t\n  -em <embeddingsConfFileName> the path to the text embeddings configuration\n\n Example entity json file:\n\n [\n {\n     \"id\":\"bla\",\n     \"label\":\"Kanne\",\n     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n     \"material\":\"ein bleliebiges Material\",\n     \"durchsichtig\": \"kommt drauf an\"\n },\n {\n     \"id\":\"bla1\",\n     \"label\":\"Fenster\",\n     \"synonyms\":[\"Guckloch\"],\n     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n     \"material\":\"Glas\",\n     \"durchsichtig\": \"immer\"\n }\n ]\n");
                    return;
                } else {
                    str = strArr[i + 1];
                    if (!new File(str).isAbsolute()) {
                        str = GlobalConstants.strAppBasePath + "/" + str;
                    }
                }
            } else if ("-i".equals(strArr[i])) {
                if (strArr.length < i + 1 + 1) {
                    System.out.println("you must specify a file name for parameter '-i'" + "NamedEntityDataCreator4Dbpedia -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n Options:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n                     plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term. Types are defined with -t\n  -em <embeddingsConfFileName> the path to the text embeddings configuration\n\n Example entity json file:\n\n [\n {\n     \"id\":\"bla\",\n     \"label\":\"Kanne\",\n     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n     \"material\":\"ein bleliebiges Material\",\n     \"durchsichtig\": \"kommt drauf an\"\n },\n {\n     \"id\":\"bla1\",\n     \"label\":\"Fenster\",\n     \"synonyms\":[\"Guckloch\"],\n     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n     \"material\":\"Glas\",\n     \"durchsichtig\": \"immer\"\n }\n ]\n");
                    return;
                } else {
                    str2 = strArr[i + 1];
                    i++;
                }
            } else if ("-et".equals(strArr[i])) {
                if (strArr.length < i + 1 + 1) {
                    System.out.println("you must specify a path for parameter '-et'" + "NamedEntityDataCreator4Dbpedia -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n Options:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n                     plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term. Types are defined with -t\n  -em <embeddingsConfFileName> the path to the text embeddings configuration\n\n Example entity json file:\n\n [\n {\n     \"id\":\"bla\",\n     \"label\":\"Kanne\",\n     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n     \"material\":\"ein bleliebiges Material\",\n     \"durchsichtig\": \"kommt drauf an\"\n },\n {\n     \"id\":\"bla1\",\n     \"label\":\"Fenster\",\n     \"synonyms\":[\"Guckloch\"],\n     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n     \"material\":\"Glas\",\n     \"durchsichtig\": \"immer\"\n }\n ]\n");
                    return;
                } else {
                    str3 = strArr[i + 1];
                    i++;
                }
            } else if (!"-em".equals(strArr[i])) {
                continue;
            } else if (strArr.length < i + 1 + 1) {
                System.out.println("you must specify a path for parameter '-em'" + "NamedEntityDataCreator4Dbpedia -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n Options:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n                     plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term. Types are defined with -t\n  -em <embeddingsConfFileName> the path to the text embeddings configuration\n\n Example entity json file:\n\n [\n {\n     \"id\":\"bla\",\n     \"label\":\"Kanne\",\n     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n     \"material\":\"ein bleliebiges Material\",\n     \"durchsichtig\": \"kommt drauf an\"\n },\n {\n     \"id\":\"bla1\",\n     \"label\":\"Fenster\",\n     \"synonyms\":[\"Guckloch\"],\n     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n     \"material\":\"Glas\",\n     \"durchsichtig\": \"immer\"\n }\n ]\n");
                return;
            } else {
                str4 = strArr[i + 1];
                i++;
            }
            i++;
        }
        createNamedEntityDB(str, str2, str3, str4);
    }
}
