package de.dfki.sds.lodex.util;

import com.hp.hpl.jena.sparql.sse.Tags;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.collections.ValueBox;
import de.dfki.inquisitor.file.FileUtilz;
import de.dfki.inquisitor.json.JsonObjectParser;
import de.dfki.inquisitor.processes.StopWatch;
import de.dfki.inquisitor.text.StringUtils;
import de.dfki.sds.lodex.GlobalConstants;
import de.dfki.sds.lodex.NamedEntityLinker;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Collection;
import java.util.LinkedList;
import java.util.zip.GZIPInputStream;
import org.slf4j.LoggerFactory;
import picocli.CommandLine;

/* loaded from: input_file:WEB-INF/lib/lodex-1.4-SNAPSHOT.jar:de/dfki/sds/lodex/util/NamedEntityDataCreator4Json.class */
/**
 * Command line tool (and programmatic entry points) that reads entities from a JSON stream and
 * adds them to a {@link NamedEntityLinker} index located in an output directory.
 *
 * <p>See {@link #main(String[])} for the supported command line options; the same text is printed
 * by {@code -h}/{@code --help}.
 */
public class NamedEntityDataCreator4Json {

    /** Usage/help text printed by {@link #main(String[])}. */
    private static final String USAGE =
            "NamedEntityDataCreator4Json -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n"
            + "Options:\n"
            + "  -h/--help: this text\n"
            + "  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n"
            + "                     plus the given, relative parameter path\n"
            + "  -i <inputFilePath> the file with the json entities to import\n"
            + "  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term.\n"
            + "  -em <embeddingsConfFileName> the path to the text embeddings configuration\n"
            + "\n"
            + " As follows you can see an example entity json file. 'label' is mandatory, other attributes are optional but enhances functionality (id,synonyms,description,types).\n"
            + " Further, you can add arbitrary, additional attribute value pair metadata to your entities, in order to query or simply store them.\n"
            + "\n"
            + " [\n"
            + " {\n"
            + "     \"id\":\"bla\",\n"
            + "     \"label\":\"Kanne\",\n"
            + "     \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n"
            + "     \"types\":[\"Utensil\", \"Gefäß\"],\n"
            + "     \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n"
            + "     \"material\":\"ein bleliebiges Material\",\n"
            + "     \"durchsichtig\": \"kommt drauf an\"\n"
            + " },\n"
            + " {\n"
            + "     \"id\":\"bla1\",\n"
            + "     \"label\":\"Fenster\",\n"
            + "     \"synonyms\":[\"Guckloch\"],\n"
            + "     \"types\":[\"Gewerk\", \"Bauteil\"],\n"
            + "     \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n"
            + "     \"material\":\"Glas\",\n"
            + "     \"durchsichtig\": \"immer\"\n"
            + " }\n"
            + " ]\n";

    /**
     * Writes the two configuration contents into the output directory and then imports every
     * entity parsed from {@code inputStream} into a {@link NamedEntityLinker} created on that
     * directory.
     *
     * <p>Entities whose label is null or whitespace are skipped. A progress line is logged after
     * every 10000 added entities and once more at the end. The given stream is not closed by this
     * method itself; the caller owns it.
     *
     * @param outputDirPath directory for the named entity data; created if missing
     * @param inputStream stream with the JSON entities to import
     * @param entityTypesContent text written to {@code entityTypes4HighFrqTerms.txt}
     * @param embeddingsConfContent text written to {@code embeddings.conf}
     * @throws Exception whatever the file helpers, the parser or the linker throw
     */
    public static void createNamedEntityDB(String outputDirPath, InputStream inputStream, String entityTypesContent, String embeddingsConfContent) throws Exception {
        long startTime = System.currentTimeMillis();
        logInfo("Named entity data path: " + outputDirPath);
        logInfo("copy file contents to disk");
        new File(outputDirPath).mkdirs();
        FileUtilz.string2File(entityTypesContent, outputDirPath + "/entityTypes4HighFrqTerms.txt");
        FileUtilz.string2File(embeddingsConfContent, outputDirPath + "/embeddings.conf");

        // NOTE(review): the linker is only stopped/closed on the success path, exactly as before.
        // Whether close() is safe after a failed write is not visible from this file - confirm
        // before wrapping this in try/finally.
        NamedEntityLinker namedEntityLinker = new NamedEntityLinker(outputDirPath);
        logInfo("\nstart loading named entity types (reading and writing)");
        namedEntityLinker.startWriting(true);

        // Mutable counter holder, because the lambda below can only capture effectively final locals.
        ValueBox entityCount = new ValueBox(0);
        new JsonObjectParser().setShowHandledMetadata(false).parse(inputStream, entity -> {
            // The well-known attributes are pulled out of the map so that only the remaining,
            // arbitrary attributes are handed over as additional metadata below.
            String id = (String) entity.getFirst(NamedEntityLinker.IndexAtts.id, new String[0]);
            entity.remove(NamedEntityLinker.IndexAtts.id);
            // Tags.tagLabel / SECTION_KEY_DESCRIPTION presumably resolve to the "label" and
            // "description" keys named in the usage text - TODO confirm and replace by own constants.
            String label = (String) entity.getFirst(Tags.tagLabel, new String[0]);
            entity.remove(Tags.tagLabel);
            Collection<String> synonyms = entity.remove("synonyms");
            Collection<String> types = entity.remove("types");
            String description = (String) entity.getFirst(CommandLine.Model.UsageMessageSpec.SECTION_KEY_DESCRIPTION, new String[0]);
            entity.remove(CommandLine.Model.UsageMessageSpec.SECTION_KEY_DESCRIPTION);
            if (StringUtils.nullOrWhitespace(label)) {
                return;
            }
            namedEntityLinker.addEntity2Index(id, label, synonyms, description, types, new MultiValueHashMap<>(entity, LinkedList.class), new NamedEntityLinker.Field2Vector[0]);
            entityCount.setValue(Integer.valueOf(((Integer) entityCount.getValue()).intValue() + 1));
            if (((Integer) entityCount.getValue()).intValue() % 10000 == 0) {
                logInfo("added " + StringUtils.beautifyNumber((Number) entityCount.getValue()) + " entities");
            }
        });
        logInfo("added " + StringUtils.beautifyNumber((Number) entityCount.getValue()) + " entities");
        namedEntityLinker.stopWriting();
        namedEntityLinker.close();
        StopWatch.stopAndLogDistance(startTime, NamedEntityDataCreator4Json.class);
        logInfo("finished");
    }

    /**
     * File based variant: reads the two configuration files, opens the entity file (transparently
     * gunzipping it when its name ends with {@code .gz}) and delegates to
     * {@link #createNamedEntityDB(String, InputStream, String, String)}.
     *
     * <p>If any argument is {@code null} an error is printed to {@code System.err} and nothing is
     * done. The entity file stream is always closed, also when the import fails.
     *
     * @param outputDirPath directory for the named entity data
     * @param inputFilePath file with the JSON entities, optionally gzip compressed
     * @param entityTypesFilePath file whose content becomes {@code entityTypes4HighFrqTerms.txt}
     * @param embeddingsConfFilePath file whose content becomes {@code embeddings.conf}
     * @throws Exception whatever reading the files or the import throws
     */
    public static void createNamedEntityDB(String outputDirPath, String inputFilePath, String entityTypesFilePath, String embeddingsConfFilePath) throws Exception {
        if (inputFilePath == null || entityTypesFilePath == null || embeddingsConfFilePath == null) {
            System.err.println("Error: You have to specify all input files");
            return;
        }
        // Previously a missing output path surfaced as a NullPointerException from new File(null).
        if (outputDirPath == null) {
            System.err.println("Error: You have to specify the output directory");
            return;
        }
        logInfo("Output file path: \n" + outputDirPath);
        logInfo("Input file path:\n" + inputFilePath + "\n" + entityTypesFilePath + "\n" + embeddingsConfFilePath + "\n");
        logInfo("Current working path: " + new File(".").getAbsolutePath());

        String entityTypesContent = FileUtilz.file2String(entityTypesFilePath);
        String embeddingsConfContent = FileUtilz.file2String(embeddingsConfFilePath);
        // Two resources so the raw file stream is released even if the GZIP header cannot be read.
        try (InputStream fileStream = new FileInputStream(inputFilePath);
             InputStream entityStream = inputFilePath.endsWith(".gz") ? new GZIPInputStream(fileStream) : fileStream) {
            createNamedEntityDB(outputDirPath, entityStream, entityTypesContent, embeddingsConfContent);
        }
    }

    /**
     * Decides whether {@code candidate} passes the synonym check against {@code reference}.
     *
     * <p>The check only becomes restrictive when {@code reference} contains a space,
     * {@code candidate} does not, and {@code candidate} is at most 40% as long as
     * {@code reference}; in every other case the result is {@code true}. In the restrictive case
     * candidates shorter than three characters are rejected, and longer ones are accepted only if
     * one of the characters at index 1 to 4 is upper case or a dot (presumably to keep
     * abbreviation-like strings - verify against callers).
     *
     * @param reference the text the candidate is compared with
     * @param candidate the potential synonym
     * @return {@code true} if the candidate is accepted
     */
    protected static boolean isNiceSynonym(String reference, String candidate) {
        boolean referenceHasSpace = reference.contains(" ");
        boolean candidateHasSpace = candidate.contains(" ");
        boolean candidateMuchShorter = ((float) candidate.length()) / ((float) reference.length()) <= 0.4f;
        if (!referenceHasSpace || candidateHasSpace || !candidateMuchShorter) {
            return true;
        }
        if (candidate.length() < 3) {
            return false;
        }
        // Index 0 is deliberately skipped, as in the original loop.
        for (int i = 1; i < 5 && i < candidate.length(); i++) {
            char c = candidate.charAt(i);
            if (Character.isUpperCase(c) || c == '.') {
                return true;
            }
        }
        return false;
    }

    /**
     * Command line entry point. Parses {@code -o}, {@code -i}, {@code -et} and {@code -em} (each
     * followed by a value) and starts the import; {@code -h}/{@code --help}, no arguments or more
     * than 16 arguments print the usage text instead. Unknown arguments are ignored.
     *
     * @param strArr the command line arguments
     * @throws Exception whatever the import throws
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 1 || strArr.length > 16) {
            System.out.println(USAGE);
            return;
        }
        String outputDirPath = null;
        String inputFilePath = null;
        String entityTypesFilePath = null;
        String embeddingsConfFilePath = null;
        // A for loop guarantees progress for every argument; the former while loop used
        // 'continue' without advancing the index and therefore never terminated on an
        // unrecognised argument (including the value following '-o').
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            if ("-h".equals(arg) || "--help".equals(arg)) {
                System.out.println(USAGE);
                return;
            }
            boolean hasValue = i + 1 < strArr.length;
            if ("-o".equals(arg)) {
                if (!hasValue) {
                    printMissingValue("you must specify a path for parameter '-o'");
                    return;
                }
                outputDirPath = strArr[++i];
                if (!new File(outputDirPath).isAbsolute()) {
                    outputDirPath = GlobalConstants.strAppBasePath + "/" + outputDirPath;
                }
            } else if ("-i".equals(arg)) {
                if (!hasValue) {
                    printMissingValue("you must specify a file name for parameter '-i'");
                    return;
                }
                inputFilePath = strArr[++i];
            } else if ("-et".equals(arg)) {
                if (!hasValue) {
                    printMissingValue("you must specify a path for parameter '-et'");
                    return;
                }
                entityTypesFilePath = strArr[++i];
            } else if ("-em".equals(arg)) {
                if (!hasValue) {
                    printMissingValue("you must specify a path for parameter '-em'");
                    return;
                }
                embeddingsConfFilePath = strArr[++i];
            }
        }
        createNamedEntityDB(outputDirPath, inputFilePath, entityTypesFilePath, embeddingsConfFilePath);
    }

    /** Prints the given complaint directly followed by the usage text, as the tool always did. */
    private static void printMissingValue(String complaint) {
        System.out.println(complaint + USAGE);
    }

    /**
     * Logs an info message. The logger category is {@code NamedEntityDataCreator}, unchanged from
     * the previous inline calls.
     */
    private static void logInfo(String message) {
        LoggerFactory.getLogger(NamedEntityDataCreator.class).info(message);
    }
}
