package dfki.km.tweekreco.ner.util;

import de.dfki.inquisition.collections.MultiValueHashMap;
import de.dfki.inquisition.collections.MultiValueTreeMap;
import de.dfki.inquisition.collections.ValueBox;
import de.dfki.inquisition.file.FileUtils;
import de.dfki.inquisition.json.JsonObjectParser;
import de.dfki.inquisition.processes.StopWatch;
import de.dfki.inquisition.text.StringUtils;
import dfki.km.tweekreco.ner.GlobalConstants;
import dfki.km.tweekreco.ner.NamedEntityRecognizer;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Collection;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;

/* loaded from: input_file:dfki/km/tweekreco/ner/util/NamedEntityDataCreator4Json.class */
/**
 * Command line tool and helper methods that build the on-disk data of a
 * {@link NamedEntityRecognizer} from a JSON file of entity descriptions.
 *
 * <p>Each JSON object is expected to carry an id, a {@code label}, optional {@code synonyms},
 * {@code types} and a {@code description}; every remaining attribute is handed to the recognizer
 * as additional metadata. Objects without a label are skipped.
 */
public class NamedEntityDataCreator4Json {

    /** Shared logger; keeps the logger name (NamedEntityDataCreator) this class has always used. */
    private static final Logger LOG = Logger.getLogger(NamedEntityDataCreator.class.getName());

    /** A progress line is logged every time this many entities have been added. */
    private static final int PROGRESS_LOG_INTERVAL = 10000;

    /**
     * Writes the given resources into the data directory and indexes all entities read from the
     * JSON stream.
     *
     * <p>The stream is consumed but not closed by this method; closing it is up to the caller.
     *
     * @param str         data directory of the recognizer; created if it does not exist
     * @param inputStream stream delivering the JSON entity objects
     * @param str2        content written to {@code stopwords4Indexing.txt}
     * @param str3        content written to {@code highFrqTerms4Disambiguation.txt}
     * @param bArr        bytes written to {@code openNLPPosModel.bin}
     * @param str4        content written to {@code posTags4Entities.txt}
     * @param str5        content written to {@code entityTypes4HighFrqTerms.txt}
     * @throws Exception if writing the files, parsing the JSON or indexing fails
     */
    public static void createNamedEntityDB(String str, InputStream inputStream, String str2, String str3, byte[] bArr, String str4, String str5) throws Exception {
        long startTime = System.currentTimeMillis();
        LOG.info("Named entity data path: " + str);
        LOG.info("copy file contents to disk");

        new File(str).mkdirs();
        FileUtils.string2File(str2, str + "/stopwords4Indexing.txt");
        FileUtils.string2File(str3, str + "/highFrqTerms4Disambiguation.txt");
        FileUtils.bytes2File(bArr, str + "/openNLPPosModel.bin");
        FileUtils.string2File(str4, str + "/posTags4Entities.txt");
        FileUtils.string2File(str5, str + "/entityTypes4HighFrqTerms.txt");

        final NamedEntityRecognizer namedEntityRecognizer = new NamedEntityRecognizer(str);
        LOG.info("\nstart loading named entity types (reading and writing)");
        namedEntityRecognizer.startWriting(true);

        // One-element array so the anonymous handler below can increment the counter.
        final int[] entityCount = {0};
        new JsonObjectParser().setShowHandledMetadata(false).parse(inputStream, new JsonObjectParser.JsonObjectHandler() {
            public void handleMetadata(MultiValueTreeMap<String, String> multiValueTreeMap) throws Exception {
                // Pull the well-known attributes out of the map; whatever stays behind is
                // passed on as free-form metadata of the entity.
                String id = (String) multiValueTreeMap.getFirst(NamedEntityRecognizer.IndexAtts.id);
                multiValueTreeMap.remove(NamedEntityRecognizer.IndexAtts.id);
                String label = (String) multiValueTreeMap.getFirst("label");
                multiValueTreeMap.remove("label");
                Collection<String> synonyms = multiValueTreeMap.remove("synonyms");
                Collection<String> types = multiValueTreeMap.remove("types");
                String description = (String) multiValueTreeMap.getFirst("description");
                multiValueTreeMap.remove("description");

                // Entities without a label are skipped.
                if (StringUtils.nullOrWhitespace(label)) {
                    return;
                }

                // Must go through the captured local: there is no enclosing
                // NamedEntityRecognizer instance in this static context.
                namedEntityRecognizer.addEntity2Index(id, label, synonyms, description, types, new MultiValueHashMap<>(multiValueTreeMap, LinkedList.class));

                entityCount[0]++;
                if (entityCount[0] % PROGRESS_LOG_INTERVAL == 0) {
                    LOG.info("added " + StringUtils.beautifyNumber((Number) Integer.valueOf(entityCount[0])) + " entities");
                }
            }
        });

        LOG.info("added " + StringUtils.beautifyNumber((Number) Integer.valueOf(entityCount[0])) + " entities");
        namedEntityRecognizer.stopWriting();
        namedEntityRecognizer.close();
        StopWatch.stopAndLogDistance(startTime, Level.INFO);
        LOG.info("finished");
    }

    /**
     * Heuristic check of one string against another (presumably a synonym candidate {@code str2}
     * against its label {@code str} - confirm against callers).
     *
     * <p>The result is {@code true} unless {@code str} contains a space, {@code str2} contains
     * none, and {@code str2} is at most 40% as long as {@code str}. In that remaining case a
     * {@code str2} shorter than three characters is rejected, and a longer one is accepted only
     * if one of its characters at index 1 to 4 is upper case or a dot.
     *
     * @param str  the reference string
     * @param str2 the string to judge
     * @return whether {@code str2} passes the heuristic
     */
    protected static boolean isNiceSynonym(String str, String str2) {
        boolean referenceHasSpace = str.contains(" ");
        boolean candidateHasSpace = str2.contains(" ");
        boolean muchShorter = ((float) str2.length()) / ((float) str.length()) <= 0.4f;
        if (!referenceHasSpace || candidateHasSpace || !muchShorter) {
            return true;
        }
        if (str2.length() < 3) {
            return false;
        }
        // Look at the characters at index 1..4 (the first character is ignored).
        for (int i = 1; i < 5 && i < str2.length(); i++) {
            char c = str2.charAt(i);
            if (Character.isUpperCase(c) || c == '.') {
                return true;
            }
        }
        return false;
    }

    /**
     * Loads all resources from the given files and delegates to
     * {@link #createNamedEntityDB(String, InputStream, String, String, byte[], String, String)}.
     *
     * <p>If any argument is {@code null}, an error is printed to {@code System.err} and nothing
     * is created. An entity file whose name ends with {@code .gz} is read through a
     * {@link GZIPInputStream}. The entity stream is closed before this method returns.
     *
     * @param str  output directory for the recognizer data
     * @param str2 path of the JSON entity file (optionally gzip-compressed)
     * @param str3 path of the file with the stopwords for indexing
     * @param str4 path of the file with the high frequency terms for disambiguation
     * @param str5 path of the openNLP POS model file
     * @param str6 path of the file with the POS tags for entities
     * @param str7 path of the file with the entity types for high frequency terms
     * @throws Exception if reading the inputs or building the data fails
     */
    public static void createNamedEntityDB(String str, String str2, String str3, String str4, String str5, String str6, String str7) throws Exception {
        if (str == null) {
            // Without this guard the run only fails later with a NullPointerException
            // from new File(null).
            System.err.println("Error: You have to specify the output directory");
            return;
        }
        if (str2 == null || str3 == null || str4 == null || str5 == null || str6 == null || str7 == null) {
            System.err.println("Error: You have to specify all input files");
            return;
        }
        LOG.info("Output file path: \n" + str);
        LOG.info("Input file path:\n" + str2 + "\n" + str3 + "\n" + str4 + "\n" + str5 + "\n" + str6 + "\n" + str7 + "\n");
        LOG.info("Current working path: " + new File(".").getAbsolutePath());

        String stopwords = FileUtils.file2String(str3);
        String highFrqTerms = FileUtils.file2String(str4);
        byte[] posModel = FileUtils.getBytesFromFile(new File(str5));
        String posTags = FileUtils.file2String(str6);
        String entityTypes = FileUtils.file2String(str7);

        // Both streams are declared as resources so the file handle is released even when the
        // gzip header cannot be read or the import fails.
        try (InputStream fileStream = new FileInputStream(str2);
             InputStream entityStream = str2.endsWith(".gz") ? new GZIPInputStream(fileStream) : fileStream) {
            createNamedEntityDB(str, entityStream, stopwords, highFrqTerms, posModel, posTags, entityTypes);
        }
    }

    /**
     * Command line entry point. Prints the usage text for {@code -h}/{@code --help}, for an empty
     * argument list and for more than 14 arguments. Arguments that are not a known option are
     * ignored.
     *
     * @param strArr the command line arguments, see the usage text for the supported options
     * @throws Exception if building the data fails
     */
    public static void main(String[] strArr) throws Exception {
        String usage = "NamedEntityDataCreator4Json -o <outputDirPath> -i <inputFilePath>-si <stopwords4IndexingFileName> -htd <highFrqTerms4DisambiguationFileName> -pm <openNLPPosModelFileName> -pt <posTags4EntitiesFileName> -et <entityTypes4HighFrqTermsFileName>\nOptions:\n  -h/--help: this text\n  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be '" + GlobalConstants.strAppBasePath + "' plus the given, relative parameter path\n  -i <inputFilePath> the file with the json entities to import\n  -si <stopwords4IndexingFileName> the file with the stopwords that should be used for indexing\n  -htd <highFrqTerms4DisambiguationFileName> the file with the terms (normally language-specific high frequent terms) that should be used for disambiguation\n  -pm <openNLPPosModelFileName> the file with the trained openNLP pos model. Download at http://opennlp.sourceforge.net/models-1.5/\n  -pt <posTags4EntitiesFileName> the file with the pos tags that indicates an entity (e.g. NE, NN for german language)\n  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term.\n\nExample entity json file:\n\n[\n{\n    \"id\":\"bla\",\n    \"label\":\"Kanne\",\n    \"synonyms\":[\"Teekanne\", \"Kaffeekanne\", \"Gießkanne\", \"Ölkanne\"],\n    \"description\":\"Eine Kanne ist ein Gefäß in dem in der Regel Flüssigkeiten ausgegossen werden. In der Regel ist diese nicht zu verschließen, weshalb die Aufbewahrung darin nicht so günstig ist.\",\n    \"material\":\"ein bleliebiges Material\",\n    \"durchsichtig\": \"kommt drauf an\"\n},\n{\n    \"id\":\"bla1\",\n    \"label\":\"Fenster\",\n    \"synonyms\":[\"Guckloch\"],\n    \"description\":\"Ein Fenster ist in einer Wand zum raus - aber weniger zum reinschauen.\",\n    \"material\":\"Glas\",\n    \"durchsichtig\": \"immer\"\n}\n]\n\n";
        if (strArr.length < 1 || strArr.length > 14) {
            System.out.println(usage);
            return;
        }

        String outputDir = null;
        String entityFile = null;
        String stopwordsFile = null;
        String highFrqTermsFile = null;
        String posModelFile = null;
        String posTagsFile = null;
        String entityTypesFile = null;

        // A for loop guarantees progress on every iteration; unknown arguments are skipped
        // instead of stalling the parser.
        for (int i = 0; i < strArr.length; i++) {
            String option = strArr[i];
            if ("-h".equals(option) || "--help".equals(option)) {
                System.out.println(usage);
                return;
            }
            if (!isValueOption(option)) {
                continue;
            }
            if (i + 1 >= strArr.length) {
                String valueKind = "-i".equals(option) ? "file name" : "path";
                System.out.println("you must specify a " + valueKind + " for parameter '" + option + "'" + usage);
                return;
            }
            // Consume the option's value so it is never mistaken for an option itself.
            String value = strArr[++i];
            switch (option) {
                case "-o":
                    outputDir = new File(value).isAbsolute() ? value : GlobalConstants.strAppBasePath + "/" + value;
                    break;
                case "-i":
                    entityFile = value;
                    break;
                case "-si":
                    stopwordsFile = value;
                    break;
                case "-htd":
                    highFrqTermsFile = value;
                    break;
                case "-pm":
                    posModelFile = value;
                    break;
                case "-pt":
                    posTagsFile = value;
                    break;
                default: // "-et", the only remaining value option
                    entityTypesFile = value;
                    break;
            }
        }
        createNamedEntityDB(outputDir, entityFile, stopwordsFile, highFrqTermsFile, posModelFile, posTagsFile, entityTypesFile);
    }

    /** Tells whether the given command line token is an option that must be followed by a value. */
    private static boolean isValueOption(String option) {
        switch (option) {
            case "-o":
            case "-i":
            case "-si":
            case "-htd":
            case "-pm":
            case "-pt":
            case "-et":
                return true;
            default:
                return false;
        }
    }
}
