package de.dfki.sds.lodex.util;

import com.google.common.collect.Iterators;
import com.google.common.collect.UnmodifiableIterator;
import de.dfki.inquisitor.collections.CollectionUtilz;
import de.dfki.inquisitor.collections.MultiValueConfiguration;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.collections.ValueBox;
import de.dfki.inquisitor.file.FileUtilz;
import de.dfki.inquisitor.math.MathUtilz;
import de.dfki.inquisitor.processes.StopWatch;
import de.dfki.inquisitor.text.StringUtils;
import de.dfki.sds.lodex.GlobalConstants;
import de.dfki.sds.lodex.NamedEntityLinker;
import de.dfki.sds.lodex.embeddings.Embeddings;
import de.dfki.sds.lodex.evaluation.CrossNerUtil;
import de.dfki.sds.lodex.util.VectorAvgHistograms;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import java.util.stream.Stream;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealVector;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/lodex-1.4-SNAPSHOT.jar:de/dfki/sds/lodex/util/NamedEntityDataTrainer4CrossNer.class */
/**
 * Command line trainer that reads a CrossNER annotated file, creates embedding vectors for the
 * annotated entities grouped by entity label, renders check histograms and writes one index entry
 * per entity label through {@link NamedEntityLinker}.
 *
 * <p>NOTE(review): this file is decompiler output. Several nested lambdas reuse the parameter names
 * of their enclosing lambda and some method references were emitted as {@code r1.add(v1)}
 * placeholders. Those statements are intentionally left untouched because the variable each
 * occurrence refers to cannot be recovered from this source alone - verify against the original
 * sources before relying on them.
 */
public class NamedEntityDataTrainer4CrossNer {
    /** Usage/help text printed for {@code -h}/{@code --help} and appended to every argument error message. */
    private static final String USAGE = "NamedEntityDataTrainer4CrossNer -o <outputDirPath> -i <inputFilePath> -et <entityTypes4HighFrqTermsFileName> -em <embeddingsConfFileName>\n"
            + "Options:\n"
            + "  -h/--help: this text\n"
            + "  -o <outputDirPath> the directory where to write the output data. In the case you specify a relative path, the final path will be 'GlobalConstants.strAppBasePath'\n"
            + "                     plus the given, relative parameter path\n"
            + "  -i <inputFilePath> the file with the json entities to import\n"
            + "  -et <entityTypes4HighFrqTermsFileName> the file with the entity types that indicates an entity in the case of an high frequent term.\n"
            + "  -em <embeddingsConfFileName> the path to the text embeddings configuration\n"
            + "\n"
            + " As follows you can see an example CrossNer file that can be used as input. Download official CrossNer files at https://github.com/zliucr/CrossNER\n"
            + "\n"
            + "SOCCER  O\n"
            + "-       O\n"
            + "JAPAN   B-location\n"
            + "GET     O\n"
            + "LUCKY   O\n"
            + "WIN     O\n"
            + ",       O\n"
            + "CHINA   B-person\n"
            + "IN      O\n"
            + "SURPRISE        O\n"
            + "DEFEAT  O\n"
            + ".       O\n";

    /**
     * Collects named vectors from the given maps and passes them to
     * {@code VectorAvgHistograms.createHistograms}.
     *
     * @param hashMap    outer key to a list of maps, each mapping a name to a float vector; the vectors
     *                   are grouped and averaged via {@code MathUtilz.averageVector}
     * @param embeddings used to embed the concatenated texts of the three StringBuilder maps
     * @param hashMap2   key to concatenated text; results are named with the infix {@code "_textTriggerConcat_"}
     * @param hashMap3   key to concatenated text; results are named with the infix {@code "_windowConcat_"}
     * @param hashMap4   key to concatenated text; results are named with the infix {@code "_allConcat_"}
     */
    private static void createHistograms(HashMap<String, List<HashMap<String, List<Float>>>> hashMap, Embeddings embeddings, HashMap<String, StringBuilder> hashMap2, HashMap<String, StringBuilder> hashMap3, HashMap<String, StringBuilder> hashMap4) {
        // group name -> list of vectors (floats widened to doubles) that belong to that group
        HashMap hashMap5 = new HashMap();
        // NOTE(review): the inner lambda reuses the names 'str' and 'list' of the outer lambda. 'list' inside
        // the inner lambda is streamed as floats, so it is presumably the inner vector; which 'str' feeds the
        // group key and which one is hashed cannot be told from here - TODO confirm against the original source.
        hashMap.forEach((str, list) -> {
            Iterator it = list.iterator();
            while (it.hasNext()) {
                ((HashMap) it.next()).forEach((str, list) -> {
                    // the hash is masked with 65535, i.e. reduced to its lower 16 bits, to form a short suffix
                    ((List) hashMap5.computeIfAbsent(str + "_" + (str.hashCode() & 65535), str -> {
                        return new LinkedList();
                    })).add(list.stream().map(f -> {
                        return Double.valueOf(f.floatValue());
                    }).toList());
                });
            }
        });
        // additional named vectors that are handed over next to the grouped raw vectors
        LinkedList linkedList = new LinkedList();
        // one "<group>_Avg" vector per group
        hashMap5.forEach((str2, list2) -> {
            linkedList.add(new VectorAvgHistograms.AggregationName2Vector(str2 + "_Avg", MathUtilz.averageVector(list2)));
        });
        // The following three loops embed the concatenated text of each entry and add every resulting
        // vector under a "<name>_<kind>Concat_<16 bit hash>" name.
        // NOTE(review): outer and inner lambda share the same 'strN' name in each loop, so it is unclear
        // which of the two keys is used for the prefix and which for the hash - TODO confirm.
        hashMap2.forEach((str3, sb) -> {
            embeddings.createEmbeddings(sb.toString()).forEach((str3, list3) -> {
                linkedList.add(new VectorAvgHistograms.AggregationName2Vector(str3 + "_textTriggerConcat_" + (str3.hashCode() & 65535), list3.stream().mapToDouble((v0) -> {
                    return v0.doubleValue();
                }).boxed().toList()));
            });
        });
        hashMap3.forEach((str4, sb2) -> {
            embeddings.createEmbeddings(sb2.toString()).forEach((str4, list3) -> {
                linkedList.add(new VectorAvgHistograms.AggregationName2Vector(str4 + "_windowConcat_" + (str4.hashCode() & 65535), list3.stream().mapToDouble((v0) -> {
                    return v0.doubleValue();
                }).boxed().toList()));
            });
        });
        hashMap4.forEach((str5, sb3) -> {
            embeddings.createEmbeddings(sb3.toString()).forEach((str5, list3) -> {
                linkedList.add(new VectorAvgHistograms.AggregationName2Vector(str5 + "_allConcat_" + (str5.hashCode() & 65535), list3.stream().mapToDouble((v0) -> {
                    return v0.doubleValue();
                }).boxed().toList()));
            });
        });
        // NOTE(review): the target file is a hard-coded absolute path of a developer machine; the meaning of
        // the second argument (4) is not visible here - TODO confirm and make both configurable.
        VectorAvgHistograms.createHistograms("/home/reuschling/downloads/histogramsCheck.png", 4, hashMap5, linkedList);
    }

    /**
     * Trains the named entity data from a CrossNER file.
     *
     * <p>If the output path already exists nothing is written to it: only the histograms are
     * generated and the method returns. Otherwise the directory is created, the entity type file and
     * the embeddings configuration are copied into it and one index entry per entity label is added.
     *
     * @param str  output directory path; must not be {@code null}
     * @param str2 path of the CrossNER input file
     * @param str3 path of the entity types file, copied to {@code <str>/entityTypes4HighFrqTerms.txt}
     * @param str4 path of the embeddings configuration, copied to {@code <str>/embeddings.conf} and
     *             used to create the {@link Embeddings} instance
     * @throws Exception any failure of the underlying file, embedding or index operations
     */
    public static void createNamedEntityDB(String str, String str2, String str3, String str4) throws Exception {
        if (str2 == null || str3 == null || str4 == null) {
            System.err.println("Error: You have to specify all input files");
            return;
        }
        if (str == null) {
            // Previously a missing output path only surfaced further down as a NullPointerException
            // raised by Path.of(str); report it explicitly instead.
            System.err.println("Error: You have to specify the output directory path");
            return;
        }
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Output file path: \n" + str);
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Input file paths:\n\t" + str2 + "\n\t" + str3 + "\n\t" + str4 + "\n");
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Current working path: " + new File(".").getAbsolutePath());
        long currentTimeMillis = System.currentTimeMillis();
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Named entity data path: " + str);
        // z == true: the output path already exists, so only the histograms are produced
        boolean z = false;
        if (Files.exists(Path.of(str, new String[0]), new LinkOption[0])) {
            LoggerFactory.getLogger(NamedEntityDataTrainer4CrossNer.class.getName()).error("'{}' exists, will not overwrite. Remove it in front of the training. The histograms will still be generated.", str);
            z = true;
        } else {
            LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("Copy file contents to disk");
            new File(str).mkdirs();
            FileUtilz.string2File(FileUtilz.file2String(str3), str + "/entityTypes4HighFrqTerms.txt");
            FileUtilz.string2File(FileUtilz.file2String(str4), str + "/embeddings.conf");
        }
        Embeddings embeddings = new Embeddings(new MultiValueConfiguration().fromFile(str4));
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("\nStart training by generating average embedding vectors");
        // mutable counter of added entities; boxed so that it can be updated from inside a lambda
        ValueBox valueBox = new ValueBox(0);
        CrossNerUtil.CrossNerContent processCrossNerText = CrossNerUtil.processCrossNerText(FileUtilz.file2String(str2));
        // entity label -> all annotated entities carrying that label
        MultiValueHashMap multiValueHashMap = new MultiValueHashMap();
        processCrossNerText.crossNerEntities().forEach(offsetMatchId -> {
            multiValueHashMap.add(offsetMatchId.entityLabel(), offsetMatchId);
        });
        // Per entity label: hashMap = all embeddings, hashMap2 = text trigger embeddings,
        // hashMap3 = text window embeddings, hashMap4 = embedding of the label itself.
        // Only hashMap2 is read again in this method; the other three are filled but not read here.
        HashMap hashMap = new HashMap();
        HashMap hashMap2 = new HashMap();
        HashMap hashMap3 = new HashMap();
        HashMap hashMap4 = new HashMap();
        // NOTE(review): hashMap5 is never filled before it is passed to createHistograms (as the "_allConcat_"
        // source) and before it is read for the index entry below, so both see it empty - TODO confirm intent.
        HashMap hashMap5 = new HashMap();
        // per entity label: concatenated text triggers (hashMap6) and concatenated text windows (hashMap7)
        HashMap hashMap6 = new HashMap();
        HashMap hashMap7 = new HashMap();
        multiValueHashMap.internalHashMap().forEach((str5, collection) -> {
            // split the entities of this label into batches of at most 512 elements
            LinkedList linkedList = new LinkedList();
            UnmodifiableIterator partition = Iterators.partition(collection.iterator(), 512);
            Objects.requireNonNull(linkedList);
            // NOTE(review): 'r1' is a decompiler placeholder; given the preceding requireNonNull this is
            // presumably the method reference linkedList::add - TODO confirm.
            partition.forEachRemaining((v1) -> {
                r1.add(v1);
            });
            // every batch is processed twice (text triggers and text windows), hence size * 2 events
            StopWatch estimatedEventCount = new StopWatch().setPrevix4report(String.format("Embedding batch trigger for '%s': ", str5)).setEstimatedEventCount(linkedList.size() * 2);
            LinkedList linkedList2 = new LinkedList();
            LinkedList linkedList3 = new LinkedList();
            LinkedList linkedList4 = new LinkedList();
            new LinkedList();
            // Pass 1: embed the text triggers of each batch and remember the trigger texts per label.
            // NOTE(review): the local 'list' redeclares the lambda parameter 'list' (decompiler artifact).
            Stream flatMap = linkedList.stream().map(list -> {
                estimatedEventCount.notifyEvent();
                List<String> list = list.stream().map((v0) -> {
                    return v0.textTrigger();
                }).toList();
                StringBuilder sb = (StringBuilder) hashMap6.computeIfAbsent(str5, str5 -> {
                    return new StringBuilder();
                });
                list.forEach(str6 -> {
                    sb.append(str6).append("     ");
                });
                return embeddings.createEmbeddings(list);
            }).flatMap((v0) -> {
                return v0.stream();
            });
            Objects.requireNonNull(linkedList3);
            // NOTE(review): presumably linkedList3::add - TODO confirm.
            flatMap.forEach((v1) -> {
                r1.add(v1);
            });
            // Pass 2: embed a text window around each entity, i.e. the plain text from
            // startOffset - 6 to endOffset + 6, clamped to the bounds of the plain text.
            // NOTE(review): the local 'list2' redeclares the lambda parameter 'list2' (decompiler artifact).
            Stream flatMap2 = linkedList.stream().map(list2 -> {
                estimatedEventCount.notifyEvent();
                int i = 12;
                List<String> list2 = list2.stream().map(offsetMatchId2 -> {
                    return processCrossNerText.plainText4Offsets().substring(Math.max(0, offsetMatchId2.startOffset() - (i / 2)), Math.min(processCrossNerText.plainText4Offsets().length(), offsetMatchId2.endOffset() + (i / 2)));
                }).toList();
                StringBuilder sb = (StringBuilder) hashMap7.computeIfAbsent(str5, str5 -> {
                    return new StringBuilder();
                });
                list2.forEach(str6 -> {
                    sb.append(str6).append("     ");
                });
                return embeddings.createEmbeddings(list2);
            }).flatMap((v0) -> {
                return v0.stream();
            });
            Objects.requireNonNull(linkedList4);
            // NOTE(review): presumably linkedList4::add - TODO confirm.
            flatMap2.forEach((v1) -> {
                r1.add(v1);
            });
            // embedding of the entity label text itself
            List of = List.of(embeddings.createEmbeddings(str5));
            linkedList2.addAll(linkedList3);
            linkedList2.addAll(linkedList4);
            linkedList2.addAll(of);
            hashMap.put(str5, linkedList2);
            hashMap2.put(str5, linkedList3);
            hashMap3.put(str5, linkedList4);
            hashMap4.put(str5, of);
        });
        // histogram input: the text trigger embeddings, keyed by "<label>_TextTrigger"
        HashMap hashMap8 = new HashMap();
        hashMap2.forEach((str6, list) -> {
            hashMap8.put(str6 + "_TextTrigger", list);
        });
        createHistograms(hashMap8, embeddings, hashMap6, hashMap7, hashMap5);
        if (z) {
            LoggerFactory.getLogger(NamedEntityDataTrainer4CrossNer.class.getName()).info("Histograms created. Will break now because of existing DB directory");
            return;
        }
        NamedEntityLinker namedEntityLinker = new NamedEntityLinker(str);
        namedEntityLinker.startWriting(true);
        // one index entry per entity label, built from its text trigger embeddings
        hashMap2.forEach((str7, list2) -> {
            // field name -> sum of the scaled vectors seen for that field
            HashMap hashMap9 = new HashMap();
            Iterator it = list2.iterator();
            while (it.hasNext()) {
                // NOTE(review): the inner lambda reuses 'str7' and 'list2' of the outer lambda. It cannot be told
                // from here whether the divisor is the number of vectors of this label (which would make the sum
                // an average) or the length of the single vector - TODO confirm against the original source.
                ((HashMap) it.next()).forEach((str7, list2) -> {
                    ArrayRealVector arrayRealVector = new ArrayRealVector(CollectionUtilz.toSimpleDouble(list2));
                    arrayRealVector.mapDivideToSelf(list2.size());
                    ArrayRealVector arrayRealVector2 = (ArrayRealVector) hashMap9.get(str7);
                    hashMap9.put(str7, arrayRealVector2 == null ? arrayRealVector : arrayRealVector2.add((RealVector) arrayRealVector));
                });
            }
            // convert the accumulated double vectors back to float lists for the index
            LinkedList linkedList = new LinkedList();
            hashMap9.forEach((str8, arrayRealVector) -> {
                linkedList.add(new NamedEntityLinker.Field2Vector(str8, Arrays.stream(arrayRealVector.getDataRef()).mapToObj(d -> {
                    return Float.valueOf(Double.valueOf(d).floatValue());
                }).toList()));
            });
            namedEntityLinker.addEntity2Index(UUID.randomUUID().toString(), str7, null, ((StringBuilder) hashMap5.computeIfAbsent(str7, str9 -> {
                return new StringBuilder();
            })).toString(), null, null, (NamedEntityLinker.Field2Vector[]) linkedList.toArray(i -> {
                return new NamedEntityLinker.Field2Vector[i];
            }));
            valueBox.setValue(Integer.valueOf(((Integer) valueBox.getValue()).intValue() + 1));
            // progress message every 10000 added entities
            if (((Integer) valueBox.getValue()).intValue() % 10000 == 0) {
                LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("added " + StringUtils.beautifyNumber((Number) valueBox.getValue()) + " entities");
            }
        });
        LoggerFactory.getLogger((Class<?>) NamedEntityDataCreator.class).info("added " + StringUtils.beautifyNumber((Number) valueBox.getValue()) + " entities");
        namedEntityLinker.stopWriting();
        namedEntityLinker.close();
        StopWatch.stopAndLogDistance(currentTimeMillis, NamedEntityDataTrainer4CrossNer.class);
        LoggerFactory.getLogger((Class<?>) NamedEntityDataTrainer4CrossNer.class).info("finished");
    }

    /**
     * Command line entry point. Parses {@code -o}, {@code -i}, {@code -et} and {@code -em} (each
     * followed by its value) and starts {@link #createNamedEntityDB}. {@code -h}/{@code --help}, a
     * missing option value or an argument count outside 1..16 print the usage text and return.
     * A relative {@code -o} path is resolved against {@code GlobalConstants.strAppBasePath}.
     *
     * @param strArr the command line arguments
     * @throws Exception any failure raised by {@link #createNamedEntityDB}
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 1 || strArr.length > 16) {
            System.out.println(USAGE);
            return;
        }
        String str = null;
        String str2 = null;
        String str3 = null;
        String str4 = null;
        // An indexed for loop guarantees progress for every token. The former while loop skipped its
        // increment for unrecognized tokens and therefore never terminated once it met one - which
        // included the value following '-o', because that branch did not consume its value.
        for (int i = 0; i < strArr.length; i++) {
            String option = strArr[i];
            if ("-h".equals(option) || "--help".equals(option)) {
                System.out.println(USAGE);
                return;
            }
            boolean hasValue = i + 1 < strArr.length;
            if ("-o".equals(option)) {
                if (!hasValue) {
                    System.out.println("you must specify a path for parameter '-o'" + USAGE);
                    return;
                }
                str = strArr[++i];
                if (!new File(str).isAbsolute()) {
                    str = GlobalConstants.strAppBasePath + "/" + str;
                }
            } else if ("-i".equals(option)) {
                if (!hasValue) {
                    System.out.println("you must specify a file name for parameter '-i'" + USAGE);
                    return;
                }
                str2 = strArr[++i];
            } else if ("-et".equals(option)) {
                if (!hasValue) {
                    System.out.println("you must specify a path for parameter '-et'" + USAGE);
                    return;
                }
                str3 = strArr[++i];
            } else if ("-em".equals(option)) {
                if (!hasValue) {
                    System.out.println("you must specify a path for parameter '-em'" + USAGE);
                    return;
                }
                str4 = strArr[++i];
            }
            // any other token is not a known option and is skipped
        }
        createNamedEntityDB(str, str2, str3, str4);
    }
}
