package de.dfki.km.seed.nlp;

import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import de.dfki.km.pimo.api.PimoApi;
import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.DefaultPaths;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import edu.washington.cs.knowitall.argumentidentifier.ConfidenceMetric;
import edu.washington.cs.knowitall.extractor.R2A2;
import edu.washington.cs.knowitall.nlp.ChunkedSentence;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction;
import edu.washington.cs.knowitall.normalization.BinaryExtractionNormalizer;
import edu.washington.cs.knowitall.normalization.NormalizedBinaryExtraction;
import edu.washington.cs.knowitall.util.DefaultObjects;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/classes/de/dfki/km/seed/nlp/SeedNlpImpl.class */
/**
 * NLP service implementation offering language detection, named-entity extraction with
 * CRF sequence classifiers, replacement of referring words by their coreference
 * representative, and binary relation extraction with confidence scores.
 *
 * <p>All models are loaded eagerly by the constructor via {@link #initEn()} and
 * {@link #initDe()}.
 */
public class SeedNlpImpl implements SeedNlpApi, PimoApi {
    private static final Logger logger = LoggerFactory.getLogger(SeedNlpImpl.class);

    /** Language code reported when detection yields no result. */
    private static final String DEFAULT_LANGUAGE = "en";
    /** Language code that selects the German classifier in {@link #findEntities}. */
    private static final String GERMAN_LANGUAGE = "de";
    /** Classpath location of the classifier used for German text. */
    private static final String DE_NER_MODEL = "netagger/dewac_175m_600.crf.ser.gz";

    /** Result keys of {@link #findEntities}; index-aligned with the two pattern arrays below. */
    private static final String[] ENTITY_KEYS = {"PERSON", "LOCATION", "ORGANIZATION"};
    /** Inline-XML tag patterns matched against the German classifier output. */
    private static final Pattern[] DE_ENTITY_PATTERNS = {
        Pattern.compile("<I-PER>(.+?)</I-PER>"),
        Pattern.compile("<I-LOC>(.+?)</I-LOC>"),
        Pattern.compile("<I-ORG>(.+?)</I-ORG>")
    };
    /** Inline-XML tag patterns matched against the English classifier output. */
    private static final Pattern[] EN_ENTITY_PATTERNS = {
        Pattern.compile("<PERSON>(.+?)</PERSON>"),
        Pattern.compile("<LOCATION>(.+?)</LOCATION>"),
        Pattern.compile("<ORGANIZATION>(.+?)</ORGANIZATION>")
    };
    /** Articles removed from relation arguments and relation phrases. */
    private static final Pattern ARTICLE_PATTERN = Pattern.compile("\\b(The|the|a|an)\\b");
    /** Lower-cased words that are replaced by the representative mention of their chain. */
    private static final String[] REFERRING_WORDS = {
        "he", "his", "him", "she", "her", "hers", "it", "its",
        "they", "their", "them", "theirs", "there", "here"
    };

    /** Shared language detector; {@code null} when the built-in profiles could not be read. */
    private static final LanguageDetector _langDetector = buildLanguageDetector();

    private StanfordCoreNLP enPipeline;
    private AbstractSequenceClassifier<CoreLabel> enClassifier;
    private AbstractSequenceClassifier<CoreLabel> deClassifier;
    private final HashSet<String> refSet = new HashSet<>();
    private R2A2 r2a2;
    /** Confidence function for extractions; stays {@code null} if its model failed to load. */
    private ConfidenceMetric scoreFun;

    /** Creates the service and loads the English and German resources. */
    public SeedNlpImpl() {
        initEn();
        initDe();
    }

    /**
     * Builds the language detector from the built-in profiles.
     *
     * @return the detector, or {@code null} if the profiles could not be read
     */
    private static LanguageDetector buildLanguageDetector() {
        try {
            return LanguageDetectorBuilder.create(NgramExtractors.standard())
                    .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                    .build();
        } catch (IOException e) {
            logger.error("An exception occurred while initializing language identifier", e);
            return null;
        }
    }

    /**
     * Detects the language of the given text.
     *
     * @param str the text to inspect
     * @return the detected language code, or {@code "en"} when the text is {@code null},
     *         the detector is unavailable, or no language could be determined
     */
    public static String detectLang(String str) {
        // The detector is null when profile loading failed; fall back instead of throwing NPE.
        if (_langDetector == null || str == null) {
            return DEFAULT_LANGUAGE;
        }
        Optional<LdLocale> detected = _langDetector.detect(str);
        return detected.isPresent() ? detected.get().getLanguage() : DEFAULT_LANGUAGE;
    }

    /** Loads the classifier that {@link #findEntities} applies to German text. */
    public void initDe() {
        this.deClassifier = loadClassifier(DE_NER_MODEL);
    }

    /**
     * Builds the English CoreNLP pipeline, fills the set of referring words, and loads the
     * English classifier, the relation extractor and its confidence function.
     */
    public void initEn() {
        Properties pipelineProps = new Properties();
        pipelineProps.put("annotators", "tokenize, ssplit, pos,lemma, ner, regexner, parse, dcoref");
        pipelineProps.put("pos.model", "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
        pipelineProps.put("ner.model", DefaultPaths.DEFAULT_NER_CONLL_MODEL);
        this.enPipeline = new StanfordCoreNLP(pipelineProps);

        for (String word : REFERRING_WORDS) {
            this.refSet.add(word);
        }

        this.enClassifier = loadClassifier(DefaultPaths.DEFAULT_NER_THREECLASS_MODEL);
        this.r2a2 = new R2A2();
        try {
            this.scoreFun = new ConfidenceMetric();
        } catch (IOException e) {
            // getRelations() checks for the missing metric and reports it instead of failing with NPE.
            logger.error("Could not load the confidence metric; relation extraction is unavailable", e);
        }
    }

    /**
     * Loads a CRF classifier with newline tokenization enabled, retrying without the extra
     * options if that first attempt fails.
     *
     * @param modelPath location of the serialized classifier
     * @return the loaded classifier
     */
    private static AbstractSequenceClassifier<CoreLabel> loadClassifier(String modelPath) {
        try {
            Properties classifierProps = new Properties();
            classifierProps.put("tokenizerOptions", "tokenizeNLs=true");
            AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(modelPath, classifierProps);
            return classifier;
        } catch (IOException | ClassCastException | ClassNotFoundException e) {
            logger.warn("Could not load classifier {} with tokenizer options; retrying with defaults", modelPath, e);
            return CRFClassifier.getClassifierNoExceptions(modelPath);
        }
    }

    /**
     * Extracts person, location and organization names from a text. The German classifier is
     * used when the detected language is {@code "de"}, the English one otherwise.
     *
     * @param str  not used by this implementation
     * @param str2 the text to analyse
     * @return a table with the keys {@code PERSON}, {@code LOCATION} and {@code ORGANIZATION},
     *         each present only if at least one entity of that kind was found; values are
     *         de-duplicated and in order of first occurrence. Empty when {@code str2} is
     *         {@code null}.
     */
    @Override // de.dfki.km.seed.nlp.SeedNlpApi
    public Hashtable<String, List<String>> findEntities(String str, String str2) {
        if (str2 == null) {
            return new Hashtable<>();
        }
        if (GERMAN_LANGUAGE.equals(detectLang(str2))) {
            return collectEntities(this.deClassifier.classifyWithInlineXML(str2), DE_ENTITY_PATTERNS);
        }
        return collectEntities(this.enClassifier.classifyWithInlineXML(str2), EN_ENTITY_PATTERNS);
    }

    /**
     * Groups the tagged values of an inline-XML classifier output by entity kind.
     *
     * @param taggedText classifier output containing inline XML tags
     * @param patterns   one pattern per entry of {@link #ENTITY_KEYS}, in the same order
     * @return the non-empty value lists keyed by entity kind
     */
    private Hashtable<String, List<String>> collectEntities(String taggedText, Pattern[] patterns) {
        Hashtable<String, List<String>> entities = new Hashtable<>();
        for (int i = 0; i < ENTITY_KEYS.length; i++) {
            List<String> values = getTagValues(taggedText, patterns[i]);
            if (!values.isEmpty()) {
                entities.put(ENTITY_KEYS[i], values);
            }
        }
        return entities;
    }

    /**
     * Collects the distinct first-group matches of a pattern in order of first occurrence.
     *
     * @param str     the text to search
     * @param pattern a pattern with at least one capturing group
     * @return the distinct captured values
     */
    private List<String> getTagValues(String str, Pattern pattern) {
        List<String> values = new ArrayList<>();
        HashSet<String> seen = new HashSet<>();
        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            String value = matcher.group(1);
            // The set makes the duplicate check constant-time; the list keeps the encounter order.
            if (seen.add(value)) {
                values.add(value);
            }
        }
        return values;
    }

    /**
     * Rewrites a text so that referring words (see {@link #REFERRING_WORDS}) that belong to a
     * coreference chain with more than one mention are replaced by the chain's representative
     * mention.
     *
     * @param str  not used by this implementation
     * @param str2 the text to rewrite
     * @return the tokens of the annotated text, each followed by a single space, or
     *         {@code str2} unchanged when no coreference or sentence annotation is available
     */
    @Override // de.dfki.km.seed.nlp.SeedNlpApi
    public String replaceMentionsByEntities(String str, String str2) {
        Annotation annotation = new Annotation(str2);
        this.enPipeline.annotate(annotation);
        Map<Integer, CorefChain> chains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (chains == null || sentences == null) {
            return str2;
        }

        for (Map.Entry<Integer, CorefChain> entry : chains.entrySet()) {
            CorefChain chain = entry.getValue();
            List<CorefChain.CorefMention> mentions = chain.getMentionsInTextualOrder();
            if (mentions.size() == 1) {
                // A chain with a single mention has nothing to resolve.
                continue;
            }
            CorefChain.CorefMention representative = chain.getRepresentativeMention();
            logger.debug("ClusterId: {}", entry.getKey());
            logger.debug("Representative Mention: {}", representative.mentionSpan);
            for (CorefChain.CorefMention mention : mentions) {
                logger.debug("Referral: {} @{} {} {}", mention.mentionSpan, mention.sentNum, mention.startIndex, mention.endIndex);
                // Sentence numbers and token indices of a mention are 1-based; endIndex is exclusive.
                List<CoreLabel> tokens = sentences.get(mention.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
                for (int i = mention.startIndex - 1; i < mention.endIndex - 1; i++) {
                    CoreLabel token = tokens.get(i);
                    String text = token.get(CoreAnnotations.TextAnnotation.class);
                    // Locale.ROOT keeps the lookup independent of the JVM default locale.
                    if (text != null && this.refSet.contains(text.toLowerCase(java.util.Locale.ROOT))) {
                        token.set(CoreAnnotations.TextAnnotation.class, representative.mentionSpan);
                    }
                }
            }
        }

        StringBuilder rewritten = new StringBuilder();
        for (CoreMap sentence : sentences) {
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                rewritten.append(token.get(CoreAnnotations.TextAnnotation.class)).append(StringUtils.SPACE);
            }
        }
        return rewritten.toString();
    }

    /**
     * Extracts binary relations from a text. Articles ({@code The}, {@code the}, {@code a},
     * {@code an}) are removed from both arguments and from the relation phrase.
     *
     * @param str  not used by this implementation
     * @param str2 the text to analyse
     * @return the extracted triplets with their confidence, or {@code null} when nothing was
     *         extracted or the confidence metric could not be loaded
     */
    @Override // de.dfki.km.seed.nlp.SeedNlpApi
    public List<Triplet> getRelations(String str, String str2) {
        if (this.scoreFun == null) {
            logger.error("Relation extraction skipped: the confidence metric was not loaded");
            return null;
        }
        List<Triplet> triplets = new ArrayList<>();
        BinaryExtractionNormalizer normalizer = new BinaryExtractionNormalizer();
        try {
            Iterator<ChunkedSentence> sentences = DefaultObjects.getDefaultSentenceReader(new StringReader(str2)).getSentences().iterator();
            while (sentences.hasNext()) {
                for (ChunkedBinaryExtraction extraction : this.r2a2.extract(sentences.next())) {
                    NormalizedBinaryExtraction normalized = normalizer.normalize(extraction);
                    double conf = this.scoreFun.getConf(extraction);
                    logger.debug("score {}", conf);
                    Triplet triplet = new Triplet(
                            stripArticles(normalized.getArgument1().toString()),
                            stripArticles(normalized.getArgument2().toString()),
                            stripArticles(normalized.getRelation().toString()),
                            conf);
                    logger.debug("Triplet: {}", triplet);
                    triplets.add(triplet);
                }
            }
        } catch (IOException e) {
            // Triplets gathered before the failure are still returned.
            logger.error("An exception occurred while extracting relations", e);
        }
        // Callers of the API receive null, not an empty list, when there are no relations.
        return triplets.isEmpty() ? null : triplets;
    }

    /** Removes the articles matched by {@link #ARTICLE_PATTERN} and trims the result. */
    private static String stripArticles(String text) {
        return ARTICLE_PATTERN.matcher(text).replaceAll("").trim();
    }

    /** Clears the CoreNLP annotator pool and drops the reference to the English pipeline. */
    public void shutDown() {
        StanfordCoreNLP.clearAnnotatorPool();
        this.enPipeline = null;
    }
}
