/*
 * Decompiled with CFR 0.152.
 */
package de.dfki.sds.kecs.modules;

import de.dfki.sds.hephaistos.storage.StorageItem;
import de.dfki.sds.hephaistos.storage.assertion.Assertion;
import de.dfki.sds.hephaistos.storage.assertion.AssertionPool;
import de.dfki.sds.hephaistos.storage.assertion.Concept;
import de.dfki.sds.hephaistos.storage.assertion.Intelligence;
import de.dfki.sds.hephaistos.storage.assertion.Phase;
import de.dfki.sds.hephaistos.storage.assertion.Rating;
import de.dfki.sds.hephaistos.storage.file.FileInfo;
import de.dfki.sds.hephaistos.storage.file.FileInfoStorage;
import de.dfki.sds.kecs.KecsApp;
import de.dfki.sds.kecs.KecsSettings;
import de.dfki.sds.kecs.modules.ConceptDiscovery;
import de.dfki.sds.kecs.modules.Module;
import de.dfki.sds.kecs.modules.ModuleUtils;
import de.dfki.sds.kecs.server.KecsHumlServer;
import de.dfki.sds.kecs.util.ExceptionUtility;
import de.dfki.sds.kecs.util.FileInfoSearchResult;
import de.dfki.sds.kecs.util.KecsUtils;
import de.dfki.sds.kecs.vocab.KECS;
import de.dfki.sds.mschroeder.commons.lang.RegexUtility;
import de.dfki.sds.mschroeder.commons.lang.SetUtility;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.sparql.vocabulary.FOAF;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.SKOS;
import org.json.JSONObject;

public class DomainTerminologyExtraction
extends Module {
    private final Phase aiPhase = Phase.DomainTerminologyExtraction;
    private final String aiName = "DomainTerminologyExtraction";
    private Set<String> stopwords;
    private Map<Resource, String> file2basename;
    private FileInfoStorage fileInfoStorage;

    /**
     * Initializes the module: stores the settings, loads the language-specific
     * stopword list from the classpath and builds the file-to-basename cache.
     *
     * @param fileInfoStorage storage that is remembered for later lookups and whose tree is cached
     * @param pool assertion pool; not used during initialization
     * @param settings settings whose language selects the stopword resource
     * @throws RuntimeException if the stopword resource is missing or cannot be read
     */
    @Override
    public void init(FileInfoStorage fileInfoStorage, AssertionPool pool, KecsSettings settings) {
        this.settings = settings;
        String stopwordPath = "/de/dfki/sds/kecs/auxiliary/stopword_" + settings.getLanguage().name() + ".txt";
        // try-with-resources guarantees the classpath stream is closed again
        try (java.io.InputStream in = DomainTerminologyExtraction.class.getResourceAsStream(stopwordPath)) {
            if (in == null) {
                // getResourceAsStream reports a missing resource with null; route it through the
                // regular error path instead of failing later with an anonymous NullPointerException
                throw new IOException("Stopword resource not found on classpath: " + stopwordPath);
            }
            this.stopwords = new HashSet<String>(IOUtils.readLines(in, StandardCharsets.UTF_8));
        }
        catch (IOException ex) {
            ExceptionUtility.save(ex);
            throw new RuntimeException(ex);
        }
        this.fileInfoStorage = fileInfoStorage;
        this.initCache(fileInfoStorage);
    }

    /**
     * Rebuilds the {@code file2basename} cache from the storage tree and prints
     * how many files were cached and how long it took.
     *
     * @param fileInfoStorage storage whose tree (starting at its root) is traversed
     */
    private void initCache(FileInfoStorage fileInfoStorage) {
        this.file2basename = new HashMap<Resource, String>();
        long startedAt = System.currentTimeMillis();
        for (StorageItem item : fileInfoStorage.getTreeIter(fileInfoStorage.getRoot())) {
            // each item's meta JSON carries the "basename" and "uri" entries read below
            JSONObject json = new JSONObject(((FileInfo)item).getMeta());
            String name = json.getString("basename");
            Resource key = KecsApp.creator.createResource(json.getString("uri"));
            this.file2basename.put(key, name);
        }
        long elapsedMs = System.currentTimeMillis() - startedAt;
        System.out.println("DomainTerminologyExtraction file2basename init cache with " + this.file2basename.size() + " files took " + elapsedMs + " ms");
    }

    /**
     * Bootstraps the assertion pool by delegating to {@code bootstrapV1}.
     * The {@code settings} argument is not used by this implementation.
     *
     * @param fileInfoStorage storage passed on to {@code bootstrapV1}
     * @param pool assertion pool passed on to {@code bootstrapV1}
     * @param settings unused
     */
    @Override
    public void bootstrap(FileInfoStorage fileInfoStorage, AssertionPool pool, KecsSettings settings) {
        this.bootstrapV1(fileInfoStorage, pool);
    }

    /** Matches tokens made only of ASCII letters, German umlauts or sharp s; compiled once instead of per token. */
    private static final Pattern LETTER_TOKEN_PATTERN = Pattern.compile("[a-zA-Z\u00fc\u00f6\u00e4\u00dc\u00d6\u00c4\u00df]+");

    /** Matches tokens made only of digits; compiled once instead of per token. */
    private static final Pattern DIGIT_TOKEN_PATTERN = Pattern.compile("\\d+");

    /**
     * Splits every basename in the storage tree into character-type/camel-case
     * tokens, rates each distinct token with {@link #rateToken(String, String)}
     * and asserts it as {@code KECS.containsDomainTerm} of the file with
     * confidence 1.0. Tokens consisting only of space, hyphen or underscore are
     * skipped. The pool is committed once at the end.
     *
     * @param fileInfoStorage storage whose tree (starting at its root) is traversed
     * @param pool pool that receives the assertions
     */
    private void bootstrapV1(FileInfoStorage fileInfoStorage, AssertionPool pool) {
        String symbols = ModuleUtils.getSymbols();
        for (StorageItem node : fileInfoStorage.getTreeIter(fileInfoStorage.getRoot())) {
            JSONObject meta = new JSONObject(((FileInfo)node).getMeta());
            String basename = meta.getString("basename");
            Resource resource = KecsApp.creator.createResource(meta.getString("uri"));
            // a set, so every distinct token of a basename is asserted only once
            Set<String> tokens = new HashSet<String>(Arrays.asList(StringUtils.splitByCharacterTypeCamelCase(basename)));
            for (String token : tokens) {
                if (StringUtils.containsOnly((CharSequence)token, " -_")) continue;
                Rating rating = this.rateToken(token, symbols);
                pool.assertStatement(resource, KECS.containsDomainTerm, token, Phase.DomainTerminologyExtraction, Intelligence.AI, "DomainTerminologyExtraction", rating, 1.0);
            }
        }
        pool.commit();
    }

    /**
     * Rates a single token; the first matching rule wins:
     * length of at most one character is negative, a year between 1980 and 2030
     * is positive, a stopword is negative, letters only is positive, digits
     * only is negative, symbols only is negative, anything else is undecided.
     *
     * @param token token to rate
     * @param symbols characters that count as symbols, as returned by {@code ModuleUtils.getSymbols()}
     * @return the rating for the token
     */
    private Rating rateToken(String token, String symbols) {
        if (token.length() <= 1) {
            return Rating.Negative;
        }
        if (DomainTerminologyExtraction.isPlausibleYear(token)) {
            return Rating.Positive;
        }
        // Locale.ROOT keeps the lookup independent of the JVM default locale (e.g. Turkish dotless i)
        if (this.stopwords.contains(token.toLowerCase(java.util.Locale.ROOT))) {
            return Rating.Negative;
        }
        if (LETTER_TOKEN_PATTERN.matcher(token).matches()) {
            return Rating.Positive;
        }
        if (DIGIT_TOKEN_PATTERN.matcher(token).matches()) {
            return Rating.Negative;
        }
        if (StringUtils.containsOnly((CharSequence)token, symbols)) {
            return Rating.Negative;
        }
        return Rating.Undecided;
    }

    /**
     * Tells whether the token parses as an integer in the range 1980 to 2030 (inclusive).
     *
     * @param token token to check
     * @return {@code true} only for an integer within that range
     */
    private static boolean isPlausibleYear(String token) {
        try {
            int year = Integer.parseInt(token);
            return year >= 1980 && year <= 2030;
        }
        catch (NumberFormatException ignored) {
            // not a number at all, so the remaining rules decide
            return false;
        }
    }

    /**
     * Reacts to changed assertions.
     * <p>
     * For every entry that {@code AssertionPool.filter} returns for the
     * {@code KECS.containsDomainTerm} / this phase / {@code Rating.Positive}
     * query, {@code positiveTermNegativeSubstrings} is run for the subject and
     * term; terms whose assertion has intelligence {@code NI} are additionally
     * collected per subject. Terms of {@code SKOS.hiddenLabel} changes returned
     * by the second filter call are collected under the {@code null} key, i.e.
     * without an originating file. The collected terms are finally handed to
     * {@code generalizePositiveTerms}. Timings are recorded via {@code timeStat}
     * and saved afterwards.
     *
     * @param fileInfoStorage not used by this implementation
     * @param pool pool that is queried and receives new assertions
     * @param changes changed assertions to react to
     */
    @Override
    public void updateOnChanges(FileInfoStorage fileInfoStorage, AssertionPool pool, List<Assertion> changes) {
        this.timeStat("DomainTerminologyExtraction", () -> {
            // fully typed so that the map matches generalizePositiveTerms(Map<Resource, Set<String>>, ...)
            Map<Resource, Set<String>> file2terms = new HashMap<Resource, Set<String>>();
            for (Assertion changedAssertion : AssertionPool.filter(changes, null, KECS.containsDomainTerm, null, this.aiPhase, null, null, Rating.Positive, 0.0)) {
                Resource node = changedAssertion.getStatement().getSubject();
                String changedDomainTerm = changedAssertion.getStatement().getString();
                this.timeStat("positiveTermNegativeSubstrings", () -> this.positiveTermNegativeSubstrings(node, changedDomainTerm, pool));
                if (changedAssertion.getIntelligence() != Intelligence.NI) continue;
                file2terms.computeIfAbsent(node, n -> new HashSet<String>()).add(changedDomainTerm);
            }
            for (Assertion assertion : AssertionPool.filter(changes, null, SKOS.hiddenLabel, null, Phase.ConceptDiscovery, Intelligence.NI, null, null, 0.0)) {
                // HashMap permits the null key; it marks terms that do not stem from a file
                file2terms.computeIfAbsent(null, n -> new HashSet<String>()).add(assertion.getStatement().getString());
            }
            this.timeStat("generalizePositiveTerms", () -> this.generalizePositiveTerms(file2terms, pool));
        });
        this.saveTimeStat();
    }

    /**
     * Asserts a negative rating (confidence 1.0) for every assertion returned by
     * the pool query on {@code node} whose term is a proper substring of
     * {@code changedDomainTerm}.
     *
     * @param node subject whose {@code KECS.containsDomainTerm} assertions are inspected
     * @param changedDomainTerm the term that was rated positive
     * @param pool pool that is queried and receives the negative assertions
     */
    private void positiveTermNegativeSubstrings(Resource node, String changedDomainTerm, AssertionPool pool) {
        for (Assertion positive : pool.getAssertions(node, KECS.containsDomainTerm, null, this.aiPhase, null, null, Rating.Positive, 0.0)) {
            String candidate = positive.getStatement().getString();
            // proper substring: contained in the changed term but not the changed term itself
            boolean properSubstring = !changedDomainTerm.equals(candidate) && changedDomainTerm.contains(candidate);
            if (properSubstring) {
                pool.assertStatement(positive.getStatement(), this.aiPhase, Intelligence.AI, "DomainTerminologyExtraction", Rating.Negative, 1.0);
            }
        }
    }

    /**
     * Turns every given positive term into a {@code Term} (with its variations
     * and compiled patterns), then runs the timed steps {@code find},
     * {@code getOrCreateConcept}, {@code linkToConcept} and {@code explicitLink}
     * on the whole batch. Does nothing for an empty map.
     *
     * @param file2posTerms positive terms grouped by the file they came from (the key may be {@code null})
     * @param pool pool handed to the concept and linking steps
     */
    private void generalizePositiveTerms(Map<Resource, Set<String>> file2posTerms, AssertionPool pool) {
        if (file2posTerms.isEmpty()) {
            return;
        }
        List<Term> batch = new ArrayList<Term>();
        for (Map.Entry<Resource, Set<String>> perFile : file2posTerms.entrySet()) {
            for (String positiveTerm : perFile.getValue()) {
                Term t = new Term();
                t.text = positiveTerm;
                t.variations = ModuleUtils.variations(positiveTerm);
                t.file = perFile.getKey();
                // the term itself always counts as found
                t.found.add(t.text);
                t.initRegex();
                batch.add(t);
            }
        }
        this.timeStat("generalizePositiveTerms.find", () -> this.find(batch));
        this.timeStat("generalizePositiveTerms.getOrCreateConcept", () -> this.getOrCreateConcept(batch, pool));
        this.timeStat("generalizePositiveTerms.linkToConcept", () -> this.linkToConcept(batch, pool, Intelligence.AI, "DomainTerminologyExtraction", false));
        this.timeStat("generalizePositiveTerms.explicitLink", () -> this.explicitLink(batch, pool));
    }

    /**
     * Searches the cached file basenames for the given text and returns one
     * result per matching file, sorted case-insensitively by file name.
     * <p>
     * In regex mode the text is used as the pattern itself; otherwise the
     * variations from {@code ModuleUtils.variations} are searched. Each result
     * carries the file's name split into the part left of the first found term,
     * the term itself and the part right of it. If the term cannot be located
     * in the name, the whole name is put into the left part.
     *
     * @param search text or regular expression to look for; {@code null} or empty yields an empty list
     * @param regex whether {@code search} is a regular expression
     * @param folder not used by this implementation
     * @return the search results, never {@code null}
     */
    public List<FileInfoSearchResult> search(String search, boolean regex, Resource folder) {
        List<FileInfoSearchResult> result = new ArrayList<FileInfoSearchResult>();
        if (search == null || search.isEmpty()) {
            return result;
        }
        Term term = new Term();
        term.text = search;
        term.isRegex = regex;
        if (regex) {
            term.variations = new HashSet<String>(Arrays.asList(search));
        } else {
            term.variations = ModuleUtils.variations(search);
            term.found.add(term.text);
        }
        term.initRegex();
        this.timeStat("search", () -> this.find(Arrays.asList(term)));
        for (Map.Entry<Resource, Set<String>> entry : term.resource2terms.entrySet()) {
            FileInfo fi = (FileInfo)this.fileInfoStorage.get(KecsUtils.getId(entry.getKey().getURI()));
            if (fi == null) {
                // the cache may reference a file the storage cannot resolve; skip it instead of failing
                continue;
            }
            FileInfoSearchResult searchResult = new FileInfoSearchResult();
            searchResult.setFileInfo(fi);
            searchResult.setTerms(entry.getValue());
            searchResult.setVariations(term.variations);
            String matched = entry.getValue().iterator().next();
            String label = fi.getName();
            int start = label.indexOf(matched);
            if (start < 0) {
                // terms are matched against the cached basename; if the name differs from it,
                // show the name without highlighting instead of throwing from substring
                searchResult.setLeft(label);
                searchResult.setMiddle("");
                searchResult.setRight("");
            } else {
                int end = start + matched.length();
                searchResult.setLeft(label.substring(0, start));
                searchResult.setMiddle(label.substring(start, end));
                searchResult.setRight(label.substring(end));
            }
            result.add(searchResult);
        }
        result.sort((a, b) -> a.getFileInfo().getName().compareToIgnoreCase(b.getFileInfo().getName()));
        return result;
    }

    /**
     * Creates domain-term assertions (and, when a type is given, concepts) from
     * file/term pairs selected in a search.
     * <p>
     * Each entry is read as {@code [Resource file, String term]}. With
     * {@code separately} one {@code Term} is built per distinct term text;
     * otherwise all entries are merged into a single {@code Term}. If
     * {@code type} is non-null, concepts are looked up or created, typed, linked
     * and explicitly linked; otherwise only the domain-term links are asserted.
     * Finally the pool is committed and listeners are notified. Does nothing
     * for an empty list.
     *
     * @param fileTermList file/term pairs
     * @param type concept type to assign, or {@code null} to skip concept handling
     * @param pool pool that receives the assertions
     * @param username user name passed to {@code typeConcept}
     * @param separately whether every distinct term text becomes its own {@code Term}
     */
    public void createFromSearch(List<Object[]> fileTermList, Resource type, AssertionPool pool, String username, boolean separately) {
        if (fileTermList.isEmpty()) {
            return;
        }
        List<Term> collected = new ArrayList<Term>();
        if (!separately) {
            // one shared term that accumulates every selected pair
            Term merged = new Term();
            merged.file = null;
            for (Object[] pair : fileTermList) {
                merged.text = (String)pair[1];
                merged.found.add(merged.text);
                merged.variations.add(merged.text);
                merged.resource2terms.computeIfAbsent((Resource)pair[0], r -> new HashSet<String>()).add((String)pair[1]);
            }
            collected.add(merged);
        } else {
            // one term per distinct text, kept in first-seen order
            Map<String, Term> byText = new HashMap<String, Term>();
            for (Object[] pair : fileTermList) {
                String text = (String)pair[1];
                Term current = byText.get(text);
                if (current == null) {
                    current = new Term();
                    byText.put(text, current);
                    collected.add(current);
                }
                current.text = text;
                current.found.add(current.text);
                current.variations.add(current.text);
                current.resource2terms.computeIfAbsent((Resource)pair[0], r -> new HashSet<String>()).add(text);
            }
        }
        boolean withConcept = type != null;
        if (withConcept) {
            this.getOrCreateConcept(collected, pool);
            this.typeConcept(collected, type, Intelligence.NI, username, pool);
        }
        this.linkToConcept(collected, pool, Intelligence.AI, "DomainTerminologyExtraction", true);
        if (withConcept) {
            this.explicitLink(collected, pool);
        }
        pool.commit();
        pool.notifyListenersRecursively(this.fileInfoStorage);
    }

    /**
     * Matches the patterns of all given terms against every cached basename and
     * records the hits in each term's {@code found} and {@code resource2terms}.
     * <p>
     * With more than 50000 cached files the entries are split into four slices
     * that are scanned by separate threads; otherwise the scan runs on the
     * calling thread. The method returns after all workers have finished.
     *
     * @param terms terms whose patterns were prepared with {@code initRegex}
     * @throws RuntimeException if the calling thread is interrupted while waiting for the workers
     */
    private void find(final List<Term> terms) {
        int numThread = this.file2basename.size() > 50000 ? 4 : 0;
        if (numThread <= 1) {
            this.scanEntries(terms, this.file2basename.entrySet());
            return;
        }
        List<Map.Entry<Resource, String>> fileEntries = new ArrayList<Map.Entry<Resource, String>>(this.file2basename.entrySet());
        int splitSize = fileEntries.size() / numThread;
        List<Thread> threads = new ArrayList<Thread>();
        for (int i = 0; i < numThread; ++i) {
            int from = i * splitSize;
            // the last slice also takes the remainder of the integer division
            int to = i == numThread - 1 ? fileEntries.size() : (i + 1) * splitSize;
            final List<Map.Entry<Resource, String>> slice = fileEntries.subList(from, to);
            Thread thread = new Thread(() -> this.scanEntries(terms, slice));
            threads.add(thread);
            thread.start();
        }
        for (Thread t : threads) {
            try {
                t.join();
            }
            catch (InterruptedException ex) {
                // keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
                throw new RuntimeException(ex);
            }
        }
    }

    /**
     * Scans the given basename entries with every pattern of every term.
     * <p>
     * For plain (non-regex) terms a hit only counts when the glue rule holds:
     * a match of at most nine characters must have no letter directly on either
     * side, a longer match must have no letter on at least one side. For regex
     * terms the second capture group is used when the pattern has more than one
     * group, otherwise the whole match; empty or absent text is ignored.
     *
     * @param terms terms to search for
     * @param entries file/basename entries to scan
     */
    private void scanEntries(List<Term> terms, Collection<Map.Entry<Resource, String>> entries) {
        final int glueThreshold = 9;
        for (Term term : terms) {
            for (Pattern p : term.patterns) {
                if (p == null) continue;
                for (Map.Entry<Resource, String> entry : entries) {
                    Matcher matcher = p.matcher(entry.getValue());
                    while (matcher.find()) {
                        String text;
                        if (term.isRegex) {
                            text = matcher.groupCount() > 1 ? matcher.group(2) : matcher.group();
                            // group(2) is null when a user-supplied group did not take part in the match
                            if (text == null || text.isEmpty()) continue;
                        } else {
                            boolean leftSep = matcher.group(1) == null;
                            boolean rightSep = matcher.group(3) == null;
                            text = matcher.group(2);
                            boolean fulfilled = text.length() <= glueThreshold ? leftSep && rightSep : leftSep || rightSep;
                            if (!fulfilled) continue;
                        }
                        // several workers may report hits for the same term; its HashSet/HashMap are not thread-safe
                        synchronized (term) {
                            term.found.add(text);
                            term.resource2terms.computeIfAbsent(entry.getKey(), n -> new HashSet<String>()).add(text);
                        }
                    }
                }
            }
        }
    }

    /**
     * Assigns a concept to every term.
     * <p>
     * The term's found strings are compared with the hidden labels of every
     * concept in the pool using the Jaccard index (intersection size divided by
     * union size). The best-scoring concept is taken if its score is at least
     * 0.1; the scan stops early on a perfect score of 1.0. Otherwise a new
     * concept is created with the default type, a preferred label derived from
     * the term text and one hidden label per found string (asserted in
     * case-insensitive order).
     *
     * @param terms terms to resolve; {@code matchedConcept} is set on each of them
     * @param pool pool that provides existing concepts and receives new ones
     */
    private void getOrCreateConcept(List<Term> terms, AssertionPool pool) {
        final double jaccardThreshold = 0.1;
        for (Term term : terms) {
            Concept best = null;
            double bestScore = 0.0;
            for (Concept candidate : pool.getConcepts()) {
                HashSet<String> hiddenLabels = new HashSet<String>();
                for (Assertion label : candidate.getHiddenLabels()) {
                    hiddenLabels.add(label.getStatement().getString());
                }
                double shared = SetUtility.intersection(term.found, hiddenLabels).size();
                double total = SetUtility.union(term.found, hiddenLabels).size();
                double score = shared / total;
                if (score > bestScore) {
                    bestScore = score;
                    best = candidate;
                    if (bestScore == 1.0) {
                        // cannot get better than a perfect overlap
                        break;
                    }
                }
            }
            if (best != null && bestScore >= jaccardThreshold) {
                term.matchedConcept = best.getResource();
            } else {
                String prefLabel = ModuleUtils.toPrefLabel(term.text);
                Resource created = pool.createConcept();
                pool.assertStatement(created, RDF.type, ConceptDiscovery.DEFAULT_TYPE, Phase.ConceptDiscovery, Intelligence.AI, "DomainTerminologyExtraction", Rating.Positive, 1.0);
                pool.assertStatement(created, SKOS.prefLabel, prefLabel, Phase.ConceptDiscovery, Intelligence.AI, "DomainTerminologyExtraction", Rating.Positive, 1.0);
                term.found.add(prefLabel);
                List<String> orderedLabels = new ArrayList<String>(term.found);
                orderedLabels.sort(String.CASE_INSENSITIVE_ORDER);
                for (String hidden : orderedLabels) {
                    pool.assertStatement(created, SKOS.hiddenLabel, hidden, Phase.ConceptDiscovery, Intelligence.AI, "DomainTerminologyExtraction", Rating.Positive, 1.0);
                }
                term.matchedConcept = created;
            }
        }
    }

    /**
     * Assigns the given type to the matched concept of every term. Nothing
     * happens when the type is {@code null} or equals the default type. Type
     * assertions are first gathered via {@code KecsHumlServer.addTypeAssertion}
     * and then asserted in the pool.
     *
     * @param terms terms whose matched concepts are typed; terms without a concept are skipped
     * @param type type to assign
     * @param intel not used by this implementation
     * @param username user name passed to {@code addTypeAssertion}
     * @param pool pool that receives the assertions
     */
    private void typeConcept(List<Term> terms, Resource type, Intelligence intel, String username, AssertionPool pool) {
        boolean customType = type != null && !type.equals(ConceptDiscovery.DEFAULT_TYPE);
        if (!customType) {
            return;
        }
        List<Assertion> pending = new ArrayList<Assertion>();
        for (Term term : terms) {
            if (term.matchedConcept != null) {
                KecsHumlServer.addTypeAssertion(term.matchedConcept, type, Rating.Positive, username, pending, pool);
            }
        }
        for (Assertion typeAssertion : pending) {
            pool.assertAssertion(typeAssertion);
        }
    }

    /**
     * Asserts, for every file in which a term was found, the found strings as
     * {@code KECS.containsDomainTerm} and the matched concept (if any) as
     * {@code FOAF.topic}, both positive with confidence 0.75.
     * <p>
     * The domain-term assertions are skipped for the term's own originating
     * file unless {@code forceContainsDomainTerm} is set.
     *
     * @param terms terms with their {@code resource2terms} hits
     * @param pool pool that receives the assertions
     * @param intel intelligence recorded on the assertions
     * @param username name recorded on the assertions
     * @param forceContainsDomainTerm whether to assert domain terms for the originating file as well
     */
    private void linkToConcept(List<Term> terms, AssertionPool pool, Intelligence intel, String username, boolean forceContainsDomainTerm) {
        for (Term term : terms) {
            for (Map.Entry<Resource, Set<String>> hit : term.resource2terms.entrySet()) {
                Resource target = hit.getKey();
                boolean assertTerms = forceContainsDomainTerm || term.file == null || !term.file.equals(target);
                if (assertTerms) {
                    for (String foundText : hit.getValue()) {
                        pool.assertStatement(target, KECS.containsDomainTerm, foundText, Phase.DomainTerminologyExtraction, intel, username, Rating.Positive, 0.75);
                    }
                }
                if (term.matchedConcept != null) {
                    pool.assertStatement(target, FOAF.topic, term.matchedConcept, Phase.ConceptDiscovery, intel, username, Rating.Positive, 0.75);
                }
            }
        }
    }

    /**
     * Links subjects to a term's matched concept: all variations and found
     * strings of the term are turned into literals, the pool is queried for
     * {@code KECS.containsDomainTerm} assertions of this phase having one of
     * them as object, and for each positively rated hit the subject is asserted
     * to have the concept as {@code FOAF.topic} (confidence 0.75).
     *
     * @param terms terms whose {@code matchedConcept} is linked
     * @param pool pool that is queried and receives the assertions
     */
    private void explicitLink(List<Term> terms, AssertionPool pool) {
        for (Term term : terms) {
            List<RDFNode> literals = new ArrayList<RDFNode>();
            for (String label : SetUtility.union(term.variations, term.found)) {
                literals.add(KecsApp.creator.createLiteral(label));
            }
            for (Assertion hit : pool.getAssertionsIn(KECS.containsDomainTerm, literals, this.aiPhase)) {
                if (hit.getRating() == Rating.Positive) {
                    pool.assertStatement(hit.getSubject(), FOAF.topic, term.matchedConcept, Phase.ConceptDiscovery, Intelligence.AI, "DomainTerminologyExtraction", Rating.Positive, 0.75);
                }
            }
        }
    }

    private class Term {
        String text;
        Set<String> variations;
        Resource file;
        List<Pattern> patterns;
        Set<String> found = new HashSet<String>();
        boolean isRegex;
        Map<Resource, Set<String>> resource2terms;
        Resource matchedConcept;

        public Term() {
            this.variations = new HashSet<String>();
            this.resource2terms = new HashMap<Resource, Set<String>>();
        }

        public void initRegex() {
            int caseSensitiveLengthThreshold = 5;
            HashSet varCs = new HashSet();
            HashSet varCi = new HashSet();
            List<Set> varList = Arrays.asList(varCs, varCi);
            this.variations.forEach(v -> {
                if (this.isRegex) {
                    varCs.add(v);
                    varCi.add(v);
                } else if (v.length() <= caseSensitiveLengthThreshold) {
                    varCs.add(v);
                } else {
                    varCi.add(v);
                }
            });
            this.patterns = new ArrayList<Pattern>();
            for (int i = 0; i < varList.size(); ++i) {
                Set vars = varList.get(i);
                if (vars.isEmpty()) {
                    this.patterns.add(null);
                    continue;
                }
                String glue = "([a-zA-Z\u00dc\u00d6\u00c4\u00fc\u00f6\u00e4])?";
                StringBuilder patternSB = new StringBuilder();
                if (!this.isRegex) {
                    patternSB.append(glue);
                }
                StringJoiner sj = new StringJoiner("|", "(", ")");
                vars.forEach(v -> sj.add(this.isRegex ? v : RegexUtility.quote(v)));
                patternSB.append(sj.toString());
                if (!this.isRegex) {
                    patternSB.append(glue);
                }
                if (i == 0) {
                    this.patterns.add(Pattern.compile(patternSB.toString()));
                    continue;
                }
                this.patterns.add(Pattern.compile(patternSB.toString(), 2));
            }
        }
    }
}

