package de.dfki.km.email2pimo.dimension.topics;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;
import de.dfki.km.email2pimo.Manager;
import de.dfki.km.email2pimo.accessor.E2PDatabase;
import de.dfki.km.email2pimo.accessor.Email;
import de.dfki.km.email2pimo.accessor.Folder;
import de.dfki.km.email2pimo.accessor.Token;
import de.dfki.km.email2pimo.analyzer.PhraseIndicator;
import de.dfki.km.email2pimo.analyzer.TermIndicator;
import de.dfki.km.email2pimo.evidence.DBEvidence;
import de.dfki.km.email2pimo.status.Email2PimoStatusApi;
import de.dfki.km.email2pimo.util.CountMap;
import de.dfki.km.email2pimo.vocabularies.E2P;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import javax.swing.SwingWorker;
import org.apache.log4j.Logger;

/* loaded from: input_file:de/dfki/km/email2pimo/dimension/topics/ClassifyCandidatesWorker.class */
/**
 * Background worker that walks a random sample of the stored emails and, for every
 * topic candidate found in them, records two kinds of evidence:
 * <ul>
 *   <li>how often each surface form of the candidate's label occurred, and</li>
 *   <li>a type assignment derived from comparing the n-gram profile of the words
 *       around each occurrence with the per-language topic profiles.</li>
 * </ul>
 * Evidence rows written by a previous run (same evidence source) are deleted first.
 */
public class ClassifyCandidatesWorker extends SwingWorker<Void, Void> {
    private static final Logger logger = Logger.getLogger(ClassifyCandidatesWorker.class.getName());

    /** Pending evidence rows are flushed to the database once the buffer grows beyond this size. */
    private static final int EVIDENCE_FLUSH_SIZE = 500;

    /** How often (in processed emails) a progress line is logged. */
    private static final int PROGRESS_INTERVAL = 100;

    private final Email2PimoStatusApi status;
    private Multimap<String, NGramProfile> lang2profiles;
    private Set<TopicCandidate> topicCandidates;

    /** Percentage of all emails (counted recursively from the root folder) that is sampled. */
    private final int percent = 50;

    /** Upper bound on disambiguation evidences per candidate; also scales each confidence value. */
    private final int maxEvidenceCounts = 1000;

    /** Number of terms taken on each side of an occurrence as its context. */
    private final int contextSize = 3;

    /** A match is accepted only if best distance / runner-up distance is below this ratio. */
    private final double thresh = 0.6d;

    private final E2PDatabase db = Manager.getInstance().getE2PDatabase();

    /**
     * @param email2PimoStatusApi receiver of the started/finished notifications
     */
    public ClassifyCandidatesWorker(Email2PimoStatusApi email2PimoStatusApi) {
        this.status = email2PimoStatusApi;
    }

    /**
     * Runs the classification on the worker thread.
     *
     * <p>The decompiler had renamed this method, which left {@link SwingWorker}'s abstract
     * {@code doInBackground()} unimplemented; the original name and visibility are restored.
     *
     * @return always {@code null}
     * @throws Exception whatever the database or analysis code throws
     */
    @Override
    protected Void doInBackground() throws Exception {
        this.status.reportDisambiguateTopicWorkerStarted();
        this.lang2profiles = Manager.getInstance().getTopicProfiles();
        List<DBEvidence> pendingEvidence = Lists.newArrayList();

        // Drop the results of earlier runs before producing new ones.
        this.db.query("DELETE FROM evidence WHERE conf_expl = '" + E2P.EvSrc.topicDisambiguation + "'");
        this.db.query("DELETE FROM evidence WHERE conf_expl = '" + E2P.EvSrc.tempLabelOccurrenceCount + "'");

        getAllTopicCandidates();

        Folder rootFolder = this.db.getRootFolder();
        Iterator<?> emails = rootFolder.randomizedEmailIterator();
        int sampleSize = (rootFolder.getNumberOfEmailsRek() * this.percent) / 100;
        int processed = 0;
        while (processed < sampleSize && emails.hasNext()) {
            Email email = (Email) emails.next();
            // Emails without analysed content do not count towards the sample.
            if (email == null || email.getContent() == null || email.getContent().getTerms() == null) {
                continue;
            }
            processed++;
            if (processed % PROGRESS_INTERVAL == 0) {
                logger.info(processed + "/" + sampleSize + ", " + this.topicCandidates.size() + " candidates");
            }
            createLabelOccurrenceCounts(email);
            createDisambiguationScores(email, pendingEvidence);
        }

        insertLabelOccurrenceCounts(pendingEvidence);
        this.db.executeEvidenceInserts(pendingEvidence);
        return null;
    }

    /**
     * Loads every concept that carries topic-candidate evidence and stores it in
     * {@link #topicCandidates}. Single-word labels become a {@link TermIndicator},
     * multi-word labels a {@link PhraseIndicator}; rows with a missing or blank label
     * are skipped. An {@link SQLException} is logged and leaves the set with whatever
     * was read up to that point.
     */
    private void getAllTopicCandidates() {
        this.topicCandidates = Sets.newHashSet();
        Splitter wordSplitter = Splitter.on(" ").trimResults().omitEmptyStrings();
        // Only internal constants are concatenated here, no external input.
        String sql = "SELECT c.uri as uri, c.label as label FROM concepts c JOIN evidence e ON c.uri = e.subj WHERE e.pred = '" + RDF.type.toString() + "' AND e.obj = 'urn:email2pimo:topic:topic' AND e.conf_expl = '" + E2P.EvSrc.topicCandidateGeneration + "'";
        Statement statement = null;
        ResultSet resultSet = null;
        try {
            statement = this.db.getConnection().createStatement();
            resultSet = statement.executeQuery(sql);
            while (resultSet.next()) {
                String uri = resultSet.getString("uri");
                String label = resultSet.getString("label");
                if (label == null) {
                    // A NULL label would make the splitter throw; treat it like an empty label.
                    continue;
                }
                List<String> words = Lists.newArrayList(wordSplitter.split(label));
                if (words.isEmpty()) {
                    continue;
                }
                if (words.size() == 1) {
                    this.topicCandidates.add(new TopicCandidate(uri, new TermIndicator(0.0d, (String) null, (String) null, words.get(0))));
                } else {
                    String[] phrase = words.toArray(new String[words.size()]);
                    this.topicCandidates.add(new TopicCandidate(uri, new PhraseIndicator(0.0d, (String) null, (String) null, phrase)));
                }
            }
        } catch (SQLException e) {
            logger.warn("Exception executing query: " + sql, e);
        } finally {
            E2PDatabase.closeStmtRes(statement, resultSet);
        }
    }

    /**
     * Counts, per topic candidate, the surface forms under which its indicator occurs
     * in the given email. For a term indicator the token at the hit position is counted;
     * for a phrase indicator the tokens spanning the phrase are joined with single spaces.
     *
     * @param email email whose content is scanned; must have content
     */
    // The element type of getTokens() is declared outside this file, so the list stays raw.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void createLabelOccurrenceCounts(Email email) {
        for (TopicCandidate topicCandidate : this.topicCandidates) {
            Iterator<?> positions = email.getContent().tokenIdx(topicCandidate.getIndicator()).iterator();
            while (positions.hasNext()) {
                int position = ((Integer) positions.next()).intValue();
                if (topicCandidate.getIndicator() instanceof TermIndicator) {
                    topicCandidate.addOccurrenceCount(email.getContent().tokenAt(position).getString());
                }
                if (topicCandidate.getIndicator() instanceof PhraseIndicator) {
                    PhraseIndicator phrase = (PhraseIndicator) topicCandidate.getIndicator();
                    List tokens = email.getContent().getTokens();
                    int end = position + phrase.length();
                    // Ignore hits whose phrase would run past the end of the token list.
                    if (end <= tokens.size()) {
                        Iterable surfaceForms = Iterables.transform(tokens.subList(position, end), new Function<Token, String>() {
                            public String apply(Token token) {
                                return token.getString();
                            }
                        });
                        topicCandidate.addOccurrenceCount(Joiner.on(" ").join(surfaceForms));
                    }
                }
            }
        }
    }

    /**
     * For every occurrence of every topic candidate in the email, builds an n-gram profile
     * of the surrounding terms and compares it with the topic profiles of the email's
     * language. If the closest profile is clearly better than the runner-up (distance
     * ratio below {@link #thresh}) and the candidate has not reached
     * {@link #maxEvidenceCounts}, a type evidence is appended to {@code list}.
     * The list is written to the database and cleared whenever it grows beyond
     * {@value #EVIDENCE_FLUSH_SIZE} entries.
     *
     * @param email email to analyse; {@code null} or content-less emails are ignored
     * @param list  buffer of pending evidence rows, shared with the caller
     */
    // The element type of getTerms() is declared outside this file, so these lists stay raw.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private void createDisambiguationScores(Email email, List<DBEvidence> list) {
        if (email == null || email.getContent() == null) {
            return;
        }
        String language = email.getContent().getLanguage();
        for (TopicCandidate topicCandidate : this.topicCandidates) {
            Iterator<?> positions = email.getContent().getTermPositions(topicCandidate.getIndicator()).iterator();
            while (positions.hasNext()) {
                int position = ((Integer) positions.next()).intValue();
                int leftStart = position - 1;
                int rightStart = position + 1;
                if (topicCandidate.getIndicator() instanceof PhraseIndicator) {
                    // Skip over the remaining words of the phrase itself.
                    rightStart += ((PhraseIndicator) topicCandidate.getIndicator()).length() - 1;
                }

                List terms = email.getContent().getTerms();
                List context = new ArrayList();
                int leftEnd = Math.max(0, (leftStart - this.contextSize) + 1);
                for (int k = leftStart; k >= leftEnd; k--) {
                    context.add(terms.get(k));
                }
                int rightEnd = Math.min(terms.size() - 1, (rightStart + this.contextSize) - 1);
                for (int k = rightStart; k <= rightEnd; k++) {
                    context.add(terms.get(k));
                }
                if (context.isEmpty()) {
                    continue;
                }

                String bestType = null;
                int bestDist = Integer.MAX_VALUE;
                int secondDist = Integer.MAX_VALUE;
                NGramProfile contextProfile = NGramProfile.generateForContext(language, context);
                for (NGramProfile profile : this.lang2profiles.get(language)) {
                    int dist = contextProfile.dist(profile);
                    if (dist < bestDist) {
                        secondDist = bestDist;
                        bestDist = dist;
                        bestType = profile.getType();
                    } else if (dist < secondDist) {
                        // Keep the runner-up current even when the best match does not change.
                        secondDist = dist;
                    }
                }

                // Floating-point division: with int division the ratio was always 0 and the
                // threshold never rejected anything. 0/0 yields NaN, which fails the test below.
                double ratio = (double) bestDist / (double) secondDist;
                if (ratio < this.thresh && topicCandidate.getEvidenceCount() < this.maxEvidenceCounts) {
                    double confidence = (1.0d - ratio) / this.maxEvidenceCounts;
                    list.add(new DBEvidence(topicCandidate.getConceptUri(), RDF.type.toString(), bestType, confidence, E2P.EvSrc.topicDisambiguation, email));
                    topicCandidate.incrementEvidenceCount();
                    if (list.size() > EVIDENCE_FLUSH_SIZE) {
                        this.db.executeEvidenceInserts(list);
                        list.clear();
                    }
                }
            }
        }
    }

    /**
     * Appends one label evidence per counted surface form of every topic candidate to
     * {@code list}, using the occurrence count as the evidence value, and then resets
     * the candidate's counters.
     *
     * @param list buffer of pending evidence rows to append to
     */
    private void insertLabelOccurrenceCounts(List<DBEvidence> list) {
        for (TopicCandidate topicCandidate : this.topicCandidates) {
            CountMap<String> occurrenceCounts = topicCandidate.getOccurrenceCounts();
            if (occurrenceCounts != null && occurrenceCounts.sum() > 0) {
                Iterator<?> labels = occurrenceCounts.keySet().iterator();
                while (labels.hasNext()) {
                    String label = (String) labels.next();
                    list.add(new DBEvidence(topicCandidate.getConceptUri(), RDFS.label.toString(), label, occurrenceCounts.count(label).intValue(), E2P.EvSrc.tempLabelOccurrenceCount));
                }
            }
            topicCandidate.resetOccurrenceCounts();
        }
    }

    /**
     * Called once the background work has ended: surfaces any failure of
     * {@link #doInBackground()} in the log and notifies the status API.
     */
    @Override
    protected void done() {
        try {
            get();
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
            logger.warn("Interrupted while retrieving the result of the topic classification", e);
        } catch (ExecutionException e) {
            logger.error("Topic classification failed", e);
        }
        this.status.reportDisambiguateTopicWorkerFinished();
        this.status.reportTopicsPanelFinished();
    }
}
