package de.dfki.km.email2pimo.dimension.topics;

import com.aliasi.chunk.Chunk;
import com.aliasi.dict.DictionaryEntry;
import com.aliasi.dict.ExactDictionaryChunker;
import com.aliasi.dict.MapDictionary;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import de.dfki.km.email2pimo.Manager;
import de.dfki.km.email2pimo.accessor.Email;
import de.dfki.km.email2pimo.accessor.EmailContent;
import de.dfki.km.email2pimo.analyzer.EmailAnalyzerContentPattern;
import de.dfki.km.email2pimo.vocabularies.E2P;
import java.util.Iterator;

/* loaded from: input_file:de/dfki/km/email2pimo/dimension/topics/DBPediaCategoryTopicsPattern.class */
/**
 * Email content pattern that looks up topic phrases in an email's text using an
 * exact-match dictionary chunker and reports every hit to a {@link TopicManager}.
 *
 * <p>The dictionary is filled once, at construction time, from the instances of the
 * gazetteer registered under {@code E2P.Topic.topic}.
 */
public class DBPediaCategoryTopicsPattern implements EmailAnalyzerContentPattern {
    /** Score assigned to every dictionary entry. */
    private static final double ENTRY_SCORE = 1.0d;

    /**
     * Numeric value handed to {@code TopicManager.reportConcept} for every match.
     * NOTE(review): presumably a weight/confidence - confirm against TopicManager.
     */
    private static final double REPORTED_VALUE = 1.0E-4d;

    /**
     * Text handed to {@code TopicManager.reportConcept} for every match.
     * NOTE(review): presumably identifies the reporting source - confirm against TopicManager.
     */
    private static final String REPORTED_LABEL = "DBPedia Category Gazetteer";

    private final TopicManager tm;
    private final MapDictionary<String> dict;
    private final ExactDictionaryChunker chunker;

    /**
     * Creates the pattern and builds its dictionary and chunker.
     *
     * @param topicManager receiver of every concept found by {@link #reportContent(Email)}
     */
    public DBPediaCategoryTopicsPattern(TopicManager topicManager) {
        this.tm = topicManager;
        this.dict = buildTopicDictionary();
        // Both boolean flags are false; in LingPipe's constructor they are
        // (returnAllMatches, caseSensitive).
        this.chunker = new ExactDictionaryChunker(this.dict, IndoEuropeanTokenizerFactory.INSTANCE, false, false);
    }

    /**
     * Builds a dictionary holding one entry per instance of the gazetteer registered
     * under {@code E2P.Topic.topic}; each entry uses {@code E2P.Topic.topic} as its
     * category and {@link #ENTRY_SCORE} as its score.
     *
     * @return the populated dictionary
     */
    private static MapDictionary<String> buildTopicDictionary() {
        MapDictionary<String> topics = new MapDictionary<>();
        for (String phrase : Manager.getInstance().getGazetteer(E2P.Topic.topic).getInstances()) {
            // NOTE(review): raw DictionaryEntry kept as in the original; switch to
            // DictionaryEntry<String> once E2P.Topic.topic is confirmed to be a String.
            topics.addEntry(new DictionaryEntry(phrase, E2P.Topic.topic, ENTRY_SCORE));
        }
        return topics;
    }

    /**
     * Chunks the email's text and reports each dictionary match to the topic manager.
     *
     * <p>The text is the cleaned subject, message content, quoted content and signature
     * content, in that order, joined by single spaces. For every chunk the chunk type,
     * the matched substring of that text, {@link #REPORTED_VALUE}, {@link #REPORTED_LABEL}
     * and the email itself are passed to {@code TopicManager.reportConcept}.
     *
     * @param email the email whose content is analysed
     * @throws Exception propagated unchanged from the calls made here
     */
    @Override
    public void reportContent(Email email) throws Exception {
        EmailContent parts = email.getContent();
        String text = parts.getCleanedSubject()
                + " " + parts.getMessageContent()
                + " " + parts.getQuotedContent()
                + " " + parts.getSignatureContent();

        for (Chunk match : this.chunker.chunk(text).chunkSet()) {
            String conceptType = match.type();
            // Chunk offsets index into the concatenated text built above.
            String matchedText = text.substring(match.start(), match.end());
            this.tm.reportConcept(conceptType, matchedText, REPORTED_VALUE, REPORTED_LABEL, email);
        }
    }
}
