package de.dfki.km.email2pimo.accessor;

import com.google.common.base.CharMatcher;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import de.dfki.km.email2pimo.Manager;
import de.dfki.km.email2pimo.accessor.EmailTextAnnotator;
import de.dfki.km.email2pimo.analyzer.Indicator;
import de.dfki.km.email2pimo.analyzer.PhraseIndicator;
import de.dfki.km.email2pimo.analyzer.RegexIndicator;
import de.dfki.km.email2pimo.analyzer.TermIndicator;
import de.dfki.km.email2pimo.util.CountMap;
import de.dfki.km.email2pimo.util.E2PUtilities;
import de.dfki.km.email2pimo.util.NLPUtil;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import opennlp.tools.util.Span;
import org.apache.log4j.Logger;

/* loaded from: input_file:de/dfki/km/email2pimo/accessor/EmailContent.class */
public class EmailContent {
    /** Class-wide log4j logger. */
    private static final Logger logger = Logger.getLogger(EmailContent.class.getName());

    /** Splits a normalized token on whitespace and the listed punctuation characters, dropping empty pieces. */
    private static final Splitter splitToTokens = Splitter.on(CharMatcher.anyOf(" \t\r\n,.-;:_+*#'^°!\"§$%&/()=?´`{[]}\\<>|@")).omitEmptyStrings();

    /** Annotator that labels every line of the raw message (see {@link #initAnnotations()}). */
    private static final EmailTextAnnotator emailTextAnnotator = new SimpleEmailTextAnnotator();

    /** Joins the lines of one paragraph with a single blank before sentence detection. */
    private static final Joiner joinText = Joiner.on(" ");

    /** Guava adapter around {@link #removeQuoteChars(String)} so it can be used with {@code Iterables.transform}. */
    private static final Function<String, String> funcRemoveQuoteChars = new Function<String, String>() {
        @Override
        public String apply(String str) {
            return EmailContent.removeQuoteChars(str);
        }
    };

    /**
     * Fragments that {@link #getCleanedSubject()} asks {@code E2PUtilities.removeIgnoreCase} to remove
     * from the raw subject. Built with a typed builder (instead of the raw {@code ImmutableSet.Builder})
     * so the assignment is type-checked; insertion order is unchanged.
     */
    private static final ImmutableSet<String> commonSubjectTerms = ImmutableSet.<String>builder().add("re:").add("wg:").add("fw:").add("aw:").add("fwd:").add("fwd ").add("[").add("]").add(">").add("<").add("(").add(")").build();

    /** Language identifier handed to the tokenizer, sentence detector and POS tagger lookups. */
    private String language;

    /** Subject as supplied by the caller; never null after construction. */
    private String rawSubject;

    /** Message body as supplied by the caller; never null after construction. */
    private String rawMessage;

    /** Lines of {@link #rawMessage}; filled lazily by {@link #initAnnotations()}. */
    private String[] lines = null;

    /** Annotations produced by the annotator for {@link #lines}; filled lazily. */
    private EmailTextAnnotator.LineAnnotation[] annos = null;

    /** Sentences detected in the message; filled lazily by {@link #initSentences()}. */
    private List<Sentence> sentences = null;

    /** All tokens of all sentences in document order; filled lazily by {@link #initTokens()}. */
    private List<Token> tokens = null;

    /** Cached tokens of the cleaned subject; filled lazily by {@link #getCleanedSubjectTokens()}. */
    private List<Token> subjectTokens = null;

    /** Lower-cased token text mapped to the global indexes of the tokens carrying it. */
    private HashMultimap<String, Integer> term2ecTokenIdxs = null;

    /** Sequence of terms (token fragments of length three or more) in document order. */
    private List<String> terms = null;

    /** Term mapped to its positions inside {@link #terms}. */
    private Multimap<String, Integer> termPositions = null;

    /** Occurrence counter per term. */
    private CountMap<String> termFrequencies = null;

    /* loaded from: input_file:de/dfki/km/email2pimo/accessor/EmailContent$NounGrouping.class */
    /**
     * Grouping modes for nouns. The constants mirror those of
     * {@link NounTokenPhraseExtraction}; this enum is not referenced by any
     * method of this class.
     * NOTE(review): presumably consumed elsewhere in the project or a leftover
     * of an earlier API - confirm before removing.
     */
    public enum NounGrouping {
        REJECT_NOT_CLOSELY_CONNECTED,
        RETAIN_NOT_CLOSELY_CONNECTED,
        RETAIN_UNTIL_NOT_CLOSELY_CONNECTED
    }

    /* loaded from: input_file:de/dfki/km/email2pimo/accessor/EmailContent$NounTokenPhraseExtraction.class */
    /**
     * Controls how the {@code getNounTokenPhrase...At} methods of this class
     * post-process the run of proper-noun tokens they have collected.
     */
    public enum NounTokenPhraseExtraction {
        // The phrase is returned only if TokenPhrase.isCloselyConnected() holds, otherwise null.
        REJECT_NOT_CLOSELY_CONNECTED,
        // The phrase is cut where TokenPhrase.getGapBetweenPreviousTokenOf(idx) exceeds 1.
        RETAIN_UNTIL_NOT_CLOSELY_CONNECTED,
        // The collected phrase is returned unchanged.
        RETAIN_NOT_CLOSELY_CONNECTED
    }

    /**
     * Stores the raw e-mail data. No analysis happens here; lines, sentences
     * and tokens are computed lazily on first access or via {@link #init()}.
     *
     * @param str  language identifier, stored as given
     * @param str2 raw subject; {@code null} is replaced by the empty string
     * @param str3 raw message body; {@code null} is replaced by the empty string
     */
    public EmailContent(String str, String str2, String str3) {
        this.language = str;
        if (str2 == null) {
            this.rawSubject = "";
        } else {
            this.rawSubject = str2;
        }
        if (str3 == null) {
            this.rawMessage = "";
        } else {
            this.rawMessage = str3;
        }
    }

    /**
     * Short debug representation holding the language and the number of body
     * lines. Triggers line annotation if it has not happened yet.
     */
    @Override
    public String toString() {
        initAnnotations();
        StringBuilder text = new StringBuilder();
        text.append("(EMAIL-CONTENT :language \"");
        text.append(this.language);
        text.append("\" :lines ");
        text.append(this.lines.length);
        text.append(")");
        return text.toString();
    }

    /**
     * Eagerly runs the whole lazy pipeline: line annotation, then sentence
     * detection, then tokenization. Every stage is a no-op once its result
     * has been cached.
     */
    public void init() {
        // Each stage also invokes its predecessor itself; the explicit order documents the pipeline.
        initAnnotations();
        initSentences();
        initTokens();
    }

    /**
     * Splits the raw message into lines (on CRLF, CR or LF) and lets the
     * annotator label them. Does nothing once annotations are cached.
     */
    private void initAnnotations() {
        if (this.annos != null) {
            return;
        }
        String[] messageLines = this.rawMessage.split("\r\n|\r|\n");
        this.lines = messageLines;
        this.annos = emailTextAnnotator.annotate(messageLines);
    }

    /**
     * Renders the message line by line, each line prefixed with its
     * annotation padded with blanks to ten characters and a {@code "| "}
     * separator. Lines end with the platform line separator.
     *
     * @return the annotated listing, empty if the message has no lines
     */
    public String toLineAnnotatedString() {
        initAnnotations();
        StringBuilder listing = new StringBuilder();
        int row = 0;
        while (row < this.lines.length) {
            EmailTextAnnotator.LineAnnotation label = this.annos[row];
            listing.append(label);
            // Pad the label column; labels longer than ten characters get no padding.
            int padding = 10 - label.toString().length();
            for (int p = 0; p < padding; p++) {
                listing.append(" ");
            }
            listing.append("| " + this.lines[row]).append(System.getProperty("line.separator"));
            row++;
        }
        return listing.toString();
    }

    /**
     * Groups consecutive lines carrying the same annotation into paragraphs
     * and hands each paragraph to {@link #processParagraph}, which fills
     * {@link #sentences}. Does nothing once sentences are cached.
     *
     * The offset passed for a paragraph is the previous paragraph's offset
     * plus the summed raw line lengths of that paragraph plus one.
     *
     * NOTE(review): lines that continue a paragraph are stored with leading
     * quote characters removed, but the line that opens a paragraph after an
     * annotation change is stored raw. Preserved as is - confirm the intent.
     */
    private void initSentences() {
        if (this.sentences != null) {
            return;
        }
        initAnnotations();
        this.sentences = Lists.newArrayList();
        if (this.annos == null || this.annos.length == 0) {
            return;
        }
        int paragraphOffset = 0;
        int paragraphLength = 0;
        EmailTextAnnotator.LineAnnotation current = null;
        List<String> paragraphLines = Lists.newArrayList();
        for (int idx = 0; idx < this.annos.length; idx++) {
            EmailTextAnnotator.LineAnnotation anno = this.annos[idx];
            String line = this.lines[idx];
            boolean continuesParagraph = current == null || current.equals(anno);
            if (!continuesParagraph) {
                // Annotation changed: flush the finished paragraph and start a new one.
                processParagraph(current, paragraphLines, paragraphOffset);
                paragraphOffset = paragraphOffset + paragraphLength + 1;
                paragraphLines.clear();
                paragraphLength = 0;
            }
            current = anno;
            paragraphLines.add(continuesParagraph ? removeQuoteChars(line) : line);
            paragraphLength += line.length();
        }
        processParagraph(current, paragraphLines, paragraphOffset);
    }

    /**
     * Runs sentence detection on one paragraph and appends the resulting
     * sentences to {@link #sentences}. Only paragraphs annotated MSG, QUOT or
     * SIG are processed; for QUOT paragraphs every line is stripped of its
     * leading quote characters first.
     *
     * Failures are logged and swallowed so one broken paragraph does not abort
     * the whole message. The exception itself is passed to the logger; logging
     * only the message loses the stack trace and prints "null" for a
     * {@code NullPointerException}.
     *
     * @param lineAnnotation annotation shared by all lines of the paragraph
     * @param list           lines of the paragraph
     * @param i              offset assigned to the first sentence; each following
     *                       sentence is offset by the length of its predecessors
     */
    private void processParagraph(EmailTextAnnotator.LineAnnotation lineAnnotation, List<String> list, int i) {
        try {
            boolean quoted = lineAnnotation.equals(EmailTextAnnotator.LineAnnotation.QUOT);
            boolean relevant = quoted
                    || lineAnnotation.equals(EmailTextAnnotator.LineAnnotation.MSG)
                    || lineAnnotation.equals(EmailTextAnnotator.LineAnnotation.SIG);
            if (!relevant) {
                return;
            }
            String paragraph = quoted
                    ? joinText.join(Iterables.transform(list, funcRemoveQuoteChars))
                    : joinText.join(list);
            int offset = i;
            for (String sentenceText : Manager.getInstance().getSentenceDetector(this.language).sentDetect(paragraph)) {
                this.sentences.add(new Sentence(this, lineAnnotation, sentenceText, offset));
                offset += sentenceText.length();
            }
        } catch (Exception e) {
            logger.warn("Exception when processing a paragraph: " + e.getMessage(), e);
        }
    }

    /**
     * Returns the sentence that contains the token with the given global index.
     *
     * The token list is required here, so tokenization (which includes
     * sentence detection) is triggered; initializing only the sentences would
     * leave the token list null on a fresh instance.
     *
     * @param i global token index
     * @return the sentence the token belongs to
     * @throws IndexOutOfBoundsException if {@code i} is not a valid token index
     */
    public Sentence sentenceAt(int i) {
        initTokens();
        return this.tokens.get(i).getSentence();
    }

    /**
     * Tokenizes every sentence, registers each token both in the global token
     * list and in its sentence, and indexes the lower-cased token text in
     * {@link #term2ecTokenIdxs}. Does nothing once tokens are cached.
     *
     * A span that fails to turn into a token is logged at trace level and
     * skipped; both index counters still advance for it.
     */
    private void initTokens() {
        if (this.tokens != null) {
            return;
        }
        initSentences();
        this.tokens = Lists.newArrayList();
        this.term2ecTokenIdxs = HashMultimap.create();
        int globalIdx = 0;
        for (Sentence sentence : this.sentences) {
            String sentenceText = sentence.getText();
            Span[] spans = Manager.getInstance().getTokenizer(this.language).tokenizePos(sentenceText);
            for (int inSentenceIdx = 0; inSentenceIdx < spans.length; inSentenceIdx++, globalIdx++) {
                Span span = spans[inSentenceIdx];
                try {
                    String word = span.getCoveredText(sentenceText);
                    Token token = new Token(sentence, word, span.getStart(), inSentenceIdx, globalIdx);
                    this.tokens.add(token);
                    sentence.add(token);
                    this.term2ecTokenIdxs.put(word.toLowerCase(), Integer.valueOf(globalIdx));
                } catch (Exception e) {
                    logger.trace("Caught: ", e);
                }
            }
        }
    }

    /**
     * Returns the token at the given position of the global token list,
     * tokenizing the message first if necessary.
     *
     * @param i global token index
     * @throws IndexOutOfBoundsException if {@code i} is out of range
     */
    public Token tokenAt(int i) {
        List<Token> allTokens = getTokens();
        return allTokens.get(i);
    }

    /**
     * Looks up the global indexes of all tokens whose lower-cased text equals
     * {@code str}. The index keys are lower-cased, and the lookup is exact, so
     * {@code str} has to be lower case to match anything.
     *
     * @param str lower-cased token text
     * @return the index set held by the multimap for that key (empty if none)
     */
    public Set<Integer> tokenIdx(String str) {
        initTokens();
        Set<Integer> indexes = this.term2ecTokenIdxs.get(str);
        return indexes;
    }

    /**
     * Collects the global indexes of all tokens whose lower-cased text is
     * matched completely by the given pattern.
     *
     * @param pattern pattern applied with {@code matches()} to every distinct token text
     * @return a new set with the indexes of all matching tokens
     */
    public Set<Integer> tokenIdx(Pattern pattern) {
        initTokens();
        Set<Integer> indexes = Sets.newHashSet();
        for (String tokenText : this.term2ecTokenIdxs.keySet()) {
            if (!pattern.matcher(tokenText).matches()) {
                continue;
            }
            indexes.addAll(this.term2ecTokenIdxs.get(tokenText));
        }
        return indexes;
    }

    /**
     * Finds all occurrences of a token sequence that lie inside one sentence.
     * Token texts are lower-cased before comparison, the given words are not.
     *
     * @param strArr words of the phrase, in order
     * @return a new set with the global index of the first token of every
     *         occurrence; empty for a {@code null} or empty phrase
     */
    public Set<Integer> phraseIdx(String... strArr) {
        Set<Integer> occurrences = Sets.newHashSet();
        if (strArr == null || strArr.length < 1) {
            return occurrences;
        }
        initTokens();
        for (Integer candidate : tokenIdx(strArr[0])) {
            int start = candidate.intValue();
            int lastNeeded = (start + strArr.length) - 1;
            // The whole phrase must fit into the sentence of its first token.
            if (lastNeeded > tokenAt(start).getSentence().getLastTokenIdx()) {
                continue;
            }
            boolean allMatch = true;
            for (int offset = 1; allMatch && offset < strArr.length; offset++) {
                allMatch = tokenAt(start + offset).getString().toLowerCase().equals(strArr[offset]);
            }
            if (allMatch) {
                occurrences.add(candidate);
            }
        }
        return occurrences;
    }

    /**
     * Resolves an indicator to token indexes by dispatching on its concrete
     * type: term, regular expression or phrase. Any other indicator type is
     * reported with a warning and yields an empty result.
     *
     * @param indicator the indicator to look up
     * @return a new set with the matching global token indexes
     */
    public Set<Integer> tokenIdx(Indicator indicator) {
        initTokens();
        Set<Integer> indexes = Sets.newHashSet();
        if (indicator instanceof TermIndicator) {
            indexes.addAll(tokenIdx(((TermIndicator) indicator).getTerm()));
        } else if (indicator instanceof RegexIndicator) {
            indexes.addAll(tokenIdx(((RegexIndicator) indicator).getPattern()));
        } else if (indicator instanceof PhraseIndicator) {
            indexes.addAll(phraseIdx(((PhraseIndicator) indicator).getPhrase()));
        } else {
            logger.warn("Unsupported indicator!");
        }
        return indexes;
    }

    /**
     * Returns the tokens surrounding position {@code i}: up to {@code i2}
     * tokens before it and up to {@code i2} tokens after it, clipped to the
     * bounds of the token list. The token at {@code i} itself is excluded.
     *
     * The upper bound is {@code i + i2} inclusive. With an exclusive bound of
     * {@code i + i2} the right side would be one token shorter than the left
     * side. The bounds are computed in {@code long} so a very large radius
     * cannot overflow.
     *
     * @param i  global index of the center token
     * @param i2 number of tokens to include on each side
     * @return a new set with the context tokens, empty for a non-positive radius
     */
    public Set<Token> tokenContext(int i, int i2) {
        initTokens();
        Set<Token> context = Sets.newHashSet();
        long from = Math.max(0L, (long) i - i2);
        long toExclusive = Math.min((long) this.tokens.size(), (long) i + i2 + 1L);
        for (long pos = from; pos < toExclusive; pos++) {
            if (pos != i) {
                context.add(this.tokens.get((int) pos));
            }
        }
        return context;
    }

    /**
     * Collects the uninterrupted run of noun-tagged tokens that starts at the
     * given global index. The run is not limited to the sentence of the first
     * token.
     *
     * Tokenization is triggered first, so the method is safe on a fresh
     * instance, and an index outside the token list yields {@code null}.
     *
     * @param i global index of the first token to inspect
     * @return the phrase, or {@code null} if the index is out of range or the
     *         token at {@code i} is not tagged as a noun
     */
    public TokenPhrase getNounTokenPhrase(int i) {
        initTokens();
        if (i < 0) {
            return null;
        }
        TokenPhrase tokenPhrase = null;
        for (int idx = i; idx < this.tokens.size(); idx++) {
            Token candidate = this.tokens.get(idx);
            if (!NLPUtil.isNounTag(candidate.getPosTag(), this.language)) {
                break;
            }
            if (tokenPhrase == null) {
                tokenPhrase = new TokenPhrase();
                tokenPhrase.setLanguage(this.language);
            }
            tokenPhrase.add(candidate);
        }
        return tokenPhrase;
    }

    /**
     * Collects the run of proper-noun tokens that starts at the given global
     * index and stays inside the sentence of that token, then post-processes
     * it according to the extraction mode. Tokens consisting only of special
     * characters end the run.
     *
     * Tokenization is triggered before the token list is read, and an index
     * outside the token list (negative or too large) yields {@code null}.
     *
     * @param i                         global index of the first token
     * @param nounTokenPhraseExtraction RETAIN_NOT_CLOSELY_CONNECTED returns the run unchanged;
     *                                  REJECT_NOT_CLOSELY_CONNECTED returns it only if
     *                                  {@code isCloselyConnected()} holds;
     *                                  RETAIN_UNTIL_NOT_CLOSELY_CONNECTED returns the leading part up
     *                                  to the first gap larger than 1
     * @return the phrase, or {@code null} if there is none or it was rejected
     */
    public TokenPhrase getNounTokenPhraseStartingAt(int i, NounTokenPhraseExtraction nounTokenPhraseExtraction) {
        initTokens();
        if (i < 0 || i >= this.tokens.size()) {
            return null;
        }
        // The sentence boundary does not change while scanning, so look it up once.
        int lastIdxOfSentence = tokenAt(i).getSentence().getLastTokenIdx();
        TokenPhrase tokenPhrase = null;
        for (int idx = i; idx <= lastIdxOfSentence; idx++) {
            Token candidate = tokenAt(idx);
            if (!NLPUtil.isProperNounTag(candidate.getPosTag(), getLanguage()) || candidate.onlySpecialChars()) {
                break;
            }
            if (tokenPhrase == null) {
                tokenPhrase = new TokenPhrase();
            }
            tokenPhrase.addLast(candidate);
        }
        if (tokenPhrase == null) {
            return null;
        }
        switch (nounTokenPhraseExtraction) {
            case REJECT_NOT_CLOSELY_CONNECTED:
                return tokenPhrase.isCloselyConnected() ? tokenPhrase : null;
            case RETAIN_UNTIL_NOT_CLOSELY_CONNECTED:
                // Keep the prefix up to (not including) the first token that follows a gap.
                TokenPhrase prefix = new TokenPhrase();
                prefix.add(tokenPhrase.get(0));
                for (int k = 1; k < tokenPhrase.size() && tokenPhrase.getGapBetweenPreviousTokenOf(k) <= 1; k++) {
                    prefix.addLast(tokenPhrase.get(k));
                }
                return prefix;
            case RETAIN_NOT_CLOSELY_CONNECTED:
            default:
                return tokenPhrase;
        }
    }

    /**
     * Collects the run of proper-noun tokens that ends at the given global
     * index, scanning backwards without leaving the sentence of that token,
     * then post-processes it according to the extraction mode. Tokens
     * consisting only of special characters end the run.
     *
     * An index outside the token list yields {@code null}, matching
     * {@link #getNounTokenPhraseStartingAt} and {@link #getNounTokenPhraseAt}.
     *
     * @param i                         global index of the last token
     * @param nounTokenPhraseExtraction RETAIN_NOT_CLOSELY_CONNECTED returns the run unchanged;
     *                                  REJECT_NOT_CLOSELY_CONNECTED returns it only if
     *                                  {@code isCloselyConnected()} holds;
     *                                  RETAIN_UNTIL_NOT_CLOSELY_CONNECTED returns the trailing part
     *                                  after the last gap larger than 1
     * @return the phrase, or {@code null} if there is none or it was rejected
     */
    public TokenPhrase getNounTokenPhraseEndingAt(int i, NounTokenPhraseExtraction nounTokenPhraseExtraction) {
        initTokens();
        if (i < 0 || i >= this.tokens.size()) {
            return null;
        }
        // The sentence boundary does not change while scanning, so look it up once.
        int firstIdxOfSentence = Math.max(0, tokenAt(i).getSentence().getFirstTokenIdx());
        TokenPhrase tokenPhrase = null;
        for (int idx = i; idx >= firstIdxOfSentence; idx--) {
            Token candidate = tokenAt(idx);
            if (!NLPUtil.isProperNounTag(candidate.getPosTag(), this.language) || candidate.onlySpecialChars()) {
                break;
            }
            if (tokenPhrase == null) {
                tokenPhrase = new TokenPhrase();
            }
            tokenPhrase.addFirst(candidate);
        }
        if (tokenPhrase == null) {
            return null;
        }
        switch (nounTokenPhraseExtraction) {
            case REJECT_NOT_CLOSELY_CONNECTED:
                return tokenPhrase.isCloselyConnected() ? tokenPhrase : null;
            case RETAIN_UNTIL_NOT_CLOSELY_CONNECTED:
                // Restart at every gap so only the run that reaches the end token survives.
                TokenPhrase suffix = new TokenPhrase();
                suffix.add(tokenPhrase.get(0));
                for (int k = 1; k < tokenPhrase.size(); k++) {
                    if (tokenPhrase.getGapBetweenPreviousTokenOf(k) > 1) {
                        suffix = new TokenPhrase();
                    }
                    suffix.add(tokenPhrase.get(k));
                }
                return suffix;
            case RETAIN_NOT_CLOSELY_CONNECTED:
            default:
                return tokenPhrase;
        }
    }

    /**
     * Collects the run of proper-noun tokens around the given global index:
     * first backwards from {@code i}, then forwards from {@code i + 1}, both
     * limited to the sentence of the token at {@code i}. The run is then
     * post-processed according to the extraction mode.
     *
     * Tokenization is triggered before the token list is read, and an index
     * outside the token list (negative or too large) yields {@code null}.
     *
     * NOTE(review): in RETAIN_UNTIL_NOT_CLOSELY_CONNECTED mode the part after
     * the last gap is returned, which need not contain the token at {@code i}.
     * That behavior is kept unchanged - confirm it is intended.
     *
     * @param i                         global index of a token inside the phrase
     * @param nounTokenPhraseExtraction RETAIN_NOT_CLOSELY_CONNECTED returns the run unchanged;
     *                                  REJECT_NOT_CLOSELY_CONNECTED returns it only if
     *                                  {@code isCloselyConnected()} holds;
     *                                  RETAIN_UNTIL_NOT_CLOSELY_CONNECTED returns the part after the
     *                                  last gap larger than 1
     * @return the phrase, or {@code null} if there is none or it was rejected
     */
    public TokenPhrase getNounTokenPhraseAt(int i, NounTokenPhraseExtraction nounTokenPhraseExtraction) {
        initTokens();
        if (i < 0 || i >= this.tokens.size()) {
            return null;
        }
        // Sentence boundaries do not change while scanning, so look them up once.
        int firstIdxOfSentence = Math.max(0, tokenAt(i).getSentence().getFirstTokenIdx());
        int lastIdxOfSentence = tokenAt(i).getSentence().getLastTokenIdx();
        TokenPhrase tokenPhrase = null;
        for (int idx = i; idx >= firstIdxOfSentence; idx--) {
            Token candidate = tokenAt(idx);
            if (!NLPUtil.isProperNounTag(candidate.getPosTag(), getLanguage()) || candidate.onlySpecialChars()) {
                break;
            }
            if (tokenPhrase == null) {
                tokenPhrase = new TokenPhrase();
            }
            tokenPhrase.addFirst(candidate);
        }
        for (int idx = i + 1; idx <= lastIdxOfSentence; idx++) {
            Token candidate = tokenAt(idx);
            if (!NLPUtil.isProperNounTag(candidate.getPosTag(), getLanguage()) || candidate.onlySpecialChars()) {
                break;
            }
            if (tokenPhrase == null) {
                tokenPhrase = new TokenPhrase();
            }
            tokenPhrase.addLast(candidate);
        }
        if (tokenPhrase == null) {
            return null;
        }
        switch (nounTokenPhraseExtraction) {
            case REJECT_NOT_CLOSELY_CONNECTED:
                return tokenPhrase.isCloselyConnected() ? tokenPhrase : null;
            case RETAIN_UNTIL_NOT_CLOSELY_CONNECTED:
                TokenPhrase connected = new TokenPhrase();
                connected.add(tokenPhrase.get(0));
                for (int k = 1; k < tokenPhrase.size(); k++) {
                    if (tokenPhrase.getGapBetweenPreviousTokenOf(k) > 1) {
                        connected = new TokenPhrase();
                    }
                    connected.add(tokenPhrase.get(k));
                }
                return connected;
            case RETAIN_NOT_CLOSELY_CONNECTED:
            default:
                return tokenPhrase;
        }
    }

    /**
     * Lazily builds the term statistics of the message.
     *
     * Terms are obtained by splitting the normalized text of every token with
     * {@link #splitToTokens}; fragments shorter than three characters are
     * dropped. The method fills the ordered term list, the frequency counter
     * and, if still missing, the term-to-position index (positions refer to
     * the term list).
     *
     * The statistics are built in local variables and published only when
     * complete, so an exception during tokenization does not leave an empty
     * but seemingly initialized state behind.
     */
    public void initTermFrequencies() {
        if (this.termFrequencies == null) {
            List<Token> allTokens = getTokens();
            List<String> collectedTerms = Lists.newArrayList();
            CountMap<String> frequencies = CountMap.create();
            for (Token token : allTokens) {
                for (String fragment : splitToTokens.split(token.getNormalizedTerm())) {
                    String term = fragment.trim();
                    if (term.length() >= 3) {
                        collectedTerms.add(term);
                        frequencies.increment(term);
                    }
                }
            }
            this.terms = collectedTerms;
            this.termFrequencies = frequencies;
        }
        if (this.termPositions == null) {
            Multimap<String, Integer> positions = HashMultimap.create();
            int position = 0;
            for (String term : this.terms) {
                positions.put(term, Integer.valueOf(position));
                position++;
            }
            this.termPositions = positions;
        }
    }

    /**
     * Returns how often the given term occurs in the message.
     *
     * The term is passed to the counter directly; a {@code String} cannot be
     * cast to {@code CountMap<String>}, so such a cast does not compile.
     *
     * @param str the term to count
     * @return the number of occurrences; 0 if the counter reports no value
     */
    public int termCount(String str) {
        initTermFrequencies();
        // NOTE(review): whether CountMap.count can return null for unknown terms is not visible here; guard anyway.
        Number occurrences = this.termFrequencies.count(str);
        return occurrences == null ? 0 : occurrences.intValue();
    }

    /**
     * Relative term frequency: occurrences of the term divided by the total
     * number of terms of the message.
     *
     * The division is carried out in floating point. Dividing two
     * {@code int} values truncates, which makes the result 0 for every term
     * that does not make up the whole message.
     *
     * @param str the term
     * @return a value in {@code [0, 1]}; 0 if the message has no terms
     */
    public double tf(String str) {
        int occurrences = termCount(str);
        int totalTerms = getNumberOfTerms();
        if (totalTerms == 0) {
            return 0.0d;
        }
        return (double) occurrences / totalTerms;
    }

    /**
     * Replaces the term list and the frequency counter with externally
     * computed values.
     *
     * The term-to-position index is discarded as well, so that
     * {@link #initTermFrequencies()} rebuilds it from the new term list
     * instead of serving positions of the old one.
     *
     * @param list     terms in document order
     * @param countMap occurrence counter for those terms
     */
    public void setTermFrequencies(List<String> list, CountMap<String> countMap) {
        this.terms = list;
        this.termFrequencies = countMap;
        this.termPositions = null;
    }

    /**
     * Replaces the terms of the message with the given sequence and recomputes
     * the frequency counter from it.
     *
     * The term-to-position index is discarded so that
     * {@link #initTermFrequencies()} rebuilds it from the new terms instead of
     * serving positions of the old ones.
     *
     * @param iterable terms in document order
     */
    public void setTerms(Iterable<String> iterable) {
        List<String> newTerms = Lists.newArrayList();
        CountMap<String> newFrequencies = CountMap.create();
        for (String term : iterable) {
            newFrequencies.increment(term);
            newTerms.add(term);
        }
        this.terms = newTerms;
        this.termFrequencies = newFrequencies;
        this.termPositions = null;
    }

    /**
     * Returns the distinct terms of the message, i.e. the key set of the
     * frequency counter, computing the term statistics first if necessary.
     */
    public Set<String> getBagOfTerms() {
        initTermFrequencies();
        Set<String> distinctTerms = this.termFrequencies.keySet();
        return distinctTerms;
    }

    /**
     * Returns all terms of the message in document order, duplicates
     * included, computing the term statistics first if necessary. The
     * internal list itself is returned, not a copy.
     */
    public List<String> getTerms() {
        initTermFrequencies();
        List<String> orderedTerms = this.terms;
        return orderedTerms;
    }

    /**
     * Returns the positions of the given term inside the term list (see
     * {@link #getTerms()}), computing the term statistics first if necessary.
     *
     * @param str the term to look up
     * @return the positions held by the position index for that term
     */
    public Collection<Integer> getTermPositions(String str) {
        initTermFrequencies();
        Collection<Integer> positions = this.termPositions.get(str);
        return positions;
    }

    /**
     * Resolves a term or phrase indicator to positions in the term list.
     *
     * For a term indicator the positions of its term are returned (empty for
     * a {@code null} or empty term). For a phrase indicator the result holds
     * the position of the first term of every place where all terms of the
     * phrase follow each other in the term list.
     *
     * @param indicator a {@code TermIndicator} or {@code PhraseIndicator}
     * @return the matching positions
     * @throws IllegalArgumentException for any other indicator type, including {@code null}
     */
    public Collection<Integer> getTermPositions(Indicator indicator) {
        if (indicator instanceof TermIndicator) {
            String term = ((TermIndicator) indicator).getTerm();
            if (term == null || term.length() == 0) {
                return Sets.newHashSet();
            }
            return getTermPositions(term);
        }
        if (!(indicator instanceof PhraseIndicator)) {
            throw new IllegalArgumentException();
        }
        PhraseIndicator phrase = (PhraseIndicator) indicator;
        if (phrase.length() <= 0) {
            return Sets.newHashSet();
        }
        initTermFrequencies();
        Set<Integer> starts = Sets.newHashSet();
        for (Integer candidate : getTermPositions(phrase.getPhrase()[0])) {
            int start = candidate.intValue();
            // Skip candidates too close to the end for the whole phrase to fit.
            if (start + phrase.length() > this.terms.size()) {
                continue;
            }
            boolean allMatch = true;
            for (int offset = 1; allMatch && offset < phrase.getPhrase().length; offset++) {
                allMatch = this.terms.get(start + offset).equals(phrase.getPhrase()[offset]);
            }
            if (allMatch) {
                starts.add(Integer.valueOf(start));
            }
        }
        return starts;
    }

    /**
     * Extracts the terms of the cleaned subject: the normalized text of every
     * subject token is split with {@link #splitToTokens}, and fragments shorter
     * than three characters are dropped.
     *
     * @return a new list with the subject terms in order of appearance
     */
    public List<String> getSubjectTerms() {
        List<String> subjectTerms = Lists.newArrayList();
        for (Token token : getCleanedSubjectTokens()) {
            for (String fragment : splitToTokens.split(token.getNormalizedTerm())) {
                String term = fragment.trim();
                if (term.length() < 3) {
                    continue;
                }
                subjectTerms.add(term);
            }
        }
        return subjectTerms;
    }

    /**
     * Returns the total number of terms of the message, duplicates included,
     * computing the term statistics first if necessary.
     */
    public int getNumberOfTerms() {
        return getTerms().size();
    }

    /**
     * Returns an iterator over all tokens of the message in document order,
     * tokenizing the message first if necessary.
     */
    public Iterator<Token> tokenIterator() {
        List<Token> allTokens = getTokens();
        return allTokens.iterator();
    }

    /** Returns the language identifier passed to the constructor. */
    public String getLanguage() {
        return language;
    }

    /** Returns the subject exactly as it was supplied, without any cleaning. */
    public String getRawSubject() {
        return rawSubject;
    }

    /**
     * Replaces the raw subject.
     *
     * As in the constructor, {@code null} is stored as the empty string. The
     * cached subject tokens are dropped so that
     * {@link #getCleanedSubjectTokens()} reflects the new subject instead of
     * returning the tokens of the previous one.
     *
     * @param str the new raw subject, may be {@code null}
     */
    public void setRawSubject(String str) {
        this.rawSubject = str != null ? str : "";
        this.subjectTokens = null;
    }

    /**
     * Returns the subject after {@code E2PUtilities.removeIgnoreCase} has been
     * applied to it with the fragments listed in {@link #commonSubjectTerms}
     * (reply/forward prefixes and bracket characters).
     */
    public String getCleanedSubject() {
        String cleaned = E2PUtilities.removeIgnoreCase(this.rawSubject, commonSubjectTerms);
        return cleaned;
    }

    /**
     * Tokenizes and POS-tags the cleaned subject; the result is cached.
     *
     * Subject tokens belong to no sentence ({@code null}) and receive negative
     * indexes counting up from {@code -n} for {@code n} tokens; the same value
     * is used for both index arguments of the token.
     *
     * @return the cached list of subject tokens
     */
    public List<Token> getCleanedSubjectTokens() {
        if (this.subjectTokens != null) {
            return this.subjectTokens;
        }
        this.subjectTokens = Lists.newArrayList();
        String subject = getCleanedSubject();
        Span[] spans = Manager.getInstance().getTokenizer(this.language).tokenizePos(subject);
        String[] words = new String[spans.length];
        int pseudoIdx = -spans.length;
        int slot = 0;
        for (Span span : spans) {
            String word = span.getCoveredText(subject);
            words[slot] = word;
            slot++;
            this.subjectTokens.add(new Token(null, word, span.getStart(), pseudoIdx, pseudoIdx));
            pseudoIdx++;
        }
        // Tag all words in one call, then copy the tags onto the tokens by position.
        String[] tags = Manager.getInstance().getPOSTagger(this.language).tag(words);
        int tokenPos = 0;
        for (String tag : tags) {
            this.subjectTokens.get(tokenPos).setPosTag(tag);
            tokenPos++;
        }
        return this.subjectTokens;
    }

    /** Returns the message body exactly as it was supplied. */
    public String getRawMessage() {
        return rawMessage;
    }

    /**
     * Concatenates all lines whose annotation reports {@code isMessage()},
     * each followed by the platform line separator.
     *
     * @return the selected lines, or the empty string if there are none
     */
    public String getMessageContent() {
        initAnnotations();
        StringBuilder content = new StringBuilder();
        for (int row = 0; row < this.lines.length; row++) {
            if (!this.annos[row].isMessage()) {
                continue;
            }
            content.append(this.lines[row]).append(System.getProperty("line.separator"));
        }
        return content.toString();
    }

    /**
     * Concatenates all lines annotated as QUOT with their leading quote
     * characters removed, each followed by the platform line separator.
     *
     * @return the quoted text, or the empty string if there is none
     */
    public String getQuotedContent() {
        initAnnotations();
        StringBuilder content = new StringBuilder();
        for (int row = 0; row < this.lines.length; row++) {
            if (!this.annos[row].equals(EmailTextAnnotator.LineAnnotation.QUOT)) {
                continue;
            }
            content.append(removeQuoteChars(this.lines[row])).append(System.getProperty("line.separator"));
        }
        return content.toString();
    }

    /**
     * Concatenates all lines annotated as SIG, each followed by the platform
     * line separator.
     *
     * @return the signature text, or the empty string if there is none
     */
    public String getSignatureContent() {
        initAnnotations();
        StringBuilder content = new StringBuilder();
        for (int row = 0; row < this.lines.length; row++) {
            if (!this.annos[row].equals(EmailTextAnnotator.LineAnnotation.SIG)) {
                continue;
            }
            content.append(this.lines[row]).append(System.getProperty("line.separator"));
        }
        return content.toString();
    }

    /**
     * Returns the sentences of the message, running sentence detection first
     * if necessary. The internal list itself is returned, not a copy.
     */
    public List<Sentence> getSentences() {
        initSentences();
        List<Sentence> detected = this.sentences;
        return detected;
    }

    /**
     * Returns all tokens of the message in document order, tokenizing first
     * if necessary. The internal list itself is returned, not a copy.
     */
    public List<Token> getTokens() {
        initTokens();
        List<Token> allTokens = this.tokens;
        return allTokens;
    }

    /** Returns the number of tokens of the message, tokenizing first if necessary. */
    public int getNumberOfTokens() {
        List<Token> allTokens = getTokens();
        return allTokens.size();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Strips the leading run of quote markers ({@code '>'}) and blanks from a
     * line. Characters after the first other character are left untouched.
     *
     * The scan needs no exception handling: every {@code charAt} call is
     * guarded by the length check, so nothing inside the loop can throw.
     * A {@code null} line is treated like an empty one.
     *
     * @param str the line, may be {@code null}
     * @return the line without its leading quote prefix; the empty string if
     *         the line is {@code null} or consists only of quote markers and blanks
     */
    public static String removeQuoteChars(String str) {
        if (str == null) {
            return "";
        }
        int start = 0;
        while (start < str.length() && (str.charAt(start) == '>' || str.charAt(start) == ' ')) {
            start++;
        }
        return start < str.length() ? str.substring(start) : "";
    }
}
