package de.dfki.km.perspecting.obie.symbolization;

import de.dfki.km.perspecting.obie.connection.OntologySession;
import de.dfki.km.perspecting.obie.model.Annotation;
import de.dfki.km.perspecting.obie.model.Model;
import de.dfki.km.perspecting.obie.model.TextPointer;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.template.FilterContext;
import de.dfki.km.perspecting.obie.workflow.tasks.SymbolClassification;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import uk.ac.shef.wit.simmetrics.similaritymetrics.DiceSimilarity;
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3;

/* loaded from: input_file:de/dfki/km/perspecting/obie/symbolization/KNNTokenClassification.class */
/**
 * {@link SymbolClassification} that classifies phrases and single tokens by
 * looking up their nearest neighbours in a Lucene index of character 3-grams.
 *
 * <p>For every candidate the text is lower-cased, split into 3-grams via
 * {@code KNNTokenClassificationModel.getNGrams}, and turned into a disjunctive
 * query over the {@code "object"} field. The top {@value #K} hits are then
 * scored by combining the stored {@code "belief"} field with the Dice
 * similarity between the stored {@code "value"} field and the query text.
 */
public class KNNTokenClassification implements SymbolClassification {
    private static final Logger LOG = Logger.getLogger(KNNTokenClassification.class.getName());

    /** Number of nearest neighbours requested from the index per query. */
    private static final int K = 5;

    /** Length of the character n-grams used to build index queries. */
    private static final int NGRAM_SIZE = 3;

    /** Index field that is queried with the n-grams. */
    private static final String NGRAM_FIELD = "object";

    private static final DiceSimilarity dice = new DiceSimilarity(new TokeniserQGram3());

    /**
     * Classifies the annotations of {@code list4} whose tokens are not already
     * covered by {@code list}, {@code list2} or {@code list3}.
     *
     * <p>Each such annotation is first classified as a whole phrase. Tokens that
     * remain without a phrase-level result are then classified individually.
     * Finally, for every distinct combination of pointer data and annotation type
     * only the annotation with the highest belief is kept (the earliest one wins
     * on ties); the relative order of the surviving annotations is preserved.
     *
     * @param list            annotations whose tokens count as already covered
     * @param list2           annotations whose tokens count as already covered
     * @param list3           annotations whose tokens count as already covered
     * @param list4           candidate annotations to classify
     * @param ontologySession not used by this implementation
     * @param filterContext   not used by this implementation
     * @param model           model whose {@code getModel2()} result is cast to an {@link IndexSearcher}
     * @return the de-duplicated annotations produced by phrase and token classification
     * @throws Exception if searching the index or parsing a stored field fails
     */
    @Override // de.dfki.km.perspecting.obie.workflow.tasks.SymbolClassification
    public List<Annotation<TextPointer>> classifySymbols(List<Annotation<TextPointer>> list, List<Annotation<TextPointer>> list2, List<Annotation<TextPointer>> list3, List<Annotation<TextPointer>> list4, OntologySession ontologySession, FilterContext filterContext, Model<?> model) throws Exception {
        IndexSearcher indexSearcher = (IndexSearcher) model.getModel2();

        HashSet<Token> coveredTokens = new HashSet<Token>();
        addTokens(coveredTokens, list);
        addTokens(coveredTokens, list2);
        addTokens(coveredTokens, list3);

        // Candidates with at least one uncovered token, plus those uncovered tokens themselves.
        HashSet<Annotation<TextPointer>> openPhrases = new HashSet<Annotation<TextPointer>>();
        HashSet<Token> openTokens = new HashSet<Token>();
        for (Annotation<TextPointer> candidate : list4) {
            boolean hasOpenToken = false;
            for (Token token : candidate.getTokens()) {
                if (!coveredTokens.contains(token)) {
                    hasOpenToken = true;
                    openTokens.add(token);
                }
            }
            if (hasOpenToken) {
                openPhrases.add(candidate);
            }
        }

        List<Annotation<TextPointer>> found = new ArrayList<Annotation<TextPointer>>();

        for (Annotation<TextPointer> phrase : openPhrases) {
            // NOTE(review): toLowerCase() uses the default locale; confirm it matches how the index was built.
            String phraseText = phrase.asPhrase().toLowerCase();
            BooleanQuery query = buildNGramQuery(phraseText);
            if (query == null) {
                LOG.warning("Too much 3-grams from phrase: \n" + phrase);
                continue;
            }
            List<Annotation<TextPointer>> phraseHits = classifyPhrase(query, indexSearcher, phraseText, phrase);
            // Tokens explained by a phrase-level result need no token-level lookup.
            for (Annotation<TextPointer> hit : phraseHits) {
                for (Token token : hit.getTokens()) {
                    openTokens.remove(token);
                }
            }
            found.addAll(phraseHits);
        }

        for (Token token : openTokens) {
            String tokenText = token.toString().toLowerCase();
            BooleanQuery query = buildNGramQuery(tokenText);
            if (query == null) {
                LOG.warning("Too much 3-grams from token: \n" + token);
                continue;
            }
            found.addAll(classifyToken(query, indexSearcher, tokenText, token));
        }

        return keepBestPerKey(found);
    }

    /**
     * Classifies a single token against the top {@value #K} index hits.
     *
     * <p>Only the first hit of each {@code "predicate"} value is evaluated. If its
     * belief plus similarity is at least 1.5, an annotation typed with that
     * predicate and pointing at the hit's {@code "key"} is emitted. If the sum is
     * at least 1.0, the predicate is recorded as a weak vote instead. The
     * predicates with the most weak votes each yield an additional annotation with
     * key {@code -1}, scored from the maximum belief and maximum similarity seen
     * over all hits.
     *
     * @param booleanQuery  n-gram query for the token text
     * @param indexSearcher searcher over the n-gram index
     * @param str           lower-cased token text used for similarity and stored in the pointer
     * @param token         token being classified
     * @return annotations for the token; empty if no hit passes the thresholds
     * @throws Exception if searching the index or parsing a stored field fails
     */
    public List<Annotation<TextPointer>> classifyToken(BooleanQuery booleanQuery, IndexSearcher indexSearcher, String str, Token token) throws Exception {
        TopDocs topDocs = indexSearcher.search(booleanQuery, (Filter) null, K);

        List<Annotation<TextPointer>> result = new ArrayList<Annotation<TextPointer>>();
        Map<Integer, Integer> weakVotes = new HashMap<Integer, Integer>();
        HashSet<Integer> seenPredicates = new HashSet<Integer>();
        double maxBelief = 0.0d;
        double maxSimilarity = 0.0d;

        for (ScoreDoc hit : topDocs.scoreDocs) {
            // Load the stored document once per hit instead of once per field.
            org.apache.lucene.document.Document doc = indexSearcher.doc(hit.doc);
            String value = doc.get("value");
            int key = Integer.parseInt(doc.get("key"));
            String predicateName = doc.get("predicate");
            int predicate = Integer.parseInt(predicateName);
            double belief = Double.parseDouble(doc.get("belief"));
            double similarity = dice.getSimilarity(value, str);

            if (belief > maxBelief) {
                maxBelief = belief;
            }
            if (similarity > maxSimilarity) {
                maxSimilarity = similarity;
            }

            // NOTE(review): because each predicate is evaluated only once, every weak
            // vote count ends up being 1 and all weak predicates tie. Confirm whether a
            // real majority vote over the K hits was intended.
            if (!seenPredicates.add(Integer.valueOf(predicate))) {
                continue;
            }

            // NOTE(review): this threshold is inclusive (>= 1.5) whereas classifyPhrase
            // uses a strict comparison (> 1.5); confirm which one is intended.
            if (belief + similarity >= 1.5d) {
                double combined = combine(belief, similarity);
                result.add(new Annotation<TextPointer>(predicateName, new TextPointer(token.getStart(), token.getEnd(), str, key, combined), getClass().getName(), combined, -1, token));
            } else if (belief + similarity >= 1.0d) {
                Integer votes = weakVotes.get(Integer.valueOf(predicate));
                weakVotes.put(Integer.valueOf(predicate), Integer.valueOf(votes == null ? 1 : votes.intValue() + 1));
            }
        }

        // Collect every predicate that shares the highest weak-vote count.
        HashSet<Integer> winners = new HashSet<Integer>();
        int topVotes = 0;
        for (Map.Entry<Integer, Integer> entry : weakVotes.entrySet()) {
            // Compare as primitives: '==' on boxed Integers would test reference identity.
            int votes = entry.getValue().intValue();
            if (winners.isEmpty() || votes > topVotes) {
                topVotes = votes;
                winners.clear();
                winners.add(entry.getKey());
            } else if (votes == topVotes) {
                winners.add(entry.getKey());
            }
        }

        double fallbackBelief = combine(maxBelief, maxSimilarity);
        for (Integer winner : winners) {
            result.add(new Annotation<TextPointer>(Integer.toString(winner.intValue()), new TextPointer(token.getStart(), token.getEnd(), str, -1, fallbackBelief), getClass().getName(), fallbackBelief, -1, token));
        }
        return result;
    }

    /**
     * Classifies a whole phrase against the top {@value #K} index hits.
     *
     * <p>Every hit whose belief plus similarity is strictly greater than 1.5 yields
     * an annotation typed with the hit's {@code "predicate"}, spanning the same
     * range and tokens as {@code annotation}, and carrying the hit's
     * {@code "value"} and {@code "key"} in its pointer.
     *
     * @param booleanQuery  n-gram query for the phrase text
     * @param indexSearcher searcher over the n-gram index
     * @param str           lower-cased phrase text used for the similarity computation
     * @param annotation    phrase annotation being classified
     * @return annotations for the phrase; empty if no hit passes the threshold
     * @throws Exception if searching the index or parsing a stored field fails
     */
    public List<Annotation<TextPointer>> classifyPhrase(BooleanQuery booleanQuery, IndexSearcher indexSearcher, String str, Annotation<TextPointer> annotation) throws Exception {
        List<Annotation<TextPointer>> result = new ArrayList<Annotation<TextPointer>>();
        TopDocs topDocs = indexSearcher.search(booleanQuery, (Filter) null, K);
        for (ScoreDoc hit : topDocs.scoreDocs) {
            // Load the stored document once per hit instead of once per field.
            org.apache.lucene.document.Document doc = indexSearcher.doc(hit.doc);
            String value = doc.get("value");
            int key = Integer.parseInt(doc.get("key"));
            double belief = Double.parseDouble(doc.get("belief"));
            double similarity = dice.getSimilarity(value, str);
            if (belief + similarity > 1.5d) {
                double combined = combine(belief, similarity);
                // NOTE(review): the pointer stores the indexed value here, while
                // classifyToken stores the query text; confirm the difference is intended.
                result.add(new Annotation<TextPointer>(doc.get("predicate"), new TextPointer(annotation.getValue().getA(), annotation.getValue().getB(), value, key, combined), getClass().getName(), combined, -1, annotation.getTokens()));
            }
        }
        return result;
    }

    /** Adds every token of every annotation in {@code annotations} to {@code target}. */
    private static void addTokens(HashSet<Token> target, List<Annotation<TextPointer>> annotations) {
        for (Annotation<TextPointer> annotation : annotations) {
            for (Token token : annotation.getTokens()) {
                target.add(token);
            }
        }
    }

    /**
     * Builds a disjunctive query over the 3-grams of {@code text}.
     *
     * @return the query, or {@code null} if the number of 3-grams is not below
     *         {@link BooleanQuery#getMaxClauseCount()}
     */
    private static BooleanQuery buildNGramQuery(String text) {
        String[] nGrams = KNNTokenClassificationModel.getNGrams(text, NGRAM_SIZE);
        if (nGrams.length >= BooleanQuery.getMaxClauseCount()) {
            return null;
        }
        BooleanQuery query = new BooleanQuery();
        for (String nGram : nGrams) {
            query.add(new TermQuery(new Term(NGRAM_FIELD, nGram)), BooleanClause.Occur.SHOULD);
        }
        return query;
    }

    /** Scales the belief by 2/3 and moves it towards 1 in proportion to the similarity. */
    private static double combine(double belief, double similarity) {
        double scaled = (belief * 2.0d) / 3.0d;
        return scaled + ((1.0d - scaled) * similarity);
    }

    /** Key under which annotations are considered duplicates: pointer data plus annotation type. */
    private static String dedupKey(Annotation<TextPointer> annotation) {
        return String.valueOf(annotation.getValue().getData()) + " " + annotation.getType();
    }

    /**
     * Keeps, for each de-duplication key, only the annotation with the highest
     * belief (the earliest one on ties), preserving the input order of survivors.
     */
    private static List<Annotation<TextPointer>> keepBestPerKey(List<Annotation<TextPointer>> annotations) {
        Map<String, Annotation<TextPointer>> bestByKey = new HashMap<String, Annotation<TextPointer>>();
        for (Annotation<TextPointer> annotation : annotations) {
            String key = dedupKey(annotation);
            Annotation<TextPointer> current = bestByKey.get(key);
            if (current == null || current.getBelief() < annotation.getBelief()) {
                bestByKey.put(key, annotation);
            }
        }
        List<Annotation<TextPointer>> survivors = new ArrayList<Annotation<TextPointer>>(bestByKey.size());
        for (Annotation<TextPointer> annotation : annotations) {
            // Identity check: exactly the instance chosen as best for its key survives.
            if (bestByKey.get(dedupKey(annotation)) == annotation) {
                survivors.add(annotation);
            }
        }
        return survivors;
    }
}
