package de.dfki.km.perspecting.obie.symbolization;

import de.dfki.km.perspecting.obie.model.Annotation;
import de.dfki.km.perspecting.obie.model.Model;
import de.dfki.km.perspecting.obie.model.Record;
import de.dfki.km.perspecting.obie.model.TextPointer;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.workflow.tasks.SymbolClassification;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import uk.ac.shef.wit.simmetrics.similaritymetrics.DiceSimilarity;
import uk.ac.shef.wit.simmetrics.tokenisers.TokeniserQGram3;

/* loaded from: input_file:de/dfki/km/perspecting/obie/symbolization/KNNTokenClassification.class */
/**
 * Symbol classification step that looks up noun phrases and single tokens in a
 * character n-gram index.
 *
 * <p>For every noun phrase of a record that contains at least one token not yet
 * covered by a content symbol, structured entity or named entity, a disjunctive
 * query over the phrase's character 3-grams is built and handed to
 * {@link #classifyPhrase}. Tokens that are still unclassified afterwards are
 * handed individually to {@link #classifyToken}.
 *
 * <p>NOTE(review): in this class both {@code classifyPhrase} and
 * {@code classifyToken} return empty lists, so the classified symbols written
 * back to the record are always empty unless a subclass overrides them.
 */
public class KNNTokenClassification implements SymbolClassification {
    private static final Logger LOG = Logger.getLogger(KNNTokenClassification.class.getName());

    /** Number of top hits requested from the index search. */
    private static final int K = 5;

    /** Length of the character n-grams the queries are built from. */
    private static final int NGRAM_SIZE = 3;

    /** Name of the index field the n-gram terms are matched against. */
    private static final String NGRAM_FIELD = "object";

    // Not referenced anywhere in this class; kept so class initialisation is unchanged.
    private static final DiceSimilarity dice = new DiceSimilarity(new TokeniserQGram3());

    /**
     * Classifies the not-yet-annotated noun phrases and tokens of {@code record}
     * and stores the result via {@code record.setClassifiedSymbols(...)}.
     *
     * @param record the record whose noun phrases are inspected and whose
     *               classified symbols are replaced
     * @param model  model whose {@code getModel()} value is cast to an
     *               {@link IndexSearcher}
     * @throws Exception if {@code classifyPhrase} or {@code classifyToken} fails;
     *                   a {@link ClassCastException} is raised when the model does
     *                   not wrap an {@code IndexSearcher}
     */
    @Override
    public void classifySymbols(Record record, Model<?> model) throws Exception {
        IndexSearcher indexSearcher = (IndexSearcher) model.getModel();

        // Tokens that already belong to some other kind of annotation.
        HashSet<Token> coveredTokens = new HashSet<Token>();
        collectTokens(record.getContentSymbols(), coveredTokens);
        collectTokens(record.getStructuredEntities(), coveredTokens);
        collectTokens(record.getNamedEntities(), coveredTokens);

        // Noun phrases with at least one uncovered token, plus those uncovered tokens.
        HashSet<Annotation<TextPointer>> candidatePhrases = new HashSet<Annotation<TextPointer>>();
        HashSet<Token> uncoveredTokens = new HashSet<Token>();
        for (Annotation<TextPointer> phrase : record.getNounPhrases()) {
            boolean hasUncoveredToken = false;
            for (Token token : phrase.getTokens()) {
                if (!coveredTokens.contains(token)) {
                    uncoveredTokens.add(token);
                    hasUncoveredToken = true;
                }
            }
            if (hasUncoveredToken) {
                candidatePhrases.add(phrase);
            }
        }

        List<Annotation<TextPointer>> classified = new ArrayList<Annotation<TextPointer>>();

        // First pass: try to classify whole phrases.
        for (Annotation<TextPointer> phrase : candidatePhrases) {
            String phraseText = phrase.toString().toLowerCase();
            BooleanQuery query = buildNGramQuery(phraseText);
            if (query == null) {
                LOG.warning("Too much 3-grams from phrase: \n" + phrase);
                continue;
            }
            List<Annotation<TextPointer>> phraseResults =
                    classifyPhrase(query, indexSearcher, phraseText, phrase);
            // Tokens covered by a phrase-level result need no token-level lookup.
            for (Annotation<TextPointer> result : phraseResults) {
                for (Token token : result.getTokens()) {
                    uncoveredTokens.remove(token);
                }
            }
            classified.addAll(phraseResults);
        }

        // Second pass: classify the remaining tokens one by one.
        for (Token token : uncoveredTokens) {
            String tokenText = token.toString().toLowerCase();
            BooleanQuery query = buildNGramQuery(tokenText);
            if (query == null) {
                LOG.warning("Too much 3-grams from token: \n" + token);
                continue;
            }
            classified.addAll(classifyToken(query, indexSearcher, tokenText, token));
        }

        // The former trailing "while (it.hasNext()) {}" loop never advanced its
        // iterator and therefore never terminated for a non-empty result list;
        // it did no work and has been removed.
        record.setClassifiedSymbols(classified);
    }

    /**
     * Classifies a single token.
     *
     * <p>The query is executed for its top {@link #K} hits, but the hits are
     * currently discarded and an empty list is returned.
     *
     * @param booleanQuery  n-gram query built from {@code str}
     * @param indexSearcher searcher the query is run against
     * @param str           lower-cased text of the token
     * @param token         the token being classified
     * @return a new, empty, mutable list
     * @throws Exception if the index search fails
     */
    public List<Annotation<TextPointer>> classifyToken(BooleanQuery booleanQuery, IndexSearcher indexSearcher, String str, Token token) throws Exception {
        // Result intentionally unused for now; the call is kept so search failures still surface.
        indexSearcher.search(booleanQuery, (Filter) null, K);
        return new ArrayList<Annotation<TextPointer>>();
    }

    /**
     * Classifies a noun phrase. This implementation performs no lookup and
     * returns an empty list.
     *
     * @param booleanQuery  n-gram query built from {@code str}
     * @param indexSearcher searcher available for running the query
     * @param str           lower-cased text of the phrase
     * @param annotation    the noun phrase being classified
     * @return a new, empty, mutable list
     * @throws Exception declared for overriding implementations; never thrown here
     */
    public List<Annotation<TextPointer>> classifyPhrase(BooleanQuery booleanQuery, IndexSearcher indexSearcher, String str, Annotation<TextPointer> annotation) throws Exception {
        return new ArrayList<Annotation<TextPointer>>();
    }

    /** Adds every token of every given annotation to {@code target}. */
    private static void collectTokens(Iterable<? extends Annotation<TextPointer>> annotations, HashSet<Token> target) {
        for (Annotation<TextPointer> annotation : annotations) {
            for (Token token : annotation.getTokens()) {
                target.add(token);
            }
        }
    }

    /**
     * Builds a disjunctive query with one SHOULD clause per character n-gram of
     * {@code text}.
     *
     * @return the query, or {@code null} when the number of n-grams is not below
     *         {@link BooleanQuery#getMaxClauseCount()}
     */
    private static BooleanQuery buildNGramQuery(String text) throws Exception {
        String[] nGrams = KNNTokenClassificationModel.getNGrams(text, NGRAM_SIZE);
        if (nGrams.length >= BooleanQuery.getMaxClauseCount()) {
            return null;
        }
        BooleanQuery query = new BooleanQuery();
        for (String nGram : nGrams) {
            query.add(new TermQuery(new Term(NGRAM_FIELD, nGram)), BooleanClause.Occur.SHOULD);
        }
        return query;
    }
}
