package de.dfki.km.perspecting.obie.transducer;

import cc.mallet.classify.Classification;
import de.dfki.km.perspecting.obie.connection.KnowledgeBase;
import de.dfki.km.perspecting.obie.model.Document;
import de.dfki.km.perspecting.obie.model.EntityClassifier;
import de.dfki.km.perspecting.obie.model.SemanticEntity;
import de.dfki.km.perspecting.obie.model.TextCorpus;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.model.TokenSequence;
import de.dfki.km.perspecting.obie.workflow.Transducer;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;

/* loaded from: input_file:de/dfki/km/perspecting/obie/transducer/MaxentEntityClassification.class */
/**
 * Transducer that runs the {@link EntityClassifier} model registered for the document's
 * language over every noun phrase of a document and attaches the best predicted label
 * to each token of the phrase.
 */
public class MaxentEntityClassification extends Transducer {
    private final Logger log = Logger.getLogger(MaxentEntityClassification.class.getName());

    /**
     * Classifies each noun phrase of {@code document} and adds the best label of every
     * resulting classification as a type to all tokens of that phrase.
     *
     * <p>Does nothing when the model registered for the document's language is {@code null}.
     * Noun phrases without tokens are skipped.
     *
     * @param document the document whose noun phrases are classified; its tokens are modified
     * @param knowledgeBase not used by this implementation
     * @param textCorpus not used by this implementation
     * @throws NumberFormatException if the best label of a classification is not an integer
     * @throws Exception if the classifier fails
     */
    @Override
    public void transduce(Document document, KnowledgeBase knowledgeBase, TextCorpus textCorpus) throws Exception {
        EntityClassifier entityClassifier = (EntityClassifier) this.languageMap.get(document.getSource().getLanguage()).getModel();
        if (entityClassifier == null) {
            return;
        }
        for (TokenSequence<String> nounPhrase : document.getNounPhrases()) {
            if (nounPhrase.getTokens().isEmpty()) {
                // Nothing to annotate, and there is no first token to take the sentence from.
                continue;
            }
            List<Classification> classifications = entityClassifier.test(nounPhrase, nounPhrase.getTokens().get(0).getSentence());
            boolean firstToken = true;
            for (Token token : nounPhrase.getTokens()) {
                // First token of the phrase is tagged "B", all following ones "I"
                // (presumably begin/inside chunk tagging -- confirm against SemanticEntity).
                String position = firstToken ? "B" : "I";
                firstToken = false;
                for (Classification classification : classifications) {
                    if (classification.getLabelVector().singleSize() < 1) {
                        // No label available for this classification.
                        continue;
                    }
                    SemanticEntity semanticEntity = new SemanticEntity();
                    semanticEntity.setSubjectIndex(-1);
                    semanticEntity.setSubjectURI("_:bnode");
                    semanticEntity.setPosition(position);
                    // The label text is expected to be an integer (presumably a type index).
                    int bestLabel = Integer.parseInt(classification.getLabeling().getBestLabel().toString());
                    token.addType(semanticEntity, bestLabel, classification.getLabeling().getBestValue());
                }
            }
        }
    }

    /**
     * Compares the entity types of {@code document} with the lines supplied by {@code reader}.
     *
     * <p>Each document entry is rendered as the token sequence's string form, a tab, and the
     * URI the knowledge base returns for the key of the first type-index entry. Both sides
     * are treated as sets of strings.
     *
     * @param document the document whose entity types are compared
     * @param knowledgeBase used to resolve type indexes to URIs
     * @param reader source of the lines to compare against, one entry per line; it is not
     *     closed by this method
     * @return one line with four tab-separated fields: the number of distinct lines read,
     *     the number of distinct document entries, the fraction of read lines that also
     *     occur in the document, and the fraction of document entries that also occur
     *     among the read lines (recall and precision if the reader holds the reference
     *     annotations -- assumption). A fraction is {@code 0} when its denominator is zero.
     * @throws Exception if reading fails or the knowledge base lookup fails
     */
    @Override
    public String compare(Document document, KnowledgeBase knowledgeBase, Reader reader) throws Exception {
        HashSet<String> readEntries = new HashSet<String>();
        BufferedReader lineReader = new BufferedReader(reader);
        for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
            readEntries.add(line);
        }

        HashSet<String> documentEntries = new HashSet<String>();
        for (TokenSequence<SemanticEntity> entityType : document.getEntityTypes()) {
            documentEntries.add(entityType.toString() + "\t" + knowledgeBase.getURI(entityType.getValue().getTypeIndex().get(0).getKey().intValue()));
        }

        // Work on a copy so that readEntries keeps its full size for the report.
        HashSet<String> commonEntries = new HashSet<String>(readEntries);
        commonEntries.retainAll(documentEntries);

        // Locale.ROOT keeps the decimal separator stable regardless of the platform locale.
        return String.format(
                java.util.Locale.ROOT,
                "%d\t%d\t%f\t%f\n",
                Integer.valueOf(readEntries.size()),
                Integer.valueOf(documentEntries.size()),
                Double.valueOf(ratio(commonEntries.size(), readEntries.size())),
                Double.valueOf(ratio(commonEntries.size(), documentEntries.size())));
    }

    /** Returns {@code numerator / denominator} as a double, or {@code 0} for a zero denominator. */
    private static double ratio(int numerator, int denominator) {
        if (denominator == 0) {
            return 0.0d;
        }
        return ((double) numerator) / denominator;
    }
}
