package de.dfki.km.perspecting.obie.transducer;

import de.dfki.km.perspecting.obie.connection.KnowledgeBase;
import de.dfki.km.perspecting.obie.model.Document;
import de.dfki.km.perspecting.obie.model.TextCorpus;
import de.dfki.km.perspecting.obie.utils.logging.ScoobieLogging;
import de.dfki.km.perspecting.obie.vocabulary.Language;
import de.dfki.km.perspecting.obie.workflow.Transducer;
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.analysis.lang.LanguageIdentifier;

/* loaded from: input_file:de/dfki/km/perspecting/obie/transducer/NutchLanguageIdentifier.class */
/**
 * Transducer that runs Nutch's {@link LanguageIdentifier} over a document's
 * plain text and stores the resulting language on the document source.
 *
 * <p>Only results equal to the values of {@link Language#DE} and
 * {@link Language#EN} are accepted. Every other outcome falls back to the
 * default language supplied at construction time.
 */
public class NutchLanguageIdentifier extends Transducer {
    /** Language assigned when identification yields no accepted result. */
    private final Language defaultLanguage;
    private final Logger log = Logger.getLogger(NutchLanguageIdentifier.class.getName());
    /** Nutch identifier, created once per transducer with a fresh Hadoop configuration. */
    private final LanguageIdentifier li = new LanguageIdentifier(new Configuration());

    /**
     * Creates the transducer.
     *
     * @param language fallback language used by {@link #transduce} when the
     *                 identified language is not one of the accepted values
     */
    public NutchLanguageIdentifier(Language language) {
        this.defaultLanguage = language;
    }

    /**
     * Identifies the language of an arbitrary text by delegating to the Nutch identifier.
     *
     * @param str text to analyse
     * @return whatever the Nutch identifier reports for {@code str}
     */
    public String identifyLanguageFromText(String str) {
        return this.li.identify(str);
    }

    /**
     * Identifies the language of the document's plain text and writes it to the
     * document source. The identification result and, if applicable, the
     * fallback to the default language are logged through {@link ScoobieLogging}.
     *
     * @param document      document whose source is analysed and updated
     * @param knowledgeBase supplies the session used for logging
     * @param textCorpus    not used by this transducer
     * @throws Exception declared by the {@link Transducer} contract
     */
    @Override // de.dfki.km.perspecting.obie.workflow.Transducer
    public void transduce(Document document, KnowledgeBase knowledgeBase, TextCorpus textCorpus) throws Exception {
        String text = document.getSource().getPlainTextContent();
        // A document without plain text cannot be classified; skip the identifier
        // instead of handing it null, and let the default-language branch apply.
        String identified = (text == null) ? null : this.li.identify(text);
        ScoobieLogging.log(knowledgeBase.getSession(), document.getSource().getUri(), "identified language as " + identified, this.log);

        // Null-safe comparisons: a missing result is treated like an unsupported language.
        if (identified != null && identified.equals(Language.DE.getValue())) {
            document.getSource().setLanguage(Language.DE.getValue());
            return;
        }
        if (identified != null && identified.equals(Language.EN.getValue())) {
            document.getSource().setLanguage(Language.EN.getValue());
            return;
        }

        ScoobieLogging.log(knowledgeBase.getSession(), document.getSource().getUri(), "set language to default: " + this.defaultLanguage, this.log);
        document.getSource().setLanguage(this.defaultLanguage.getValue());
    }
}
