package de.dfki.km.perspecting.obie.transducer;

import de.dfki.km.perspecting.obie.connection.KnowledgeBase;
import de.dfki.km.perspecting.obie.model.Document;
import de.dfki.km.perspecting.obie.model.TextCorpus;
import de.dfki.km.perspecting.obie.utils.logging.ScoobieLogging;
import de.dfki.km.perspecting.obie.workflow.Transducer;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Locale;
import java.util.logging.Logger;

/* loaded from: input_file:de/dfki/km/perspecting/obie/transducer/JavaWordSegmenter.class */
/**
 * Word segmenter built on the JDK's {@link BreakIterator} word instance.
 *
 * <p>Both entry points walk the consecutive boundary pairs reported by the
 * iterator and keep every segment whose first character is not whitespace
 * according to {@link Character#isWhitespace(char)}. No other filtering is
 * applied, so any non-whitespace segment the iterator reports is kept.
 */
public class JavaWordSegmenter extends Transducer {
    private final Logger log = Logger.getLogger(JavaWordSegmenter.class.getName());

    /**
     * Segments the plain text of {@code document} and registers one token per
     * non-whitespace segment on the document's data object.
     *
     * <p>The break iterator is created for a locale built from the language
     * string reported by the document source. When the text is exhausted, the
     * number of created tokens is reported through {@link ScoobieLogging}.
     *
     * @param document      document whose source text is segmented and whose data receives the tokens
     * @param knowledgeBase used only to obtain the session passed to the logging call
     * @param textCorpus    not used by this implementation
     * @throws Exception declared by the overridden method
     */
    @Override // de.dfki.km.perspecting.obie.workflow.Transducer
    public void transduce(Document document, KnowledgeBase knowledgeBase, TextCorpus textCorpus) throws Exception {
        String text = document.getSource().getPlainTextContent();
        BreakIterator boundaries = BreakIterator.getWordInstance(new Locale(document.getSource().getLanguage()));
        boundaries.setText(text);

        int tokenCount = 0;
        int start = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; start = end, end = boundaries.next()) {
            // Only the first character is inspected to decide whether the segment is whitespace.
            if (!Character.isWhitespace(text.charAt(start))) {
                document.getData().createToken(start, end);
                tokenCount++;
            }
        }

        ScoobieLogging.log(knowledgeBase.getSession(), document.getSource().getUri(), "Found: " + tokenCount + " tokens in text", this.log);
    }

    /**
     * Splits {@code str} into its non-whitespace segments using a word break
     * iterator for the default locale.
     *
     * @param str text to segment
     * @return the segments in order of appearance, each being the substring
     *         between two consecutive boundaries; empty if no segment qualifies
     */
    public String[] segment(String str) {
        BreakIterator boundaries = BreakIterator.getWordInstance();
        boundaries.setText(str);

        ArrayList<String> segments = new ArrayList<String>();
        int start = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; start = end, end = boundaries.next()) {
            // Only the first character is inspected to decide whether the segment is whitespace.
            if (!Character.isWhitespace(str.charAt(start))) {
                segments.add(str.substring(start, end));
            }
        }
        return segments.toArray(new String[segments.size()]);
    }
}
