package de.dfki.km.perspecting.obie.segmentation;

import de.dfki.km.perspecting.obie.model.Model;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.utils.logging.ScoobieLogging;
import de.dfki.km.perspecting.obie.workflow.tasks.Tokenization;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:de/dfki/km/perspecting/obie/segmentation/RegularWhitespaceTokenizer.class */
/**
 * {@link Tokenization} implementation whose tokens are the successive matches
 * of a regular expression against the input text.
 *
 * <p>The expression is not owned by this class: it is read from the
 * {@link Model} handed to {@link #extractTokens(String, Model)}, whose
 * {@code getModel()} result is cast to {@link Pattern}.
 */
public class RegularWhitespaceTokenizer implements Tokenization {
    /** Label handed to every {@link Token} this tokenizer creates. */
    private static final String TOKEN = "TOKEN";

    /** Originator string handed to every created {@link Token}. */
    private static final String ORIGINATOR = "de.dfki.km.perspecting.obie.segmentation.RegularWhitespaceTokenizer";

    /** Logger named after this class; passed to {@link ScoobieLogging} for all messages. */
    private static final Logger LOG = Logger.getLogger(RegularWhitespaceTokenizer.class.getName());

    /**
     * Creates one {@link Token} per match of the model's pattern in {@code str}.
     *
     * <p>Each token is constructed from the match's {@code start()} and
     * {@code end()} offsets (as reported by {@link Matcher}), the complete input
     * text, and the {@code TOKEN} / {@code ORIGINATOR} constants. Tokens are
     * returned in the order the matcher finds them.
     *
     * @param str   the text to tokenize
     * @param model the model supplying the pattern; {@code model.getModel()} must
     *              be a {@link Pattern}
     * @return a new mutable list holding one token per match; empty when the
     *         pattern never matches
     * @throws ClassCastException if {@code model.getModel()} is not a {@link Pattern}
     */
    @Override
    public List<Token> extractTokens(String str, Model<?> model) {
        ScoobieLogging.log("UNKNOWN", "UNKNOWN", str, LOG, Level.FINEST);

        // Parameterized list: the element type is checked at compile time
        // instead of relying on an unchecked raw-type conversion on return.
        final List<Token> tokens = new ArrayList<Token>();

        // The model is declared as Model<?>, so the pattern has to be recovered by cast.
        final Pattern pattern = (Pattern) model.getModel();
        final Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            tokens.add(new Token(matcher.start(), matcher.end(), str, TOKEN, ORIGINATOR));
        }

        ScoobieLogging.log("UNKNOWN", "UNKNOWN", "Found: " + tokens.size() + " tokens in text", LOG);
        ScoobieLogging.log("UNKNOWN", "UNKNOWN", tokens.toString(), LOG, Level.FINEST);
        return tokens;
    }
}
