package de.dfki.km.perspecting.obie.segmentation;

import de.dfki.km.perspecting.obie.model.Model;
import de.dfki.km.perspecting.obie.model.Record;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.utils.logging.ScoobieLogging;
import de.dfki.km.perspecting.obie.workflow.tasks.Tokenization;
import java.util.ArrayList;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:de/dfki/km/perspecting/obie/segmentation/RegularWhitespaceTokenizer.class */
/**
 * {@link Tokenization} task that splits a record's plain text into tokens using a
 * regular expression supplied through the {@link Model}.
 *
 * <p>Every match of the pattern in the document text becomes one {@link Token};
 * the pattern therefore describes the tokens themselves, not the separators.
 */
public class RegularWhitespaceTokenizer implements Tokenization {
    /** Type label attached to every token produced by this tokenizer. */
    private static final String TOKEN = "TOKEN";

    /** Logger shared by all instances; it is keyed by the class name only. */
    private static final Logger LOG = Logger.getLogger(RegularWhitespaceTokenizer.class.getName());

    /**
     * Finds all matches of the model's pattern in the record's plain text and stores
     * them on the record as its token list (replacing whatever {@code setTokens}
     * previously received).
     *
     * @param record the record whose document text is tokenized and which receives the tokens
     * @param model  model whose {@code getModel()} value must be a compiled {@link Pattern}
     * @throws ClassCastException if the model does not wrap a {@link Pattern}
     */
    @Override // de.dfki.km.perspecting.obie.workflow.tasks.Tokenization
    public void extractTokens(Record record, Model<?> model) {
        final String producer = RegularWhitespaceTokenizer.class.getName();
        // Fetch the text once: it is needed by the matcher and by every token created below.
        final String text = record.getDocument().getPlainTextContent();
        final Pattern tokenPattern = (Pattern) model.getModel();

        final ArrayList<Token> tokens = new ArrayList<Token>();
        final Matcher matcher = tokenPattern.matcher(text);
        while (matcher.find()) {
            // Offsets are the match boundaries within the text: start inclusive, end exclusive.
            tokens.add(new Token(matcher.start(), matcher.end(), text, TOKEN, producer));
        }

        ScoobieLogging.log(producer, record.getDocument().getUri(), "Found: " + tokens.size() + " tokens in text", LOG);
        record.setTokens(tokens);
    }
}
