package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.international.french.process.FrenchTokenizer;
import edu.stanford.nlp.international.spanish.process.SpanishTokenizer;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.WhitespaceTokenizer;
import edu.stanford.nlp.util.Generics;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

/* loaded from: input_file:WEB-INF/lib/stanford-corenlp-3.4.1.jar:edu/stanford/nlp/pipeline/TokenizerAnnotator.class */
/**
 * Annotator that tokenizes the text stored under
 * {@link CoreAnnotations.TextAnnotation} and stores the resulting tokens under
 * {@link CoreAnnotations.TokensAnnotation}.
 *
 * <p>The concrete tokenizer is selected once, at construction time, from the
 * {@code tokenize.whitespace}, {@code tokenize.class} and
 * {@code tokenize.language} properties (see
 * {@link TokenizerType#getTokenizerType(Properties)}).
 */
public class TokenizerAnnotator implements Annotator {
    /**
     * Name of the boolean property consulted when the whitespace tokenizer is
     * built; its value is passed to the whitespace tokenizer factory.
     */
    public static final String EOL_PROPERTY = "tokenize.keepeol";

    /** When true, progress and the resulting tokens are written to {@code System.err}. */
    private final boolean VERBOSE;

    /** Factory used to create a fresh tokenizer for every reader. */
    private final TokenizerFactory<CoreLabel> factory;

    /**
     * The tokenizers this annotator knows how to build. Each constant carries an
     * optional language abbreviation, an optional tokenizer class name, and the
     * option string used when the caller supplies no options.
     */
    public enum TokenizerType {
        Unspecified(null, null, "invertible,ptb3Escaping=true"),
        Spanish("es", "SpanishTokenizer", "invertible,ptb3Escaping=true,splitAll=true"),
        English("en", "PTBTokenizer", "invertible,ptb3Escaping=true"),
        German("de", null, "invertible,ptb3Escaping=true"),
        French("fr", "FrenchTokenizer", ""),
        Whitespace(null, "WhitespaceTokenizer", "");

        private final String abbreviation;
        private final String className;
        private final String defaultOptions;

        /** Upper-cased abbreviation and constant name to type. */
        private static final Map<String, TokenizerType> nameToTokenizerMap = initializeNameMap();

        /** Upper-cased tokenizer class name to type. */
        private static final Map<String, TokenizerType> classToTokenizerMap = initializeClassMap();

        TokenizerType(String abbreviation, String className, String defaultOptions) {
            this.abbreviation = abbreviation;
            this.className = className;
            this.defaultOptions = defaultOptions;
        }

        /**
         * @return the option string used for this type when no options are configured
         */
        public String getDefaultOptions() {
            return this.defaultOptions;
        }

        /**
         * Normalizes a lookup key. {@link Locale#ROOT} is used so that the result
         * does not depend on the JVM default locale (for example, the Turkish
         * locale upper-cases {@code i} to a dotted capital I).
         */
        private static String normalizeKey(String key) {
            return key.toUpperCase(Locale.ROOT);
        }

        /** Builds the unmodifiable lookup from abbreviation / constant name to type. */
        private static Map<String, TokenizerType> initializeNameMap() {
            Map<String, TokenizerType> byName = Generics.newHashMap();
            for (TokenizerType type : values()) {
                if (type.abbreviation != null) {
                    byName.put(normalizeKey(type.abbreviation), type);
                }
                byName.put(normalizeKey(type.toString()), type);
            }
            return Collections.unmodifiableMap(byName);
        }

        /** Builds the unmodifiable lookup from tokenizer class name to type. */
        private static Map<String, TokenizerType> initializeClassMap() {
            Map<String, TokenizerType> byClass = Generics.newHashMap();
            for (TokenizerType type : values()) {
                if (type.className != null) {
                    byClass.put(normalizeKey(type.className), type);
                }
            }
            return Collections.unmodifiableMap(byClass);
        }

        /**
         * Picks the tokenizer type described by the given properties. Precedence:
         * {@code tokenize.whitespace=true}, then {@code tokenize.class}, then
         * {@code tokenize.language}; if none is set, {@link #Unspecified} is
         * returned. Class and language values are matched case-insensitively.
         *
         * @param properties configuration to inspect; must not be null
         * @return the selected tokenizer type
         * @throws IllegalArgumentException if {@code tokenize.class} or
         *         {@code tokenize.language} is set to an unknown value
         */
        public static TokenizerType getTokenizerType(Properties properties) {
            String tokenizerClass = properties.getProperty("tokenize.class", null);
            boolean whitespace = Boolean.parseBoolean(properties.getProperty("tokenize.whitespace", "false"));
            String language = properties.getProperty("tokenize.language", null);

            if (whitespace) {
                return Whitespace;
            }
            if (tokenizerClass != null) {
                TokenizerType byClass = classToTokenizerMap.get(normalizeKey(tokenizerClass));
                if (byClass == null) {
                    throw new IllegalArgumentException("TokenizerAnnotator: unknown tokenize.class property " + tokenizerClass);
                }
                return byClass;
            }
            if (language == null) {
                return Unspecified;
            }
            TokenizerType byLanguage = nameToTokenizerMap.get(normalizeKey(language));
            if (byLanguage == null) {
                throw new IllegalArgumentException("TokenizerAnnotator: unknown tokenize.language property " + language);
            }
            return byLanguage;
        }
    }

    /** Creates a verbose English tokenizer annotator. */
    public TokenizerAnnotator() {
        this(true);
    }

    /**
     * Creates an English tokenizer annotator.
     *
     * @param verbose whether to report progress on {@code System.err}
     */
    public TokenizerAnnotator(boolean verbose) {
        this(verbose, TokenizerType.English);
    }

    /**
     * Creates a verbose annotator for the given language.
     *
     * @param language value for {@code tokenize.language}; may be null
     */
    public TokenizerAnnotator(String language) {
        this(true, language, (String) null);
    }

    /**
     * Creates an annotator for the given tokenizer type.
     *
     * @param verbose whether to report progress on {@code System.err}
     * @param tokenizerType the tokenizer type; its name is used as the language
     */
    public TokenizerAnnotator(boolean verbose, TokenizerType tokenizerType) {
        this(verbose, tokenizerType.toString());
    }

    /**
     * Creates an annotator for the given language.
     *
     * @param verbose whether to report progress on {@code System.err}
     * @param language value for {@code tokenize.language}; may be null
     */
    public TokenizerAnnotator(boolean verbose, String language) {
        this(verbose, language, (String) null);
    }

    /**
     * Creates an annotator for the given language with additional options.
     *
     * @param verbose whether to report progress on {@code System.err}
     * @param language value for {@code tokenize.language}; may be null
     * @param extraOptions tokenizer options to use; may be null, in which case
     *        the defaults of the selected tokenizer type apply
     * @throws IllegalArgumentException if the language is unknown
     */
    public TokenizerAnnotator(boolean verbose, String language, String extraOptions) {
        this.VERBOSE = verbose;
        Properties properties = new Properties();
        if (language != null) {
            properties.setProperty("tokenize.language", language);
        }
        this.factory = initFactory(TokenizerType.getTokenizerType(properties), properties, extraOptions);
    }

    /**
     * Creates an annotator configured from properties.
     *
     * @param verbose whether to report progress on {@code System.err}
     * @param properties configuration; null is treated as empty
     */
    public TokenizerAnnotator(boolean verbose, Properties properties) {
        this(verbose, properties, (String) null);
    }

    /**
     * Creates an annotator configured from properties plus additional options.
     *
     * @param verbose whether to report progress on {@code System.err}
     * @param properties configuration; null is treated as empty
     * @param extraOptions options placed in front of {@code tokenize.options}; may be null
     * @throws IllegalArgumentException if the properties name an unknown tokenizer
     */
    public TokenizerAnnotator(boolean verbose, Properties properties, String extraOptions) {
        this.VERBOSE = verbose;
        Properties effective = properties == null ? new Properties() : properties;
        this.factory = initFactory(TokenizerType.getTokenizerType(effective), effective, extraOptions);
    }

    /**
     * Combines the caller-supplied extra options with the configured options.
     * Option strings are comma-separated lists, so a comma is inserted at the
     * join when both parts are non-empty and neither side already provides one;
     * plain concatenation would fuse the last extra option with the first
     * configured option.
     *
     * @return the merged option string, or null if both inputs are null
     */
    private static String mergeOptions(String extraOptions, String options) {
        if (options == null) {
            return extraOptions;
        }
        if (extraOptions == null) {
            return options;
        }
        boolean needsSeparator = !extraOptions.isEmpty()
                && !options.isEmpty()
                && !extraOptions.endsWith(",")
                && !options.startsWith(",");
        return needsSeparator ? extraOptions + "," + options : extraOptions + options;
    }

    /**
     * Builds the tokenizer factory for the given type.
     *
     * @param tokenizerType the tokenizer to build
     * @param properties source of {@code tokenize.options} and, for the
     *        whitespace tokenizer, the end-of-line flags
     * @param str extra options placed in front of {@code tokenize.options}; may be null
     * @return the configured factory
     * @throws IllegalArgumentException if the type is not handled
     */
    private TokenizerFactory<CoreLabel> initFactory(TokenizerType tokenizerType, Properties properties, String str) throws IllegalArgumentException {
        String options = mergeOptions(str, properties.getProperty("tokenize.options", null));
        if (options == null) {
            // Nothing configured anywhere: fall back to the type's defaults.
            options = tokenizerType.getDefaultOptions();
        }

        TokenizerFactory<CoreLabel> result;
        switch (tokenizerType) {
            case Spanish:
                result = SpanishTokenizer.factory(new CoreLabelTokenFactory(), options);
                break;
            case French:
                result = FrenchTokenizer.factory(new CoreLabelTokenFactory(), options);
                break;
            case Whitespace:
                // The whitespace factory takes a single flag instead of an option string;
                // either property switches it on.
                boolean eolFlag = Boolean.parseBoolean(properties.getProperty(EOL_PROPERTY, "false"))
                        || Boolean.parseBoolean(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"));
                result = new WhitespaceTokenizer.WhitespaceTokenizerFactory(new CoreLabelTokenFactory(), eolFlag);
                break;
            case English:
            case German:
                result = PTBTokenizer.factory(new CoreLabelTokenFactory(), options);
                break;
            case Unspecified:
                System.err.println("TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer.");
                result = PTBTokenizer.factory(new CoreLabelTokenFactory(), options);
                break;
            default:
                throw new IllegalArgumentException("No valid tokenizer type provided.\nUse -tokenize.language, -tokenize.class, or -tokenize.whitespace \nto specify a tokenizer.");
        }
        return result;
    }

    /**
     * Creates a tokenizer over the given reader using the configured factory.
     *
     * @param reader source of the text to tokenize
     * @return a tokenizer producing {@link CoreLabel} tokens
     */
    public Tokenizer<CoreLabel> getTokenizer(Reader reader) {
        return this.factory.getTokenizer(reader);
    }

    /**
     * Tokenizes the annotation's text and stores the tokens on the annotation.
     *
     * @param annotation annotation carrying {@link CoreAnnotations.TextAnnotation}
     * @throws RuntimeException if the annotation has no text
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public void annotate(Annotation annotation) {
        if (this.VERBOSE) {
            System.err.print("Tokenizing ... ");
        }
        if (!annotation.has(CoreAnnotations.TextAnnotation.class)) {
            throw new RuntimeException("Tokenizer unable to find text in annotation: " + annotation);
        }
        String text = (String) annotation.get(CoreAnnotations.TextAnnotation.class);
        annotation.set(CoreAnnotations.TokensAnnotation.class, getTokenizer(new StringReader(text)).tokenize());
        if (this.VERBOSE) {
            System.err.println("done.");
            System.err.println("Tokens: " + annotation.get(CoreAnnotations.TokensAnnotation.class));
        }
    }

    /**
     * @return an empty set: this annotator declares no prerequisites
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Annotator.Requirement> requires() {
        return Collections.emptySet();
    }

    /**
     * @return a singleton set containing {@code TOKENIZE_REQUIREMENT}
     */
    @Override // edu.stanford.nlp.pipeline.Annotator
    public Set<Annotator.Requirement> requirementsSatisfied() {
        return Collections.singleton(TOKENIZE_REQUIREMENT);
    }
}
