package org.cleartk.classifier.feature.extractor.simple;

import java.util.Collections;
import java.util.List;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
import org.cleartk.classifier.feature.proliferate.CharacterNGramProliferator;

/* loaded from: input_file:org/cleartk/classifier/feature/extractor/simple/CharacterCategoryPatternExtractor.class */
/**
 * Feature extractor that turns the covered text of an annotation into a single string built from
 * the Unicode general-category labels of its characters (for example {@code "Lu"} for an
 * upper-case letter or {@code "Nd"} for a decimal digit).
 *
 * <p>How runs of same-category characters are rendered is controlled by the {@link PatternType}
 * given at construction time. The feature name also depends on the pattern type.
 */
public class CharacterCategoryPatternExtractor implements SimpleNamedFeatureExtractor {
    /** Strategy used to render runs of characters that share a category. */
    private final PatternType patternType;

    /** Name given to every feature produced by this extractor. */
    private final String name;

    /** The supported ways of rendering consecutive characters of the same category. */
    public enum PatternType {
        /** Emit one category label for every character. */
        ONE_PER_CHAR,
        /** Emit a category label only when it differs from the previous character's label. */
        REPEATS_MERGED,
        /** Like {@link #REPEATS_MERGED}, but mark a run of two or more with a single {@code '+'}. */
        REPEATS_AS_KLEENE_PLUS
    }

    /** Creates an extractor that uses {@link PatternType#ONE_PER_CHAR}. */
    public CharacterCategoryPatternExtractor() {
        this(PatternType.ONE_PER_CHAR);
    }

    /**
     * Creates an extractor that uses the given pattern type.
     *
     * @param patternType how runs of same-category characters are rendered; must not be null
     * @throws NullPointerException if {@code patternType} is null
     */
    public CharacterCategoryPatternExtractor(PatternType patternType) {
        this.patternType = patternType;
        this.name = featureNameFor(patternType);
    }

    /**
     * Maps a pattern type to the feature name used for it.
     *
     * @throws NullPointerException if {@code patternType} is null (raised by the switch itself)
     */
    private static String featureNameFor(PatternType patternType) {
        switch (patternType) {
            case ONE_PER_CHAR:
                return "CharPattern";
            case REPEATS_MERGED:
                return "CharPatternRepeatsMerged";
            case REPEATS_AS_KLEENE_PLUS:
                return "CharPatternRepeatsAsKleenePlus";
            default:
                // Not reachable with the three constants declared above; fail loudly rather than
                // leave the feature name null if a constant is added without a name.
                throw new IllegalArgumentException("Unsupported pattern type: " + patternType);
        }
    }

    /**
     * Returns the name of the feature produced by this extractor.
     *
     * @return the feature name selected by the pattern type
     */
    @Override // org.cleartk.classifier.feature.extractor.simple.SimpleNamedFeatureExtractor
    public String getFeatureName() {
        return this.name;
    }

    /**
     * Builds the character-category pattern for the text covered by {@code annotation}.
     *
     * @param jCas not used by this extractor
     * @param annotation the annotation whose covered text is classified character by character
     * @return a single-element list holding the pattern feature
     * @throws CleartkExtractorException declared by the interface; not thrown by this method itself
     */
    @Override // org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor
    public List<Feature> extract(JCas jCas, Annotation annotation) throws CleartkExtractorException {
        StringBuilder pattern = new StringBuilder();
        String text = annotation.getCoveredText();
        String previousCategory = null;
        // True once a '+' has been written for the current run, so each run gets at most one.
        boolean plusEmitted = false;
        for (int i = 0; i < text.length(); i++) {
            String category = classifyChar(text.charAt(i));
            boolean sameAsPrevious = category.equals(previousCategory);
            switch (this.patternType) {
                case ONE_PER_CHAR:
                    pattern.append(category);
                    break;
                case REPEATS_MERGED:
                    if (!sameAsPrevious) {
                        pattern.append(category);
                    }
                    break;
                case REPEATS_AS_KLEENE_PLUS:
                    if (!sameAsPrevious) {
                        // A new run starts: write its label and allow a '+' again.
                        pattern.append(category);
                        plusEmitted = false;
                    } else if (!plusEmitted) {
                        pattern.append('+');
                        plusEmitted = true;
                    }
                    break;
                default:
                    break;
            }
            previousCategory = category;
        }
        return Collections.singletonList(new Feature(this.name, pattern.toString()));
    }

    /**
     * Maps a character to a two-letter label for its general category as reported by
     * {@link Character#getType(char)}.
     *
     * @param c the character to classify
     * @return the category label, e.g. {@code "Lu"}, {@code "Nd"}, {@code "Po"}
     * @throws RuntimeException if {@code Character.getType} returns a code with no label here
     */
    protected String classifyChar(char c) {
        int type = Character.getType(c);
        switch (type) {
            case Character.UNASSIGNED:
                return "Cn";
            case Character.UPPERCASE_LETTER:
                return "Lu";
            case Character.LOWERCASE_LETTER:
                return "Ll";
            case Character.TITLECASE_LETTER:
                return "Lt";
            case Character.MODIFIER_LETTER:
                return "Lm";
            case Character.OTHER_LETTER:
                return "Lo";
            case Character.NON_SPACING_MARK:
                return "Mn";
            case Character.ENCLOSING_MARK:
                return "Me";
            case Character.COMBINING_SPACING_MARK:
                return "Mc";
            case Character.DECIMAL_DIGIT_NUMBER:
                return "Nd";
            case Character.LETTER_NUMBER:
                return "Nl";
            case Character.OTHER_NUMBER:
                return "No";
            case Character.SPACE_SEPARATOR:
                return "Zs";
            case Character.LINE_SEPARATOR:
                return "Zl";
            case Character.PARAGRAPH_SEPARATOR:
                return "Zp";
            case Character.CONTROL:
                // Upper-case second letter, unlike the other labels; kept as-is because the label
                // is part of the emitted feature value.
                return "CC";
            case Character.FORMAT:
                return "Cf";
            case Character.PRIVATE_USE:
                return "Co";
            case Character.SURROGATE:
                return "Cs";
            case Character.DASH_PUNCTUATION:
                return "Pd";
            case Character.START_PUNCTUATION:
                return "Ps";
            case Character.END_PUNCTUATION:
                return "Pe";
            case Character.CONNECTOR_PUNCTUATION:
                return "Pc";
            case Character.OTHER_PUNCTUATION:
                return "Po";
            case Character.MATH_SYMBOL:
                return "Sm";
            case Character.CURRENCY_SYMBOL:
                return "Sc";
            case Character.MODIFIER_SYMBOL:
                return "Sk";
            case Character.OTHER_SYMBOL:
                return "So";
            case Character.INITIAL_QUOTE_PUNCTUATION:
                return "Pi";
            case Character.FINAL_QUOTE_PUNCTUATION:
                return "Pf";
            default:
                throw new RuntimeException("Unknown character type: " + type);
        }
    }
}
