package edu.stanford.nlp.ie.machinereading.domains.ace.reader;

import edu.stanford.nlp.ie.machinereading.common.SimpleTokenize;
import edu.stanford.nlp.ie.machinereading.common.StringDictionary;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.trees.Span;
import edu.stanford.nlp.util.Generics;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:edu/stanford/nlp/ie/machinereading/domains/ace/reader/AceToken.class */
/**
 * A single token read from an ACE document.
 *
 * <p>Each token keeps its literal text together with integer indices into the shared
 * {@link #WORDS}, {@link #LEMMAS} and {@link #OTHERS} dictionaries (word form, lemma, POS,
 * chunk and NERC labels), a capitalization class (one of the {@code CASE_*} constants),
 * the dictionary indices of its short suffixes, two byte-offset spans, and a sentence index.
 *
 * <p>The class also holds static, process-wide lookup tables: four gazetteers populated by
 * {@link #loadGazetteers(String)} and the proximity classes populated by
 * {@link #loadProximityClasses(String)}. None of this static state is synchronized.
 */
public class AceToken {
    /** Literal text of the token exactly as passed to the constructor; may be {@code null}. */
    private String mLiteral;
    /** Index of the (space-normalized) literal in {@link #WORDS}, or -1 when the literal is null. */
    private int mWord;
    /** One of the {@code CASE_*} constants, or -1 when the literal is null. */
    private int mCase;
    /** {@link #WORDS} indices of the 2-, 3- and 4-character suffixes; null when the literal is null. */
    private int[] mSuffixes;
    /** Index of the lemma in {@link #LEMMAS}, or -1 when no lemma was given. */
    private int mLemma;
    /** Index of the POS label in {@link #OTHERS}, or -1 when none was given. */
    private int mPos;
    /** Index of the chunk label in {@link #OTHERS}, or -1 when none was given. */
    private int mChunk;
    /** Index of the NERC label in {@link #OTHERS}, or -1 when none was given. */
    private int mNerc;
    /** Byte span of the token; rewritten by {@link #adjustPhrasePositions(int, String)}. May be null. */
    private Span mByteOffset;
    /** Byte span as originally given to the constructor; never modified by this class. May be null. */
    private Span mRawByteOffset;
    /** Sentence index passed to the constructor. */
    private int mSentence;
    // NOTE(review): the meaning of the three "Massi" annotations is not visible in this file;
    // they are plain string slots initialised to "" and exposed through getters/setters.
    private String mMassiClass;
    private String mMassiBbn;
    private String mMassiWnss;

    /** Maps a word index to the list of word indices read from its proximity-class line. */
    public static final Map<Integer, ArrayList<Integer>> PROX_CLASSES;
    /** Maximum number of entries kept from each line of the proximity-class file. */
    private static final int PROXIMITY_CLASS_SIZE = 5;
    /** Compiled form of {@code RobustTokenizer.SGML}, used by {@link #isSgml(String)}. */
    private static final Pattern SGML_PATTERN;

    public static final int CASE_OTHER = 0;
    public static final int CASE_ALLCAPS = 1;
    public static final int CASE_ALLCAPSORDOTS = 2;
    public static final int CASE_CAPINI = 3;
    public static final int CASE_INCAP = 4;
    public static final int CASE_ALLDIGITS = 5;
    public static final int CASE_ALLDIGITSORDOTS = 6;

    // Gazetteers stay null until loadGazetteers() is called; lookups treat null as "empty".
    private static Map<String, String> LOC_GAZ = null;
    private static Map<String, String> FIRST_GAZ = null;
    private static Map<String, String> LAST_GAZ = null;
    private static Map<String, String> TRIGGER_GAZ = null;

    public static final StringDictionary WORDS = new StringDictionary("words");
    public static final StringDictionary LEMMAS = new StringDictionary("lemmas");
    public static final StringDictionary OTHERS = new StringDictionary("others");

    /**
     * Loads the four gazetteers (locations, first names, last names, trigger words) from
     * fixed file names inside the given directory, replacing any previously loaded content.
     * Progress is reported on {@code System.err}.
     *
     * @param str directory that contains the gazetteer files
     * @throws FileNotFoundException if one of the gazetteer files cannot be opened
     * @throws IOException if reading a gazetteer file fails
     */
    public static void loadGazetteers(String str) throws FileNotFoundException, IOException {
        System.err.print("Loading location gazetteer... ");
        LOC_GAZ = Generics.newHashMap();
        loadDictionary(LOC_GAZ, str + File.separator + "world_small.gaz.nonambiguous");
        System.err.println("done.");

        System.err.print("Loading first-name gazetteer... ");
        FIRST_GAZ = Generics.newHashMap();
        loadDictionary(FIRST_GAZ, str + File.separator + "per_first.gaz");
        System.err.println("done.");

        System.err.print("Loading last-name gazetteer... ");
        LAST_GAZ = Generics.newHashMap();
        loadDictionary(LAST_GAZ, str + File.separator + "per_last.gaz");
        System.err.println("done.");

        System.err.print("Loading trigger-word gazetteer... ");
        TRIGGER_GAZ = Generics.newHashMap();
        loadDictionary(TRIGGER_GAZ, str + File.separator + "triggers.gaz");
        System.err.println("done.");
    }

    /**
     * Reads a gazetteer file into {@code map}. Every non-empty line is tokenized; the first
     * token, lower-cased, becomes the key. The value is the second token when present and
     * the string {@code "true"} otherwise. Later lines overwrite earlier ones with the same key.
     *
     * @param map destination map, modified in place
     * @param str path of the file to read
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     */
    private static void loadDictionary(Map<String, String> map, String str) throws FileNotFoundException, IOException {
        // try-with-resources: the reader is closed on normal completion and on failure.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                ArrayList<String> tokens = SimpleTokenize.tokenize(line);
                if (tokens.isEmpty()) {
                    continue;
                }
                String key = tokens.get(0).toLowerCase();
                String value = tokens.size() == 1 ? "true" : tokens.get(1);
                map.put(key, value);
            }
        }
    }

    /**
     * @param str key to look up (the loader stores keys lower-cased; this method does not
     *            change the case of {@code str})
     * @return true if {@code str} is a key of the location gazetteer
     */
    public static boolean isLocation(String str) {
        return exists(LOC_GAZ, str);
    }

    /**
     * @param str key to look up, used as given
     * @return true if {@code str} is a key of the first-name gazetteer
     */
    public static boolean isFirstName(String str) {
        return exists(FIRST_GAZ, str);
    }

    /**
     * @param str key to look up, used as given
     * @return true if {@code str} is a key of the last-name gazetteer
     */
    public static boolean isLastName(String str) {
        return exists(LAST_GAZ, str);
    }

    /**
     * Looks up a word in the trigger-word gazetteer.
     *
     * @param str key to look up, used as given
     * @return the value stored for {@code str}, or {@code null} if there is none or the
     *         trigger gazetteer has not been loaded
     */
    public static String isTriggerWord(String str) {
        if (TRIGGER_GAZ == null) {
            // Not loaded yet: behave like an empty gazetteer instead of throwing an NPE.
            return null;
        }
        return TRIGGER_GAZ.get(str);
    }

    /**
     * @param map map to search; {@code null} is treated as an empty map
     * @param str key to look up
     * @return true if {@code map} holds a non-null value for {@code str}
     */
    public static boolean exists(Map<String, String> map, String str) {
        return map != null && map.get(str) != null;
    }

    /**
     * Loads proximity classes into {@link #PROX_CLASSES}. For every non-empty line of the
     * file, the first token's index in {@link #WORDS} becomes the key, and the value is the
     * list of {@link #WORDS} indices of the first {@value #PROXIMITY_CLASS_SIZE} tokens of
     * that line (the first token included).
     *
     * <p>An {@link IOException} while opening, reading or closing the file is not propagated;
     * a warning is printed to {@code System.err} instead and whatever was loaded so far is kept.
     *
     * @param str path of the proximity-class file
     * @throws IOException declared for interface compatibility; I/O failures are reported as a warning
     */
    public static void loadProximityClasses(String str) throws IOException {
        System.err.println("Loading proximity classes...");
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                ArrayList<String> tokens = SimpleTokenize.tokenize(line);
                if (tokens.isEmpty()) {
                    continue;
                }
                Integer key = Integer.valueOf(WORDS.get(tokens.get(0)));
                ArrayList<Integer> members = new ArrayList<>();
                for (int k = 0; k < tokens.size() && k < PROXIMITY_CLASS_SIZE; k++) {
                    members.add(Integer.valueOf(WORDS.get(tokens.get(k))));
                }
                PROX_CLASSES.put(key, members);
            }
        } catch (IOException e) {
            System.err.println("Warning: no proximity database found.");
            return;
        }
        // Only reached when the file was read and closed without error.
        System.err.println("Finished loading proximity classes.");
    }

    /** @return the literal text given to the constructor (may be null) */
    public String getLiteral() {
        return this.mLiteral;
    }

    /** @return index of the word in {@link #WORDS}, or -1 if the literal was null */
    public int getWord() {
        return this.mWord;
    }

    /** @return one of the {@code CASE_*} constants, or -1 if the literal was null */
    public int getCase() {
        return this.mCase;
    }

    /** @return the internal suffix-index array (not a copy), or null if the literal was null */
    public int[] getSuffixes() {
        return this.mSuffixes;
    }

    /** @return index of the lemma in {@link #LEMMAS}, or -1 if none was given */
    public int getLemma() {
        return this.mLemma;
    }

    /** @return index of the POS label in {@link #OTHERS}, or -1 if none was given */
    public int getPos() {
        return this.mPos;
    }

    /** @return index of the chunk label in {@link #OTHERS}, or -1 if none was given */
    public int getChunk() {
        return this.mChunk;
    }

    /** @return index of the NERC label in {@link #OTHERS}, or -1 if none was given */
    public int getNerc() {
        return this.mNerc;
    }

    /** @return the (possibly adjusted) byte span, or null if no offsets were given */
    public Span getByteOffset() {
        return this.mByteOffset;
    }

    /** @return start of the byte span; throws NullPointerException if no offsets were given */
    public int getByteStart() {
        return this.mByteOffset.start();
    }

    /** @return end of the byte span; throws NullPointerException if no offsets were given */
    public int getByteEnd() {
        return this.mByteOffset.end();
    }

    /** @return the sentence index given to the constructor */
    public int getSentence() {
        return this.mSentence;
    }

    /** @return the byte span as originally constructed, or null if no offsets were given */
    public Span getRawByteOffset() {
        return this.mRawByteOffset;
    }

    /** @return start of the raw byte span; throws NullPointerException if no offsets were given */
    public int getRawByteStart() {
        return this.mRawByteOffset.start();
    }

    /** @return end of the raw byte span; throws NullPointerException if no offsets were given */
    public int getRawByteEnd() {
        return this.mRawByteOffset.end();
    }

    public void setMassiClass(String str) {
        this.mMassiClass = str;
    }

    public String getMassiClass() {
        return this.mMassiClass;
    }

    public void setMassiBbn(String str) {
        this.mMassiBbn = str;
    }

    public String getMassiBbn() {
        return this.mMassiBbn;
    }

    public void setMassiWnss(String str) {
        this.mMassiWnss = str;
    }

    public String getMassiWnss() {
        return this.mMassiWnss;
    }

    /**
     * @param str text to test; must not be null
     * @return true if the SGML pattern from {@code RobustTokenizer.SGML} is found in {@code str}
     */
    public static boolean isSgml(String str) {
        return SGML_PATTERN.matcher(str).find(0);
    }

    /**
     * Replaces every occurrence of {@code StringUtils.SPACE} in {@code str} with
     * {@code Expressions.VAR_SELF}.
     *
     * @param str input text, may be null
     * @return the rewritten text, or null if {@code str} is null
     */
    public static String removeSpaces(String str) {
        if (str == null) {
            return null;
        }
        // NOTE(review): these two constants look like decompiler substitutions for plain
        // string literals (a space and its replacement) - confirm against the original source.
        return str.replaceAll(StringUtils.SPACE, Expressions.VAR_SELF);
    }

    /**
     * Classifies the capitalization / digit shape of a token. The checks run in this order
     * and the first match wins:
     * <ol>
     *   <li>every character is upper case: {@link #CASE_ALLCAPS} (an empty string also lands here)</li>
     *   <li>starts upper case, every character is upper case or '.': {@link #CASE_ALLCAPSORDOTS}</li>
     *   <li>starts upper case: {@link #CASE_CAPINI}</li>
     *   <li>an upper-case character after the first position: {@link #CASE_INCAP}</li>
     *   <li>every character is a digit: {@link #CASE_ALLDIGITS}</li>
     *   <li>starts with a digit, every character is a digit, '.' or ',': {@link #CASE_ALLDIGITSORDOTS}</li>
     *   <li>otherwise: {@link #CASE_OTHER}</li>
     * </ol>
     *
     * @param str token text, must not be null
     * @return one of the {@code CASE_*} constants
     */
    private static int detectCase(String str) {
        if (isAllUpperCase(str)) {
            return CASE_ALLCAPS;
        }
        // From here on str is non-empty: an empty string satisfied the check above.
        boolean startsUpper = Character.isUpperCase(str.charAt(0));
        if (startsUpper && isAllUpperCaseOrDots(str)) {
            return CASE_ALLCAPSORDOTS;
        }
        if (startsUpper) {
            return CASE_CAPINI;
        }
        if (hasUpperCaseAfterFirst(str)) {
            return CASE_INCAP;
        }
        // Previously the all-digits flag started out false, so this class was unreachable
        // and pure-digit tokens were reported as CASE_ALLDIGITSORDOTS.
        if (isAllDigits(str)) {
            return CASE_ALLDIGITS;
        }
        if (Character.isDigit(str.charAt(0)) && isAllDigitsOrSeparators(str)) {
            return CASE_ALLDIGITSORDOTS;
        }
        return CASE_OTHER;
    }

    /** True if every character of {@code s} is upper case (vacuously true for ""). */
    private static boolean isAllUpperCase(String s) {
        for (int k = 0; k < s.length(); k++) {
            if (!Character.isUpperCase(s.charAt(k))) {
                return false;
            }
        }
        return true;
    }

    /** True if every character of {@code s} is upper case or a dot. */
    private static boolean isAllUpperCaseOrDots(String s) {
        for (int k = 0; k < s.length(); k++) {
            char c = s.charAt(k);
            if (!Character.isUpperCase(c) && c != '.') {
                return false;
            }
        }
        return true;
    }

    /** True if {@code s} has an upper-case character at any index other than 0. */
    private static boolean hasUpperCaseAfterFirst(String s) {
        for (int k = 1; k < s.length(); k++) {
            if (Character.isUpperCase(s.charAt(k))) {
                return true;
            }
        }
        return false;
    }

    /** True if every character of {@code s} is a digit. */
    private static boolean isAllDigits(String s) {
        for (int k = 0; k < s.length(); k++) {
            if (!Character.isDigit(s.charAt(k))) {
                return false;
            }
        }
        return true;
    }

    /** True if every character of {@code s} is a digit, a dot or a comma. */
    private static boolean isAllDigitsOrSeparators(String s) {
        for (int k = 0; k < s.length(); k++) {
            char c = s.charAt(k);
            if (!Character.isDigit(c) && c != '.' && c != ',') {
                return false;
            }
        }
        return true;
    }

    /**
     * Looks up the lower-cased suffixes of length 2, 3 and 4 (as far as the word is long
     * enough) in {@link #WORDS}.
     *
     * @param str token text, must not be null
     * @return the dictionary indices of the suffixes, shortest first; suffixes whose lookup
     *         throws are left out
     */
    private static int[] extractSuffixes(String str) {
        String lower = str.toLowerCase();
        ArrayList<Integer> ids = new ArrayList<>();
        for (int len = 2; len <= 4 && lower.length() >= len; len++) {
            try {
                ids.add(Integer.valueOf(WORDS.get(lower.substring(lower.length() - len))));
            } catch (RuntimeException ignored) {
                // Deliberate best effort: a suffix the dictionary rejects is simply skipped.
            }
        }
        int[] result = new int[ids.size()];
        for (int k = 0; k < result.length; k++) {
            result[k] = ids.get(k).intValue();
        }
        return result;
    }

    /**
     * Builds a token. Every string argument may be null; a null argument leaves the
     * corresponding index at -1 (or the suffix array / spans at null).
     *
     * @param str literal text of the token
     * @param str2 lemma
     * @param str3 POS label
     * @param str4 chunk label
     * @param str5 NERC label
     * @param str6 start byte offset as a decimal string
     * @param str7 end byte offset as a decimal string
     * @param i sentence index
     * @throws NumberFormatException if both offsets are given and one is not a valid integer
     */
    public AceToken(String str, String str2, String str3, String str4, String str5, String str6, String str7, int i) {
        this.mLiteral = str;
        if (str == null) {
            this.mWord = -1;
            this.mCase = -1;
            this.mSuffixes = null;
        } else {
            this.mWord = WORDS.get(removeSpaces(str), false);
            this.mCase = detectCase(str);
            this.mSuffixes = extractSuffixes(str);
        }

        this.mLemma = (str2 == null) ? -1 : LEMMAS.get(removeSpaces(str2), false);
        this.mPos = (str3 == null) ? -1 : OTHERS.get(str3, false);
        this.mChunk = (str4 == null) ? -1 : OTHERS.get(str4, false);
        this.mNerc = (str5 == null) ? -1 : OTHERS.get(str5, false);

        if (str6 != null && str7 != null) {
            int start = Integer.parseInt(str6);
            int end = Integer.parseInt(str7);
            // Two separate Span objects: the first one is mutated later by
            // adjustPhrasePositions, the raw one must keep the original values.
            this.mByteOffset = new Span(start, end);
            this.mRawByteOffset = new Span(start, end);
        }

        this.mSentence = i;
        this.mMassiClass = "";
        this.mMassiBbn = "";
        this.mMassiWnss = "";
    }

    /**
     * Adjusts this token's byte span by a running offset.
     *
     * <p>If {@code str} matches the SGML pattern, the length of this token's span is added
     * to the offset and the span is set to (-1, -1). Otherwise the span is shifted down by
     * the offset and the offset is returned unchanged. The raw byte span is never touched.
     * Requires that byte offsets were supplied at construction time.
     *
     * @param i running offset accumulated so far
     * @param str text tested against the SGML pattern
     * @return the updated running offset
     */
    public int adjustPhrasePositions(int i, String str) {
        if (isSgml(str)) {
            i += this.mByteOffset.end() - this.mByteOffset.start();
            this.mByteOffset.setStart(-1);
            this.mByteOffset.setEnd(-1);
        } else {
            this.mByteOffset.setStart(this.mByteOffset.start() - i);
            this.mByteOffset.setEnd(this.mByteOffset.end() - i);
        }
        return i;
    }

    /**
     * @return {@code ['word', pos, start, end]} when a byte span is present, otherwise
     *         {@code ['word', pos]}, with word and pos resolved through the dictionaries
     */
    public String display() {
        StringBuilder sb = new StringBuilder();
        sb.append("['").append(WORDS.get(this.mWord)).append("', ").append(OTHERS.get(this.mPos));
        if (this.mByteOffset != null) {
            sb.append(", ").append(this.mByteOffset.start()).append(", ").append(this.mByteOffset.end());
        }
        sb.append("]");
        return sb.toString();
    }

    /** @return the same text as {@link #display()} */
    @Override
    public String toString() {
        return display();
    }

    static {
        WORDS.setMode(true);
        LEMMAS.setMode(true);
        OTHERS.setMode(true);
        PROX_CLASSES = Generics.newHashMap();
        SGML_PATTERN = Pattern.compile(RobustTokenizer.SGML);
    }
}
