package edu.washington.cs.knowitall.argumentidentifier;

import ch.qos.logback.classic.joran.action.InsertFromJNDIAction;
import ch.qos.logback.core.joran.util.beans.BeanUtil;
import com.google.common.collect.ImmutableList;
import com.hp.hpl.jena.sparql.sse.Tags;
import edu.stanford.nlp.ling.tokensregex.types.Expressions;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.washington.cs.knowitall.argumentidentifier.ArgLearner;
import edu.washington.cs.knowitall.nlp.OpenNlpUtils;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedBinaryExtraction;
import edu.washington.cs.knowitall.nlp.extraction.ChunkedExtraction;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:WEB-INF/lib/reverb-core-1.4.1.jar:edu/washington/cs/knowitall/argumentidentifier/ArgSubstructureFeatureGenerator.class */
public class ArgSubstructureFeatureGenerator {
    private ArgLearner.Mode mode;
    private Vector<Pattern> patterns = null;
    private Vector<String> featureNames = null;
    private PatternExtractor pattern_extractor = new PatternExtractor();
    static int CHUNK_FEATURE = 0;
    static int TAG_FEATURE = 1;
    static int LEX_FEATURE = 2;
    static int PARSE_FEATURE = 3;
    static String B_ARG = "B-ARG";
    static String I_ARG = "I-ARG";
    static String O = "O";
    private static String CAPS = "[A-Zçêòéñ\u0082ì]";
    private static String ALPHA = "[A-Zçêòéñ\u0082ìa-z\u0088\u0093\u009d\u008e\u0097\u008d\u0095]";
    private static String ALPHANUM = "[A-Zçêòéñ\u0082ìa-z\u0088\u0093\u009d\u008e\u0097\u008d\u00950-9]";
    private static String PUNT = "[,\\.;:?!()]";
    private static HashSet<String> lexicalizeIfTag = null;
    private static HashSet<String> lexicalizeIfWord = null;
    private static HashSet<String> stopWord = null;

    public ArgSubstructureFeatureGenerator(ArgLearner.Mode mode) {
        this.mode = mode;
        init();
    }

    private void init() {
        lexicalizeIfTag = new HashSet<>();
        lexicalizeIfTag.add("IN");
        lexicalizeIfTag.add("MD");
        lexicalizeIfTag.add("DT");
        lexicalizeIfTag.add("WDT");
        lexicalizeIfTag.add("WP");
        lexicalizeIfTag.add("RP");
        lexicalizeIfTag.add("PRT");
        lexicalizeIfTag.add("WRB");
        lexicalizeIfTag.add("WRB");
        lexicalizeIfTag.add("CC");
        lexicalizeIfWord = new HashSet<>();
        lexicalizeIfWord.add("be");
        lexicalizeIfWord.add("been");
        lexicalizeIfWord.add(BeanUtil.PREFIX_GETTER_IS);
        lexicalizeIfWord.add("was");
        lexicalizeIfWord.add("are");
        lexicalizeIfWord.add("were");
        lexicalizeIfWord.add("has");
        lexicalizeIfWord.add("had");
        lexicalizeIfWord.add("have");
        lexicalizeIfWord.add("n't");
        lexicalizeIfWord.add("not");
        lexicalizeIfWord.add("that");
        lexicalizeIfWord.add("which");
        lexicalizeIfWord.add(InsertFromJNDIAction.AS_ATTR);
        lexicalizeIfWord.add("but");
        lexicalizeIfWord.add(Tags.tagAnd);
        lexicalizeIfWord.add(Tags.tagOr);
        lexicalizeIfWord.add("said");
        lexicalizeIfWord.add("say");
        lexicalizeIfWord.add("says");
        stopWord = new HashSet<>();
        stopWord.add("said");
        stopWord.add("say");
        stopWord.add("says");
        this.patterns = new Vector<>();
        this.featureNames = new Vector<>();
        this.featureNames.add("INITCAP");
        this.patterns.add(Pattern.compile(CAPS + ".*"));
        this.featureNames.add("ALLDIGITS");
        this.patterns.add(Pattern.compile("[0-9]*"));
        this.featureNames.add("ALLCAPS");
        this.patterns.add(Pattern.compile(CAPS + "+"));
        this.featureNames.add("CONTAINSDIGITS");
        this.patterns.add(Pattern.compile(".*[0-9].*"));
        this.featureNames.add("ALLDIGITS");
        this.patterns.add(Pattern.compile("[0-9]+"));
        this.featureNames.add("CONTAINSDOTS");
        this.patterns.add(Pattern.compile("[^\\.]*\\..*"));
        this.featureNames.add("CONTAINSDASH");
        this.patterns.add(Pattern.compile(ALPHANUM + "+-" + ALPHANUM + "*"));
        this.featureNames.add("ACRO");
        this.patterns.add(Pattern.compile("[A-Z][A-Z\\.]*\\.[A-Z\\.]*"));
        this.featureNames.add("LONELYINITIAL");
        this.patterns.add(Pattern.compile(CAPS + "\\."));
        this.featureNames.add("SINGLECHAR");
        this.patterns.add(Pattern.compile(ALPHA));
        this.featureNames.add("CAPLETTER");
        this.patterns.add(Pattern.compile(CAPS));
        this.featureNames.add(ATBTreeUtils.puncTag);
        this.patterns.add(Pattern.compile(PUNT));
    }

    private String convertInstancesToFeatures(Vector<Vector<String>> vector, Boolean bool) {
        if (vector == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder();
        Iterator<Vector<String>> it = vector.iterator();
        while (it.hasNext()) {
            Vector<String> next = it.next();
            String lastElement = next.lastElement();
            Iterator<String> it2 = next.iterator();
            while (it2.hasNext()) {
                String next2 = it2.next();
                if (bool.booleanValue() || !next2.equals(lastElement)) {
                    sb.append(next2 + StringUtils.SPACE);
                }
            }
            sb.append("\n");
        }
        return sb.toString();
    }

    private ArrayList<String> getSetFeatures(ChunkedExtraction chunkedExtraction, int i, boolean z) {
        boolean matchesRelativeClause;
        ArrayList<String> arrayList = new ArrayList<>();
        boolean z2 = false;
        boolean z3 = false;
        boolean z4 = false;
        boolean z5 = false;
        boolean z6 = false;
        boolean z7 = false;
        boolean z8 = false;
        boolean z9 = false;
        boolean z10 = false;
        boolean z11 = false;
        boolean z12 = false;
        boolean z13 = false;
        boolean z14 = false;
        boolean z15 = false;
        boolean z16 = false;
        if (this.mode != ArgLearner.Mode.LEFT) {
            z3 = this.pattern_extractor.adjRelation(chunkedExtraction);
            z4 = this.pattern_extractor.complementClause(chunkedExtraction);
            z5 = this.pattern_extractor.nestedRelation2(chunkedExtraction);
            z6 = this.pattern_extractor.npInfinitiveClause(chunkedExtraction);
            z7 = this.pattern_extractor.infinitiveClause(chunkedExtraction);
            z8 = this.pattern_extractor.doubleNP(chunkedExtraction);
            z9 = this.pattern_extractor.startsList(chunkedExtraction);
            z10 = this.pattern_extractor.ifClause(chunkedExtraction);
            z11 = this.pattern_extractor.compoundVerb(chunkedExtraction);
            matchesRelativeClause = this.pattern_extractor.npRelativeClause(chunkedExtraction);
            z2 = this.pattern_extractor.nestedRelation1(chunkedExtraction);
        } else {
            z12 = this.pattern_extractor.nextIsThat(chunkedExtraction, i - 1);
            z13 = this.pattern_extractor.matchesCommaBeforeVerb(chunkedExtraction);
            z14 = this.pattern_extractor.matchesPPBeforeVerb(chunkedExtraction);
            z15 = this.pattern_extractor.matchesNPVerb(chunkedExtraction);
            z16 = this.pattern_extractor.matchesQuotes(chunkedExtraction);
            matchesRelativeClause = this.pattern_extractor.matchesRelativeClause(chunkedExtraction, i);
        }
        if (matchesRelativeClause) {
            arrayList.add(ChunkedBinaryExtraction.REL);
        } else if (z5) {
            arrayList.add("NEST2");
        }
        if (z2) {
            arrayList.add("NEST1");
        }
        if (z3) {
            arrayList.add("ADJ");
        }
        if (z4) {
            arrayList.add("COMP");
        }
        if (z6) {
            arrayList.add("NPINF");
        }
        if (z8) {
            arrayList.add("DOUBLENP");
        }
        if (z7) {
            arrayList.add("INF");
        }
        if (z12) {
            arrayList.add("NEXT_THAT");
        }
        if (z9) {
            arrayList.add(Expressions.TYPE_LIST);
        }
        if (z11) {
            arrayList.add("COMPOUND");
        }
        if (z10) {
            arrayList.add("IFCLAUSE");
        }
        if (z13) {
            arrayList.add("COMMA");
        }
        if (z14) {
            arrayList.add("PP");
        }
        if (z15) {
            arrayList.add("NPVP");
        }
        if (z16) {
            arrayList.add("QUOTES");
        }
        if (0 != 0) {
            arrayList.add("STATEMENT");
        }
        return arrayList;
    }

    private PositionInstance addRegexPatternFeatures(ChunkedExtraction chunkedExtraction, PositionInstance positionInstance, int i, int i2, int i3, boolean z) {
        boolean matchesAppositiveClause;
        boolean z2 = false;
        boolean z3 = false;
        boolean z4 = false;
        boolean z5 = false;
        if (this.mode != ArgLearner.Mode.LEFT) {
            z4 = this.pattern_extractor.vbgIsNext(chunkedExtraction, i3 + 1);
            matchesAppositiveClause = this.pattern_extractor.appClause(chunkedExtraction, i3 + 1);
            z2 = this.pattern_extractor.relClause(chunkedExtraction, i3 + 1);
        } else {
            matchesAppositiveClause = this.pattern_extractor.matchesAppositiveClause(chunkedExtraction, i3);
            z3 = this.pattern_extractor.matchesList(chunkedExtraction, i2, i3);
            z5 = this.pattern_extractor.prevStop(chunkedExtraction, i3);
        }
        if (matchesAppositiveClause) {
            positionInstance.addFeature("APP");
        }
        if (0 != 0) {
            positionInstance.addFeature("APP2");
        }
        if (z5) {
            positionInstance.addFeature("END");
        }
        if (z3) {
            positionInstance.addFeature(Expressions.TYPE_LIST);
        }
        if (z2) {
            positionInstance.addFeature("RELNEXT");
        }
        if (z4) {
            positionInstance.addFeature("NEXTVBG");
        }
        return positionInstance;
    }

    private int getCountToEnd(List<String> list, int i) {
        int i2 = 0;
        if (this.mode == ArgLearner.Mode.LEFT) {
            for (int i3 = i - 1; i3 > -1; i3--) {
                if (!list.get(i3).equals(OpenNlpUtils.IN_NP)) {
                    i2++;
                }
            }
        } else {
            for (int i4 = i + 1; i4 < list.size(); i4++) {
                if (!list.get(i4).equals(OpenNlpUtils.IN_NP)) {
                    i2++;
                }
            }
        }
        return i2;
    }

    private PositionInstance addBasicFeatures(PositionInstance positionInstance, String str, String str2, String str3) {
        positionInstance.addFeature("CHUNK_" + str2);
        positionInstance.addFeature("TAG_" + str3);
        if (ExtractionParameters.USE_LEX_FEATURES && (lexicalizeIfTag.contains(str3) || lexicalizeIfWord.contains(str.toLowerCase()))) {
            positionInstance.addFeature("WD_" + str.toLowerCase());
        }
        if (ExtractionParameters.USE_PATTERN_FEATURES) {
            Iterator<String> it = this.featureNames.iterator();
            Iterator<Pattern> it2 = this.patterns.iterator();
            while (it2.hasNext()) {
                Pattern next = it2.next();
                String next2 = it.next();
                if (next.matcher(str).matches()) {
                    positionInstance.addFeature(next2);
                }
            }
        }
        return positionInstance;
    }

    private PositionInstance addPatternFeatures(PositionInstance positionInstance, ChunkedExtraction chunkedExtraction, List<String> list, List<String> list2, int i, int i2, int i3, boolean z) {
        positionInstance.addFeature("C_" + getCountToEnd(list2, i));
        if (positionInstance.isMidInstance()) {
            for (int i4 = 0; i4 < list.size(); i4++) {
                positionInstance.addFeature(list.get(i4));
            }
            positionInstance = addRegexPatternFeatures(chunkedExtraction, positionInstance, i2, i3, i, z);
        }
        return positionInstance;
    }

    private Vector<PositionInstance> createBaseInstances(ChunkedExtraction chunkedExtraction, int i, int i2, Boolean bool) {
        Vector<PositionInstance> vector = new Vector<>();
        int start = chunkedExtraction.getStart();
        int length = chunkedExtraction.getLength() + start;
        ImmutableList<String> chunkTags = chunkedExtraction.getSentence().getChunkTags();
        ArrayList arrayList = new ArrayList();
        for (int i3 = 0; i3 < chunkTags.size(); i3++) {
            arrayList.add(chunkTags.get(i3));
        }
        if (length < arrayList.size() - 1 && arrayList.get(length).equals(OpenNlpUtils.IN_NP)) {
            arrayList.set(length, OpenNlpUtils.START_NP);
        }
        ImmutableList<String> tokens = chunkedExtraction.getSentence().getTokens();
        ImmutableList<String> posTags = chunkedExtraction.getSentence().getPosTags();
        ArrayList<String> setFeatures = getSetFeatures(chunkedExtraction, i2, bool.booleanValue());
        String str = "O";
        boolean z = false;
        int i4 = -1;
        for (int i5 = 0; i5 < chunkedExtraction.getSentence().getLength(); i5++) {
            if (!arrayList.get(i5).equals(OpenNlpUtils.IN_NP)) {
                PositionInstance positionInstance = new PositionInstance(i5);
                if ((this.mode != ArgLearner.Mode.LEFT || i5 >= start || i5 >= i2) && ((bool.booleanValue() || this.mode == ArgLearner.Mode.LEFT || i5 < length || i5 < i2) && (!bool.booleanValue() || this.mode == ArgLearner.Mode.LEFT || i5 < i))) {
                    positionInstance.setIsMidInstance(false);
                } else {
                    positionInstance.setIsMidInstance(true);
                }
                if (i5 >= start && i5 < length) {
                    positionInstance.setIsRelInstance(true);
                    z = true;
                } else if (i5 == i && this.mode != ArgLearner.Mode.LEFT) {
                    str = B_ARG;
                } else if (i5 < i || i5 >= i2) {
                    str = O;
                } else {
                    str = I_ARG;
                    i4 = vector.size();
                }
                PositionInstance addPatternFeatures = addPatternFeatures(addBasicFeatures(positionInstance, tokens.get(i5), arrayList.get(i5), posTags.get(i5)), chunkedExtraction, setFeatures, arrayList, i5, i, i2, bool.booleanValue());
                addPatternFeatures.addFeature(str);
                vector.add(addPatternFeatures);
            }
        }
        if (bool.booleanValue() && this.mode == ArgLearner.Mode.LEFT && i4 > -1) {
            vector.get(i4).setFeature(vector.get(i4).size() - 1, "B-ARG");
        }
        if (!z) {
            return null;
        }
        if (this.mode == ArgLearner.Mode.LEFT && bool.booleanValue() && i4 < 0) {
            return null;
        }
        return vector;
    }

    private PositionInstance addConjunctionFeatures(PositionInstance positionInstance) {
        if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES) {
            for (int i = 0; i < 3 && i < positionInstance.size(); i++) {
                if (!positionInstance.get(i).contains("^")) {
                    for (int i2 = i + 1; i2 < 3 && i2 < positionInstance.size(); i2++) {
                        if (!positionInstance.get(i2).contains("^")) {
                            positionInstance.addFeature(positionInstance.size() - 1, positionInstance.get(i) + "^" + positionInstance.get(i2));
                        }
                    }
                }
            }
        }
        return positionInstance;
    }

    private PositionInstance addLeftWindowFeatures(Vector<PositionInstance> vector, int i) {
        PositionInstance positionInstance = vector.get(i);
        for (int max = Math.max(0, i - ExtractionParameters.WINDOW); max < i; max++) {
            int i2 = i - max;
            PositionInstance positionInstance2 = vector.get(max);
            if (positionInstance2.isMidInstance() && (ExtractionParameters.USE_CONTEXTUAL_FEATURES || positionInstance2.isMidInstance())) {
                String label = positionInstance2.label();
                for (int i3 = 0; i3 < 3 && i3 < positionInstance2.size(); i3++) {
                    String str = positionInstance2.get(i3);
                    if (i3 == CHUNK_FEATURE) {
                        String str2 = "L" + Integer.toString(i2) + "-" + str;
                        positionInstance.addFeature(positionInstance.size() - 1, str2);
                        if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES && max == i - 1) {
                            positionInstance.addFeature(positionInstance.size() - 1, str2 + "^L0-" + positionInstance.get(i3));
                        }
                    } else if (i3 == TAG_FEATURE && !label.equals("O-NP")) {
                        String str3 = "L" + Integer.toString(i2) + "-" + str;
                        positionInstance.addFeature(positionInstance.size() - 1, str3);
                        if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES && max == i - 1) {
                            positionInstance.addFeature(positionInstance.size() - 1, str3 + "^L0-" + positionInstance.get(i3));
                        }
                    } else if (i3 == LEX_FEATURE && str.charAt(0) == 'W') {
                        String str4 = "L" + Integer.toString(i2) + "-" + str;
                        positionInstance.addFeature(positionInstance.size() - 1, str4);
                        if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES && max == i - 1 && positionInstance.get(i3).charAt(0) == 'W') {
                            positionInstance.addFeature(positionInstance.size() - 1, str4 + "^L0-" + positionInstance.get(i3));
                        }
                    }
                }
            }
        }
        return positionInstance;
    }

    private PositionInstance addRightWindowFeatures(Vector<PositionInstance> vector, int i) {
        PositionInstance positionInstance = vector.get(i);
        PositionInstance positionInstance2 = null;
        String str = null;
        for (int i2 = i + 1; i2 < vector.size() && i2 <= i + ExtractionParameters.WINDOW; i2++) {
            int i3 = i2 - i;
            PositionInstance positionInstance3 = vector.get(i2);
            String label = positionInstance3.label();
            if (positionInstance3.isMidInstance()) {
                if (ExtractionParameters.USE_CONTEXTUAL_FEATURES || positionInstance3.isMidInstance()) {
                    for (int i4 = 0; i4 < 3 && i4 < positionInstance3.size(); i4++) {
                        String str2 = positionInstance3.get(i4);
                        if (i4 == CHUNK_FEATURE) {
                            String str3 = "R" + Integer.toString(i3) + "-" + str2;
                            positionInstance.addFeature(positionInstance.size() - 1, str3);
                            if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES && i2 == i + 2) {
                                positionInstance.addFeature(positionInstance.size() - 1, (str3 + "^R1-" + positionInstance2.get(i4)) + "^R0-" + positionInstance.get(i4));
                            }
                        } else if (i4 == TAG_FEATURE && !label.equals("O-NP")) {
                            String str4 = "R" + Integer.toString(i3) + "-" + str2;
                            positionInstance.addFeature(positionInstance.size() - 1, str4);
                            if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES && i2 == i + 2 && !str.equals("O-NP")) {
                                positionInstance.addFeature(positionInstance.size() - 1, (str4 + "^R1-" + positionInstance2.get(i4)) + "^R0-" + positionInstance.get(i4));
                            }
                        } else if (i4 == LEX_FEATURE && str2.charAt(0) == 'W') {
                            String str5 = "R" + Integer.toString(i3) + "-" + str2;
                            positionInstance.addFeature(positionInstance.size() - 1, str5);
                            if (ExtractionParameters.USE_CONJUNCTIVE_FEATURES && i2 == i + 2 && !str.equals("O-NP") && positionInstance.get(i4).charAt(0) == 'W' && positionInstance2.get(i4).charAt(0) == 'W') {
                                positionInstance.addFeature(positionInstance.size() - 1, (str5 + "^R1-" + positionInstance2.get(i4)) + "^R0-" + positionInstance.get(i4));
                            }
                        }
                    }
                }
                str = label;
                positionInstance2 = positionInstance3;
            }
        }
        return positionInstance;
    }

    private Vector<Vector<String>> addWindowFeatures(Vector<PositionInstance> vector) {
        Vector<Vector<String>> vector2 = new Vector<>();
        for (int i = 0; i < vector.size(); i++) {
            PositionInstance positionInstance = vector.get(i);
            if (positionInstance.isMidInstance()) {
                addConjunctionFeatures(positionInstance);
                addLeftWindowFeatures(vector, i);
                PositionInstance addRightWindowFeatures = addRightWindowFeatures(vector, i);
                if (this.mode == ArgLearner.Mode.LEFT) {
                    vector2.add(0, addRightWindowFeatures.features());
                } else {
                    vector2.add(addRightWindowFeatures.features());
                }
            }
        }
        return vector2;
    }

    private Vector<Vector<String>> addPredicateFeatures(Vector<PositionInstance> vector, Vector<Vector<String>> vector2) {
        Vector<String> vector3 = new Vector<>();
        String str = "";
        String str2 = "";
        String str3 = "";
        for (int i = 0; i < vector.size(); i++) {
            PositionInstance positionInstance = vector.get(i);
            if (positionInstance.isRelInstance()) {
                Vector<String> features = positionInstance.features();
                for (int i2 = 0; i2 < features.size(); i2++) {
                    if (features.get(i2).split("_").length > 1) {
                        String trim = features.get(i2).split("_")[0].trim();
                        String trim2 = features.get(i2).split("_")[1].trim();
                        if (trim.equals("TAG")) {
                            str2 = str2 + "TAG_" + trim2 + "^";
                        } else if (trim.equals("CHUNK")) {
                            str = str + "CHUNK_" + trim2 + "^";
                        } else if (trim.equals("WD")) {
                            str3 = str3 + "WD_" + trim2 + "^";
                        }
                    }
                }
            }
        }
        if (str3.length() > 0) {
            vector3.add(0, str3.substring(0, str3.length() - 1));
        }
        if (str2.length() > 0) {
            vector3.add(0, str2.substring(0, str2.length() - 1));
        }
        if (str.length() > 0) {
            vector3.add(0, str.substring(0, str.length() - 1));
        }
        vector3.add("ENTITY1");
        vector3.add("O");
        vector2.add(0, vector3);
        return vector2;
    }

    public String extractCRFFeatures(ChunkedExtraction chunkedExtraction, int i, int i2, Boolean bool) {
        Vector<PositionInstance> createBaseInstances = createBaseInstances(chunkedExtraction, i, i2, bool);
        if (createBaseInstances == null) {
            return null;
        }
        Vector<Vector<String>> addWindowFeatures = addWindowFeatures(createBaseInstances);
        if (addWindowFeatures.size() == 0) {
            return null;
        }
        return convertInstancesToFeatures(addPredicateFeatures(createBaseInstances, addWindowFeatures), bool);
    }
}
