package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.TypesafeMap;
import groovyjarjarcommonscli.HelpFormatter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;

/* loaded from: input_file:edu/stanford/nlp/patterns/surface/AnnotatedTextReader.class */
/**
 * Reads text that is annotated inline with {@code <label>} ... {@code </label>} tokens and
 * converts it into tokenized sentences.
 */
public class AnnotatedTextReader {
    /** Label assigned to tokens that are not inside any recognised {@code <label>} span. */
    private static final String BACKGROUND_SYMBOL = "O";

    /** Options handed to the PTB tokenizer for every input line. */
    private static final String TOKENIZER_OPTIONS =
            "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false";

    /**
     * Parses every line of {@code bufferedReader} into one {@link CoreMap} per sentence.
     *
     * <p>Each input line is either {@code id<TAB>text} or just {@code text}; in the latter case
     * the zero-based line number is used as the id. The text is sentence-split and tokenized.
     * A token that matches {@code <label>} exactly (for a label in {@code set}) switches the
     * current label to that label, and a token that matches {@code </label>} exactly switches
     * it back to {@code "O"}. Such marker tokens are consumed: they are not added to the
     * sentence tokens or to the sentence text. The current label is reset at the start of every
     * line but carries over between sentences of the same line.
     *
     * <p>Every produced sentence carries its text (tokens joined by single spaces), its token
     * list, and a document id of the form {@code str + id + "-" + sentenceIndex}, where
     * {@code sentenceIndex} is zero-based within the line.
     *
     * @param bufferedReader source of the annotated lines; read until exhausted, not closed here
     * @param set labels to recognise; they are joined with {@code |} and embedded unquoted in a
     *     regular expression, so regex metacharacters in a label are interpreted as such
     * @param map optional (may be {@code null}) mapping from label to an annotation key class;
     *     when the current label is a key of this map, the token gets that annotation set to
     *     the label
     * @param z whether to store the current label on each token as the gold answer annotation
     * @param str prefix prepended to every line id when building document ids
     * @return the sentences of all lines, in reading order
     * @throws IOException if reading from {@code bufferedReader} fails
     */
    public static List<CoreMap> parseFile(BufferedReader bufferedReader, Set<String> set, Map<String, Class<? extends TypesafeMap.Key<String>>> map, boolean z, String str) throws IOException {
        // java.util.regex.Pattern is spelled out in full, exactly as the original code does.
        String labelAlternation = StringUtils.join(set, "|");
        java.util.regex.Pattern openingTag = java.util.regex.Pattern.compile("<(" + labelAlternation + ")>");
        java.util.regex.Pattern closingTag = java.util.regex.Pattern.compile("</(" + labelAlternation + ")>");

        List<CoreMap> sentences = new ArrayList<>();
        int lineIndex = -1;
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            lineIndex++;

            // With a positive limit, split always yields one or two parts (never zero).
            String[] parts = line.split("\t", 2);
            String lineId;
            String text;
            if (parts.length == 2) {
                lineId = parts[0];
                text = parts[1];
            } else {
                lineId = String.valueOf(lineIndex);
                text = parts[0];
            }
            String docIdBase = str + lineId;

            DocumentPreprocessor preprocessor = new DocumentPreprocessor(new StringReader(text));
            preprocessor.setTokenizerFactory(PTBTokenizer.PTBTokenizerFactory.newCoreLabelTokenizerFactory(TOKENIZER_OPTIONS));

            // Deliberately declared outside the sentence loop: an open tag stays in effect
            // across sentence boundaries until its closing tag or the end of the line.
            String currentLabel = BACKGROUND_SYMBOL;
            int sentenceIndex = -1;
            for (List<HasWord> sentence : preprocessor) {
                sentenceIndex++;
                StringBuilder sentenceText = new StringBuilder();
                List<CoreLabel> tokens = new ArrayList<>();

                for (HasWord hasWord : sentence) {
                    String word = hasWord.word();

                    Matcher opening = openingTag.matcher(word);
                    if (opening.matches()) {
                        currentLabel = opening.group(1);
                        continue;
                    }
                    if (closingTag.matcher(word).matches()) {
                        currentLabel = BACKGROUND_SYMBOL;
                        continue;
                    }

                    sentenceText.append(" ").append(word);

                    CoreLabel token = new CoreLabel();
                    token.setWord(word);
                    token.setLemma(word);
                    token.setValue(word);
                    token.set(CoreAnnotations.TextAnnotation.class, word);
                    token.set(CoreAnnotations.OriginalTextAnnotation.class, word);
                    if (z) {
                        token.set(CoreAnnotations.GoldAnswerAnnotation.class, currentLabel);
                    }
                    if (map != null && map.containsKey(currentLabel)) {
                        token.set(map.get(currentLabel), currentLabel);
                    }
                    tokens.add(token);
                }

                CoreMap sentenceMap = new ArrayCoreMap();
                sentenceMap.set(CoreAnnotations.TextAnnotation.class, sentenceText.toString().trim());
                sentenceMap.set(CoreAnnotations.TokensAnnotation.class, tokens);
                sentenceMap.set(CoreAnnotations.DocIDAnnotation.class, docIdBase + "-" + sentenceIndex);
                sentences.add(sentenceMap);
            }
        }
        return sentences;
    }
}
