package opennlp.tools.formats.masc;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import opennlp.tools.util.Span;

/* loaded from: input_file:WEB-INF/lib/opennlp-tools-2.1.0.jar:opennlp/tools/formats/masc/MascSentence.class */
/**
 * A {@link Span} covering one sentence, together with the words (quarks) that belong to it,
 * the Penn tokens built from those quarks and the named entities attached to those tokens.
 *
 * <p>The token-based accessors ({@link #getTokenText()}, {@link #getTokenStrings()},
 * {@link #getTokensSpans()}, {@link #getTags()}) read the token list produced by
 * {@link #tokenizePenn(Map, Map, Map, Map)} and fail with a {@link NullPointerException}
 * if the sentence has not been tokenized yet.
 */
public class MascSentence extends Span {
    private final List<MascWord> allDocumentWords;
    private final String text;
    private final List<MascWord> words;
    private final Map<Integer, MascWord> wordsById;
    private List<MascToken> sentenceTokens;
    private Map<Integer, Integer> tokensById;
    private List<Span> namedEntities;

    /**
     * Resolves quark IDs to words: first among the words of this sentence, then, as a
     * fallback, among all words of the document.
     */
    private static final class QuarkExtractor {
        private final Map<Integer, MascWord> wordsById;
        private final List<MascWord> allDocumentWords;

        QuarkExtractor(Map<Integer, MascWord> wordsById, List<MascWord> allDocumentWords) {
            this.wordsById = wordsById;
            this.allDocumentWords = allDocumentWords;
        }

        /**
         * Looks up the word with the given ID.
         *
         * @param id the quark (word) ID
         * @return the matching word
         * @throws IOException if no word with that ID exists in the sentence or the document
         */
        MascWord get(int id) throws IOException {
            MascWord inSentence = this.wordsById.get(id);
            if (inSentence != null) {
                return inSentence;
            }
            // Fallback: the quark belongs to another sentence, so scan the whole document.
            for (MascWord candidate : this.allDocumentWords) {
                if (candidate.getId() == id) {
                    return candidate;
                }
            }
            throw new IOException("Word" + i(id) + " not found in the document.");
        }

        /** Keeps the message format of the lookup failure in one place. */
        private static int i(int id) {
            return id;
        }
    }

    /**
     * Creates a sentence spanning {@code [i, i2)}.
     *
     * @param i     start offset of the sentence
     * @param i2    end offset of the sentence
     * @param str   the text the offsets of this sentence and of its tokens index into
     * @param list  the words (quarks) of this sentence
     * @param list2 all words of the document, used to resolve quarks outside this sentence
     */
    public MascSentence(int i, int i2, String str, List<MascWord> list, List<MascWord> list2) {
        super(i, i2);
        this.sentenceTokens = null;
        this.tokensById = new HashMap<>();
        this.namedEntities = new ArrayList<>();
        this.text = str;
        this.words = list;
        this.allDocumentWords = list2;
        Map<Integer, MascWord> byId = new HashMap<>();
        for (MascWord word : list) {
            byId.put(word.getId(), word);
        }
        this.wordsById = byId;
    }

    /**
     * Builds the Penn tokens of this sentence from its quarks.
     *
     * <p>Every token referenced by a word of the sentence is created once, in the order in
     * which it is first encountered. Tokens whose extent is empty are not added. A token
     * that has no quarks is reported on {@code System.err}, skipped, and makes the method
     * return {@code false}.
     *
     * @param tokenToQuarks token ID to the IDs of the quarks that form the token
     * @param quarkToTokens quark ID to the IDs of the tokens the quark takes part in
     * @param tokenToBase   token ID to the value passed as the fifth {@code MascToken}
     *                      constructor argument (presumably the base form - confirm
     *                      against {@code MascToken})
     * @param tokenToTag    token ID to the value passed as the fourth {@code MascToken}
     *                      constructor argument (presumably the POS tag - confirm against
     *                      {@code MascToken})
     * @return {@code true} if no problem was found; {@code false} if a token crosses the
     *         sentence boundary or has no quarks
     * @throws IOException if a quark cannot be found anywhere in the document
     */
    public boolean tokenizePenn(Map<Integer, int[]> tokenToQuarks, Map<Integer, int[]> quarkToTokens, Map<Integer, String> tokenToBase, Map<Integer, String> tokenToTag) throws IOException {
        boolean withoutIssues = true;
        QuarkExtractor extractor = new QuarkExtractor(this.wordsById, this.allDocumentWords);
        // Built locally and published only on success, so a failed run leaves no partial state.
        List<MascToken> tokens = new ArrayList<>();
        Set<Integer> processed = new HashSet<>();

        for (MascWord word : this.words) {
            int[] tokenIds = quarkToTokens.get(word.getId());
            if (tokenIds == null) {
                continue;
            }
            for (int tokenId : tokenIds) {
                if (processed.contains(tokenId)) {
                    continue;
                }
                int[] quarkIds = tokenToQuarks.get(tokenId);
                if (quarkIds == null || quarkIds.length == 0) {
                    // Nothing to build the token from: report it and move on instead of
                    // failing with a NullPointerException / ArrayIndexOutOfBoundsException.
                    System.err.println("Token without quarks found: " + tokenId);
                    withoutIssues = false;
                    continue;
                }
                MascWord[] quarks = new MascWord[quarkIds.length];
                for (int q = 0; q < quarkIds.length; q++) {
                    int quarkId = quarkIds[q];
                    if (!this.wordsById.containsKey(quarkId)) {
                        withoutIssues = false;
                        System.out.println("[WARNING] Some tokens cross sentence boundaries.\n\tQuark ID: " + quarkId + "\n\tPenn token ID: " + tokenId);
                    }
                    quarks[q] = extractor.get(quarkId);
                }
                int start = quarks[0].getStart();
                int end = quarks[quarks.length - 1].getEnd();
                if (end - start > 0) {
                    tokens.add(new MascToken(start, end, tokenId, tokenToTag.get(tokenId), tokenToBase.get(tokenId), quarks));
                    processed.add(tokenId);
                }
            }
        }

        // Rebuild the ID -> position index from scratch so a re-tokenization leaves no stale entries.
        Map<Integer, Integer> positions = new HashMap<>();
        for (int index = 0; index < tokens.size(); index++) {
            positions.put(tokens.get(index).getTokenId(), index);
        }
        this.tokensById = positions;
        this.sentenceTokens = Collections.unmodifiableList(tokens);
        return withoutIssues;
    }

    /**
     * Attaches named entities to this sentence.
     *
     * <p>For every entity, the tokens that belong to this sentence determine a span in token
     * indices: from the smallest token index (inclusive) to the largest one plus one
     * (exclusive). Entities with no token in this sentence are ignored. The resulting spans
     * are added to the entities already present, sorted by start, and overlapping spans are
     * resolved by keeping the longer one (the earlier one on a tie).
     *
     * @param entityIdToType   entity ID to the entity type used as the span type
     * @param entityIdToTokens entity ID to the IDs of the tokens forming the entity
     * @return {@code true} if no overlapping entities were found, {@code false} otherwise
     * @throws IOException if the sentence has not been tokenized yet
     */
    public boolean addNamedEntities(Map<Integer, String> entityIdToType, Map<Integer, List<Integer>> entityIdToTokens) throws IOException {
        if (this.sentenceTokens == null) {
            throw new IOException("Named entity labels provided for un untokenized sentence.");
        }

        // Work on a copy: the current list may be unmodifiable after an earlier call.
        List<Span> candidates = new ArrayList<>(this.namedEntities);
        for (Map.Entry<Integer, List<Integer>> entity : entityIdToTokens.entrySet()) {
            List<Integer> tokenIds = entity.getValue();
            if (tokenIds == null) {
                continue;
            }
            int start = this.sentenceTokens.size();
            int end = 0;
            boolean inThisSentence = false;
            for (Integer tokenId : tokenIds) {
                Integer index = this.tokensById.get(tokenId);
                if (index == null) {
                    continue;
                }
                inThisSentence = true;
                start = Math.min(start, index);
                // The end is exclusive, so it must be one past the largest token index.
                end = Math.max(end, index + 1);
            }
            if (inThisSentence) {
                candidates.add(new Span(start, end, entityIdToType.get(entity.getKey())));
            }
        }
        candidates.sort(Comparator.comparingInt(Span::getStart));

        boolean withoutIssues = true;
        List<Span> kept = new ArrayList<>(candidates.size());
        for (Span candidate : candidates) {
            int lastIndex = kept.size() - 1;
            Span last = lastIndex >= 0 ? kept.get(lastIndex) : null;
            // Spans are sorted by start, so the candidate overlaps the last kept span
            // exactly when it begins before that span ends.
            if (last == null || candidate.getStart() >= last.getEnd()) {
                kept.add(candidate);
                continue;
            }
            System.out.println("[WARNING] Named entities overlap. This is forbidden in the OpenNLP.\n\tKeeping the longer of them.");
            withoutIssues = false;
            if (candidate.length() > last.length()) {
                kept.set(lastIndex, candidate);
            }
        }
        this.namedEntities = Collections.unmodifiableList(kept);
        return withoutIssues;
    }

    /**
     * Returns the named entities of this sentence as spans over token indices.
     *
     * @return the named entities, sorted by start once {@link #addNamedEntities(Map, Map)}
     *         has been called; empty before that
     */
    public List<Span> getNamedEntities() {
        return this.namedEntities;
    }

    /**
     * Returns the text covered by this sentence's own span.
     *
     * @return the substring of the text between the start and end of this sentence
     */
    public String getSentDetectText() {
        return this.text.substring(getStart(), getEnd());
    }

    /**
     * Returns the text from the start of the first token to the end of the last token.
     *
     * @return the covered text, or an empty string if the sentence has no tokens
     */
    public String getTokenText() {
        if (this.sentenceTokens.isEmpty()) {
            return "";
        }
        MascToken first = this.sentenceTokens.get(0);
        MascToken last = this.sentenceTokens.get(this.sentenceTokens.size() - 1);
        return this.text.substring(first.getStart(), last.getEnd());
    }

    /**
     * Returns the surface string of every token, in token order.
     *
     * @return an unmodifiable list with one string per token
     */
    public List<String> getTokenStrings() {
        List<String> strings = new ArrayList<>(this.sentenceTokens.size());
        for (MascToken token : this.sentenceTokens) {
            strings.add(this.text.substring(token.getStart(), token.getEnd()));
        }
        return Collections.unmodifiableList(strings);
    }

    /**
     * Returns the token spans relative to the start of the first token, i.e. as offsets
     * into {@link #getTokenText()}.
     *
     * @return an unmodifiable list with one span per token
     */
    public List<Span> getTokensSpans() {
        List<Span> spans = new ArrayList<>(this.sentenceTokens.size());
        int offset = this.sentenceTokens.isEmpty() ? 0 : this.sentenceTokens.get(0).getStart();
        for (MascToken token : this.sentenceTokens) {
            spans.add(new Span(token.getStart() - offset, token.getEnd() - offset));
        }
        return Collections.unmodifiableList(spans);
    }

    /**
     * Returns the value of {@code MascToken.getPos()} for every token, in token order.
     *
     * @return a new, modifiable list with one entry per token
     * @throws IOException declared for callers; presumably raised by {@code getPos()} when
     *                     a token has no tag - confirm against {@code MascToken}
     */
    public List<String> getTags() throws IOException {
        List<String> tags = new ArrayList<>(this.sentenceTokens.size());
        for (MascToken token : this.sentenceTokens) {
            tags.add(token.getPos());
        }
        return tags;
    }
}
