package edu.washington.cs.knowitall.nlp;

import edu.washington.cs.knowitall.commonlib.Range;
import edu.washington.cs.knowitall.util.DefaultObjects;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Pattern;
import opennlp.tools.chunker.Chunker;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:WEB-INF/lib/reverb-core-1.4.1.jar:edu/washington/cs/knowitall/nlp/OpenNlpSentenceChunker.class */
/**
 * A {@link SentenceChunker} that tokenizes, POS-tags and chunks a sentence by
 * delegating to an OpenNLP {@link Tokenizer}, {@link POSTagger} and {@link Chunker}.
 *
 * <p>After chunking, the chunk tags can optionally be post-processed by
 * {@link OpenNlpUtils#attachOfs} and {@link OpenNlpUtils#attachPossessives};
 * both post-processing steps are enabled by default.
 */
public class OpenNlpSentenceChunker implements SentenceChunker {
    /**
     * Matches the character U+00A0. A compiled {@link Pattern} is immutable, so a
     * single instance is compiled once and shared instead of recompiling per chunker.
     */
    private static final Pattern NBSP_PATTERN = Pattern.compile("\\xa0");

    private final Chunker chunker;
    private final POSTagger posTagger;
    private final Tokenizer tokenizer;
    private boolean attachOfs = true;
    private boolean attachPossessives = true;

    /** Pattern whose matches are replaced by a single space before tokenizing. */
    Pattern convertToSpace = NBSP_PATTERN;

    /**
     * Creates a chunker that uses the tokenizer, POS tagger and chunker supplied
     * by {@link DefaultObjects}.
     *
     * @throws IOException if one of the default objects cannot be obtained
     */
    public OpenNlpSentenceChunker() throws IOException {
        this(
                DefaultObjects.getDefaultTokenizer(),
                DefaultObjects.getDefaultPosTagger(),
                DefaultObjects.getDefaultChunker());
    }

    /**
     * Creates a chunker that uses the given OpenNLP components.
     *
     * @param tokenizer used to split the sentence into token spans
     * @param pOSTagger used to assign a POS tag to every token
     * @param chunker   used to assign a chunk tag to every token
     */
    public OpenNlpSentenceChunker(Tokenizer tokenizer, POSTagger pOSTagger, Chunker chunker) {
        this.tokenizer = tokenizer;
        this.posTagger = pOSTagger;
        this.chunker = chunker;
    }

    /**
     * @return {@code true} if {@link OpenNlpUtils#attachOfs} is applied to the chunk tags
     */
    public boolean attachOfs() {
        return this.attachOfs;
    }

    /**
     * @return {@code true} if {@link OpenNlpUtils#attachPossessives} is applied to the chunk tags
     */
    public boolean attachPossessives() {
        return this.attachPossessives;
    }

    /**
     * Enables or disables the {@link OpenNlpUtils#attachOfs} post-processing step.
     *
     * @param z {@code true} to enable the step
     */
    public void attachOfs(boolean z) {
        this.attachOfs = z;
    }

    /**
     * Enables or disables the {@link OpenNlpUtils#attachPossessives} post-processing step.
     *
     * @param z {@code true} to enable the step
     */
    public void attachPossessives(boolean z) {
        this.attachPossessives = z;
    }

    /**
     * Tokenizes, POS-tags and chunks the given sentence.
     *
     * <p>Every match of {@link #convertToSpace} is replaced by a single space first;
     * token offsets in the result therefore refer to that normalized string, which has
     * the same length as the input as long as the pattern matches single characters.
     *
     * <p>Note that the normalization happens outside the {@code try} block, so an
     * exception raised there (for example for a {@code null} argument) is not wrapped.
     *
     * @param str the sentence text
     * @return the sentence with its token ranges, tokens, POS tags and chunk tags
     * @throws ChunkerException if a {@link NullPointerException} is raised while
     *         tokenizing, tagging, chunking or building the result
     */
    @Override // edu.washington.cs.knowitall.nlp.SentenceChunker
    public ChunkedSentence chunkSentence(String str) throws ChunkerException {
        String normalized = this.convertToSpace.matcher(str).replaceAll(StringUtils.SPACE);
        try {
            Span[] spans = this.tokenizer.tokenizePos(normalized);

            // The token count is known up front, so fill typed arrays directly
            // rather than going through raw lists and casting back to arrays.
            Range[] ranges = new Range[spans.length];
            String[] tokens = new String[spans.length];
            for (int i = 0; i < spans.length; i++) {
                int start = spans[i].getStart();
                int end = spans[i].getEnd();
                ranges[i] = Range.fromInterval(start, end);
                tokens[i] = normalized.substring(start, end);
            }

            String[] posTags = this.posTagger.tag(tokens);
            String[] chunkTags = this.chunker.chunk(tokens, posTags);

            // Both helpers are handed the chunk tag array and return nothing that is used here.
            if (this.attachOfs) {
                OpenNlpUtils.attachOfs(tokens, chunkTags);
            }
            if (this.attachPossessives) {
                OpenNlpUtils.attachPossessives(posTags, chunkTags);
            }
            return new ChunkedSentence(ranges, tokens, posTags, chunkTags);
        } catch (NullPointerException e) {
            // Surface NPEs from the pipeline as a ChunkerException carrying the offending text.
            throw new ChunkerException("OpenNLP threw NPE on '" + normalized + "'", e);
        }
    }
}
