package org.apache.nutch.analysis;

import java.io.IOException;
import java.io.Reader;
import org.apache.hadoop.conf.Configuration;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

/* loaded from: input_file:org/apache/nutch/analysis/NutchDocumentAnalyzer.class */
/**
 * Analyzer that dispatches on field name: the {@code "anchor"} field is
 * analyzed by an anchor-specific analyzer, every other field by the content
 * analyzer.
 *
 * <p>Each instance owns its own pair of analyzers, built from the
 * {@link Configuration} passed to the constructor, so instances created with
 * different configurations do not affect one another.
 */
public class NutchDocumentAnalyzer extends NutchAnalyzer {
    /** Position increment applied to the first token of every anchor token stream. */
    public static final int INTER_ANCHOR_GAP = 4;

    /** Name of the field that is routed to the anchor analyzer. */
    private static final String ANCHOR_FIELD = "anchor";

    /** Analyzer used for every field other than {@code "anchor"}. */
    private final Analyzer contentAnalyzer;
    /** Analyzer used for the {@code "anchor"} field; wraps {@link #contentAnalyzer}. */
    private final Analyzer anchorAnalyzer;
    /** Configuration this analyzer was created with. */
    private final Configuration conf;

    /**
     * Analyzer for anchor text: delegates tokenization to the supplied content
     * analyzer and wraps the result in an {@link AnchorFilter}.
     */
    private static class AnchorAnalyzer extends Analyzer {
        private final Analyzer delegate;

        /**
         * @param delegate analyzer that produces the underlying token stream
         */
        private AnchorAnalyzer(Analyzer delegate) {
            this.delegate = delegate;
        }

        @Override
        public final TokenStream tokenStream(String str, Reader reader) {
            return new AnchorFilter(this.delegate.tokenStream(str, reader));
        }
    }

    /**
     * Token filter that sets the position increment of the first token it
     * emits to {@link NutchDocumentAnalyzer#INTER_ANCHOR_GAP}; all later tokens
     * pass through unchanged.
     */
    private static class AnchorFilter extends TokenFilter {
        /** True until the first non-null token has been seen. */
        private boolean first = true;

        public AnchorFilter(TokenStream tokenStream) {
            super(tokenStream);
        }

        /**
         * Returns the next token from the wrapped stream, or {@code null} when
         * the stream is exhausted.
         *
         * @throws IOException if the wrapped stream fails
         */
        @Override
        public final org.apache.lucene.analysis.Token next() throws IOException {
            // Fully qualified: the type is not imported in this file.
            org.apache.lucene.analysis.Token token = this.input.next();
            if (token != null && this.first) {
                // Presumably this gap keeps position-based matches from spanning
                // two separate anchor texts -- confirm against the indexing code.
                token.setPositionIncrement(INTER_ANCHOR_GAP);
                this.first = false;
            }
            return token;
        }
    }

    /**
     * Analyzer for document content: tokenizes with
     * {@code NutchDocumentTokenizer} and passes the result through the
     * {@code CommonGrams} filter for the given field.
     */
    private static class ContentAnalyzer extends Analyzer {
        private final CommonGrams commonGrams;

        public ContentAnalyzer(Configuration configuration) {
            this.commonGrams = new CommonGrams(configuration);
        }

        @Override
        public TokenStream tokenStream(String str, Reader reader) {
            return this.commonGrams.getFilter(new NutchDocumentTokenizer(reader), str);
        }
    }

    /**
     * Creates an analyzer whose content and anchor analyzers are built from
     * the given configuration.
     *
     * @param configuration configuration handed to {@code CommonGrams}
     */
    public NutchDocumentAnalyzer(Configuration configuration) {
        this.conf = configuration;
        // Per-instance analyzers: previously these were static fields overwritten
        // by every constructor call, so the most recently constructed instance
        // silently changed the behavior of all others.
        this.contentAnalyzer = new ContentAnalyzer(configuration);
        this.anchorAnalyzer = new AnchorAnalyzer(this.contentAnalyzer);
    }

    /**
     * Returns a token stream for the given field.
     *
     * @param str    field name; {@code "anchor"} selects the anchor analyzer,
     *               anything else (including {@code null}) the content analyzer
     * @param reader source of the text to tokenize
     * @return the token stream produced by the selected analyzer
     */
    @Override // org.apache.nutch.analysis.NutchAnalyzer
    public TokenStream tokenStream(String str, Reader reader) {
        Analyzer analyzer = ANCHOR_FIELD.equals(str) ? this.anchorAnalyzer : this.contentAnalyzer;
        return analyzer.tokenStream(str, reader);
    }
}
