package org.apache.nutch.crawl;

import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;

/* loaded from: input_file:org/apache/nutch/crawl/CrawlDbFilter.class */
/**
 * Mapper that optionally normalizes and/or filters the URL held in each
 * record's key before re-emitting the record.
 *
 * <p>Both steps are switched on through boolean job properties and are off
 * by default. A record is dropped when either step throws an exception or
 * leaves the URL as {@code null}; otherwise it is emitted under the
 * resulting URL with its value unchanged.
 */
public class CrawlDbFilter implements Mapper {
    /** Job property (boolean, default {@code false}) that enables URL filtering. */
    public static final String URL_FILTERING = "crawldb.url.filters";
    /** Job property (boolean, default {@code false}) that enables URL normalization. */
    public static final String URL_NORMALIZING = "crawldb.url.normalizers";
    /** Job property naming the normalizer scope; defaults to {@link URLNormalizers#SCOPE_CRAWLDB}. */
    public static final String URL_NORMALIZING_SCOPE = "crawldb.url.normalizers.scope";

    public static final Log LOG = LogFactory.getLog(CrawlDbFilter.class);

    private JobConf jobConf;

    // Filtering state; 'filters' is only created when 'urlFiltering' is true.
    private boolean urlFiltering;
    private URLFilters filters;

    // Normalizing state; 'scope' and 'normalizers' are only set when 'urlNormalizers' is true.
    private boolean urlNormalizers;
    private String scope;
    private URLNormalizers normalizers;

    // Single key instance reused for every emitted record.
    private Text newKey = new Text();

    /**
     * Reads the filtering/normalizing switches from the job configuration and
     * builds the helpers that the enabled steps need.
     *
     * @param jobConf configuration of the running job
     */
    public void configure(JobConf jobConf) {
        this.jobConf = jobConf;
        urlFiltering = jobConf.getBoolean(URL_FILTERING, false);
        urlNormalizers = jobConf.getBoolean(URL_NORMALIZING, false);

        if (urlFiltering) {
            filters = new URLFilters(jobConf);
        }

        if (urlNormalizers) {
            scope = jobConf.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_CRAWLDB);
            normalizers = new URLNormalizers(jobConf, scope);
        }
    }

    /** Nothing to release. */
    public void close() {
    }

    /**
     * Runs the enabled normalize/filter steps on the key's string form and
     * collects the record under the resulting URL, or emits nothing if the
     * URL was rejected.
     *
     * @param writableComparable input key; its {@code toString()} is used as the URL
     * @param writable           input value, passed through untouched
     * @param outputCollector    receives the surviving record
     * @param reporter           not used by this mapper
     * @throws IOException if the collector fails
     */
    public void map(WritableComparable writableComparable, Writable writable, OutputCollector outputCollector, Reporter reporter) throws IOException {
        String url = writableComparable.toString();

        if (urlNormalizers) {
            url = normalizeOrNull(url);
        }
        // Filtering is skipped once normalization has already rejected the URL.
        if (urlFiltering && url != null) {
            url = filterOrNull(url);
        }

        if (url == null) {
            return;
        }
        newKey.set(url);
        outputCollector.collect(newKey, writable);
    }

    /** Normalizes {@code url} in the configured scope; logs a warning and yields {@code null} on any exception. */
    private String normalizeOrNull(String url) {
        try {
            return normalizers.normalize(url, scope);
        } catch (Exception failure) {
            LOG.warn("Skipping " + url + ":" + failure);
            return null;
        }
    }

    /** Passes {@code url} through the filters; logs a warning and yields {@code null} on any exception. */
    private String filterOrNull(String url) {
        try {
            return filters.filter(url);
        } catch (Exception failure) {
            LOG.warn("Skipping " + url + ":" + failure);
            return null;
        }
    }
}
