package org.apache.nutch.crawl;

import java.io.IOException;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;

/* loaded from: input_file:org/apache/nutch/crawl/LinkDbFilter.class */
/**
 * Mapper that screens link-database entries through optional URL
 * normalization and URL filtering.
 *
 * <p>Both steps are off unless enabled via the job configuration
 * ({@link #URL_NORMALIZING}, {@link #URL_FILTERING}). An entry is dropped
 * when its key URL is rejected; an individual inlink is removed from the
 * entry when its source URL is rejected; an entry left with no inlinks is
 * not emitted.
 *
 * <p>NOTE(review): the normalized/filtered strings are used only for the
 * keep-or-drop decision. The emitted key and the surviving inlinks are the
 * original objects, so normalized forms are not written to the output --
 * confirm this is the intended contract.
 */
public class LinkDbFilter implements Mapper {
    /** Job property (boolean, default {@code false}) that turns URL filtering on. */
    public static final String URL_FILTERING = "linkdb.url.filters";
    /** Job property (boolean, default {@code false}) that turns URL normalization on. */
    public static final String URL_NORMALIZING = "linkdb.url.normalizer";
    /** Job property naming the normalizer scope; defaults to {@code URLNormalizers.SCOPE_LINKDB}. */
    public static final String URL_NORMALIZING_SCOPE = "linkdb.url.normalizer.scope";
    public static final Log LOG = LogFactory.getLog(LinkDbFilter.class);

    private boolean filter;
    private boolean normalize;
    private URLFilters filters;
    private URLNormalizers normalizers;
    private JobConf jobConf;
    private String scope;

    /**
     * Reads the filtering/normalizing switches from the job configuration and
     * instantiates only the helpers that are enabled.
     *
     * @param jobConf job configuration supplying the {@code linkdb.url.*} properties
     */
    public void configure(JobConf jobConf) {
        this.jobConf = jobConf;
        filter = jobConf.getBoolean(URL_FILTERING, false);
        normalize = jobConf.getBoolean(URL_NORMALIZING, false);
        if (filter) {
            filters = new URLFilters(jobConf);
        }
        if (normalize) {
            scope = jobConf.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_LINKDB);
            normalizers = new URLNormalizers(jobConf, scope);
        }
    }

    /** No resources to release. */
    public void close() {
    }

    /**
     * Emits the entry unchanged in key, with rejected inlinks removed, provided
     * the key URL is accepted and at least one inlink survives.
     *
     * @param writableComparable entry key; its {@code toString()} is treated as the target URL
     * @param writable           entry value; cast to {@link Inlinks} and pruned in place
     * @param outputCollector    receives the surviving entry
     * @param reporter           unused
     * @throws IOException if collecting the output fails
     */
    public void map(WritableComparable writableComparable, Writable writable, OutputCollector outputCollector, Reporter reporter) throws IOException {
        // Entry is discarded outright when its own URL does not pass.
        if (normalizeAndFilter(writableComparable.toString()) == null) {
            return;
        }

        Inlinks links = (Inlinks) writable;
        for (Iterator cursor = links.iterator(); cursor.hasNext();) {
            Inlink link = (Inlink) cursor.next();
            if (normalizeAndFilter(link.getFromUrl()) == null) {
                // Iterator.remove keeps the in-place pruning safe while iterating.
                cursor.remove();
            }
        }

        // Don't collect entries whose inlinks were all rejected.
        if (links.size() != 0) {
            outputCollector.collect(writableComparable, links);
        }
    }

    /**
     * Runs a URL through the enabled normalization and filtering steps.
     *
     * @param url URL string to check
     * @return the resulting URL string, or {@code null} if a step returned
     *         {@code null} or threw (the failure is logged at WARN level)
     */
    private String normalizeAndFilter(String url) {
        String candidate = url;
        if (normalize) {
            try {
                candidate = normalizers.normalize(candidate, scope);
            } catch (Exception e) {
                LOG.warn("Skipping " + candidate + ":" + e);
                return null;
            }
        }
        if (candidate == null || !filter) {
            return candidate;
        }
        try {
            return filters.filter(candidate);
        } catch (Exception e) {
            LOG.warn("Skipping " + candidate + ":" + e);
            return null;
        }
    }
}
