package org.apache.nutch.crawl;

import java.io.IOException;
import java.util.Iterator;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolBase;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;

/* loaded from: input_file:org/apache/nutch/crawl/Injector.class */
/**
 * Command-line tool that adds seed URLs from a directory of text files to a crawl db.
 *
 * <p>The work is done by two MapReduce jobs, see {@link #inject(Path, Path)}:
 * <ol>
 *   <li>every line of the seed files is normalized, filtered and converted to a
 *       {@code CrawlDatum} carrying a temporary "injected" marker status, written to a
 *       temporary directory;</li>
 *   <li>that temporary output is merged with the crawl db by {@link InjectReducer}, which
 *       never lets an injected entry replace an entry that is already present.</li>
 * </ol>
 */
public class Injector extends ToolBase {
    public static final Log LOG = LogFactory.getLog(Injector.class);

    /** Job property used to hand the injection timestamp from {@link #inject} to the mappers. */
    private static final String CURRENT_TIME_KEY = "injector.current.time";

    /**
     * Status given to freshly injected entries so the reducer can tell them apart from
     * entries that come from the existing crawl db.
     * NOTE(review): presumably mirrors CrawlDatum.STATUS_INJECTED - confirm.
     */
    private static final int STATUS_INJECTED_MARKER = 66;

    /**
     * Status an injected entry is switched to by the reducer before it is written out.
     * NOTE(review): presumably mirrors CrawlDatum.STATUS_DB_UNFETCHED - confirm.
     */
    private static final int STATUS_AFTER_MERGE = 1;

    /**
     * Turns each line of a seed file into a {@code (url, CrawlDatum)} pair.
     *
     * <p>Lines are normalized with the {@code URLNormalizers.SCOPE_INJECT} scope and passed
     * through the configured URL filters; lines that are blank, rejected (filter result
     * {@code null}) or that make normalization/filtering throw are dropped.
     */
    public static class InjectMapper implements Mapper {
        private URLNormalizers urlNormalizers;
        private float interval;
        private float scoreInjected;
        private JobConf jobConf;
        private URLFilters filters;
        private ScoringFilters scfilters;
        private long curTime;

        /**
         * Reads the mapper settings from the job configuration.
         *
         * <ul>
         *   <li>{@code db.default.fetch.interval} (default 30.0) - fetch interval passed to
         *       every new {@code CrawlDatum};</li>
         *   <li>{@code db.score.injected} (default 1.0) - initial score of injected URLs;</li>
         *   <li>{@code injector.current.time} (default: now) - fetch time of injected URLs.</li>
         * </ul>
         *
         * @param jobConf configuration of the running job
         */
        public void configure(JobConf jobConf) {
            this.jobConf = jobConf;
            this.urlNormalizers = new URLNormalizers(jobConf, URLNormalizers.SCOPE_INJECT);
            this.interval = this.jobConf.getFloat("db.default.fetch.interval", 30.0f);
            this.filters = new URLFilters(this.jobConf);
            this.scfilters = new ScoringFilters(this.jobConf);
            this.scoreInjected = this.jobConf.getFloat("db.score.injected", 1.0f);
            this.curTime = jobConf.getLong(CURRENT_TIME_KEY, System.currentTimeMillis());
        }

        /** Nothing to release. */
        public void close() {
        }

        /**
         * Converts one seed line into a crawl db entry.
         *
         * @param key      ignored
         * @param value    a {@code Text} holding one line of a seed file; it is reused as the
         *                 output key after being overwritten with the accepted URL
         * @param output   receives the {@code (Text url, CrawlDatum)} pair for accepted URLs
         * @param reporter unused
         * @throws IOException if the collector fails to write the pair
         */
        public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter) throws IOException {
            Text line = (Text) value;
            String url = line.toString().trim();
            if (url.length() == 0) {
                // Blank lines are not URLs; skip them before they reach the plugin chain.
                return;
            }

            String accepted;
            try {
                accepted = this.filters.filter(this.urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT));
            } catch (Exception e) {
                // Any failure in the normalizer/filter chain only costs us this one line.
                if (Injector.LOG.isWarnEnabled()) {
                    Injector.LOG.warn("Skipping " + url + ":" + e);
                }
                accepted = null;
            }
            if (accepted == null) {
                return;
            }

            line.set(accepted);
            CrawlDatum datum = new CrawlDatum(STATUS_INJECTED_MARKER, this.interval);
            datum.setFetchTime(this.curTime);
            datum.setScore(this.scoreInjected);
            try {
                this.scfilters.injectedScore(line, datum);
            } catch (ScoringFilterException e) {
                if (Injector.LOG.isWarnEnabled()) {
                    Injector.LOG.warn("Cannot filter injected score for url " + accepted + ", using default (" + e.getMessage() + ")");
                }
                // A scoring filter may have partially changed the score before failing.
                datum.setScore(this.scoreInjected);
            }
            output.collect(line, datum);
        }
    }

    /**
     * Merges injected entries with the entries that share the same URL key.
     *
     * <p>If at least one value does not carry the injected marker status, the last such
     * value is emitted unchanged. Otherwise the last injected value is emitted with its
     * status switched from the marker to {@code 1}.
     */
    public static class InjectReducer implements Reducer {
        /** No configuration is needed. */
        public void configure(JobConf jobConf) {
        }

        /** Nothing to release. */
        public void close() {
        }

        /**
         * Emits exactly one {@code CrawlDatum} for the given URL key.
         *
         * @param key      the URL
         * @param values   iterator over the {@code CrawlDatum} values collected for the key
         * @param output   receives the single surviving {@code (key, CrawlDatum)} pair
         * @param reporter unused
         * @throws IOException if the collector fails to write the pair
         */
        public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException {
            CrawlDatum existing = null;
            CrawlDatum injected = null;
            while (values.hasNext()) {
                CrawlDatum datum = (CrawlDatum) values.next();
                if (datum.getStatus() == STATUS_INJECTED_MARKER) {
                    injected = datum;
                    injected.setStatus(STATUS_AFTER_MERGE);
                } else {
                    existing = datum;
                }
            }
            // An entry that is not marked as injected always wins over an injected one.
            output.collect(key, existing != null ? existing : injected);
        }
    }

    /** Creates an injector without a configuration; one must be set before use. */
    public Injector() {
    }

    /**
     * Creates an injector that uses the given configuration.
     *
     * @param configuration configuration handed to {@code setConf}
     */
    public Injector(Configuration configuration) {
        setConf(configuration);
    }

    /**
     * Injects the URLs found under {@code urlDir} into the crawl db at {@code crawlDb}.
     *
     * <p>The first job writes the converted seed URLs to a temporary directory named
     * {@code inject-temp-<random>} below {@code mapred.temp.dir} (default {@code "."}).
     * The second job, built by {@code CrawlDb.createJob}, gets that directory as an
     * additional input and uses {@link InjectReducer}; its result is put in place with
     * {@code CrawlDb.install}. The temporary directory is removed afterwards, and a
     * best-effort removal is also attempted when any of these steps fails.
     *
     * @param crawlDb crawl db to inject into
     * @param urlDir  directory containing the seed URL files
     * @throws IOException if a job fails, the result cannot be installed, or the
     *                     temporary directory cannot be deleted after a successful run
     */
    public void inject(Path crawlDb, Path urlDir) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info("Injector: starting");
            LOG.info("Injector: crawlDb: " + crawlDb);
            LOG.info("Injector: urlDir: " + urlDir);
        }
        Path tempDir = new Path(getConf().get("mapred.temp.dir", ".") + "/inject-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

        boolean succeeded = false;
        try {
            if (LOG.isInfoEnabled()) {
                LOG.info("Injector: Converting injected urls to crawl db entries.");
            }
            NutchJob sortJob = new NutchJob(getConf());
            sortJob.setJobName("inject " + urlDir);
            sortJob.setInputPath(urlDir);
            sortJob.setMapperClass(InjectMapper.class);
            sortJob.setOutputPath(tempDir);
            sortJob.setOutputFormat(SequenceFileOutputFormat.class);
            sortJob.setOutputKeyClass(Text.class);
            sortJob.setOutputValueClass(CrawlDatum.class);
            // One timestamp for the whole run, so all mappers stamp the same fetch time.
            sortJob.setLong(CURRENT_TIME_KEY, System.currentTimeMillis());
            JobClient.runJob(sortJob);

            if (LOG.isInfoEnabled()) {
                LOG.info("Injector: Merging injected urls into crawl db.");
            }
            JobConf mergeJob = CrawlDb.createJob(getConf(), crawlDb);
            mergeJob.addInputPath(tempDir);
            mergeJob.setReducerClass(InjectReducer.class);
            JobClient.runJob(mergeJob);
            CrawlDb.install(mergeJob, crawlDb);
            succeeded = true;
        } finally {
            if (!succeeded) {
                // Do not leave the temporary output behind; never mask the original failure.
                try {
                    new JobClient(getConf()).getFs().delete(tempDir);
                } catch (Exception cleanupFailure) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Injector: could not remove temporary directory " + tempDir + ": " + cleanupFailure);
                    }
                }
            }
        }

        new JobClient(getConf()).getFs().delete(tempDir);
        if (LOG.isInfoEnabled()) {
            LOG.info("Injector: done");
        }
    }

    /**
     * Command-line entry point; exits the JVM with the value returned by {@code doMain}.
     *
     * @param strArr command-line arguments, see {@link #run(String[])}
     * @throws Exception if {@code doMain} throws
     */
    public static void main(String[] strArr) throws Exception {
        System.exit(new Injector().doMain(NutchConfiguration.create(), strArr));
    }

    /**
     * Runs the injector with command-line style arguments.
     *
     * @param strArr {@code <crawldb> <url_dir>}; additional arguments are ignored
     * @return {@code 0} on success, {@code -1} if arguments are missing or injection fails
     * @throws Exception declared for the tool contract; failures of {@link #inject} are
     *                   logged and reported through the return value instead
     */
    public int run(String[] strArr) throws Exception {
        if (strArr.length < 2) {
            System.err.println("Usage: Injector <crawldb> <url_dir>");
            return -1;
        }
        try {
            inject(new Path(strArr[0]), new Path(strArr[1]));
            return 0;
        } catch (Exception e) {
            LOG.fatal("Injector: " + StringUtils.stringifyException(e));
            return -1;
        }
    }
}
