package org.apache.nutch.tools.compat;

import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolBase;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.crawl.MapWritable;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;

/* loaded from: input_file:org/apache/nutch/tools/compat/CrawlDbConverter.class */
/**
 * Command-line tool and map task that rewrites a crawldb whose keys are read
 * as generic {@link WritableComparable} instances (per the usage text: a db
 * "that uses UTF8 class") into a crawldb keyed by {@link Text}.
 *
 * <p>When the optional <code>-withMetadata</code> flag is given, metadata keys
 * of each {@link CrawlDatum} that are {@link UTF8} instances are replaced by
 * equivalent {@link Text} keys as well. Metadata values are copied unchanged.
 *
 * <p>Usage: <code>CrawlDbConverter &lt;oldDb&gt; &lt;newDb&gt; [-withMetadata]</code>
 */
public class CrawlDbConverter extends ToolBase implements Mapper {
    private static final Log LOG = LogFactory.getLog(CrawlDbConverter.class);

    /** Job configuration key carrying the <code>-withMetadata</code> switch to the map tasks. */
    private static final String CONVERT_META_KEY = "db.converter.with.metadata";

    /** Whether metadata keys should be converted too; read from the job configuration. */
    private boolean withMetadata;

    /** Reused output key instance, refilled on every {@link #map} call. */
    private Text newKey;

    /**
     * Initializes this mapper from the job configuration.
     *
     * @param jobConf job configuration; {@value #CONVERT_META_KEY} (default
     *                <code>false</code>) selects metadata key conversion
     */
    public void configure(JobConf jobConf) {
        setConf(jobConf);
        this.withMetadata = jobConf.getBoolean(CONVERT_META_KEY, false);
        this.newKey = new Text();
    }

    /**
     * Emits the input record under a {@link Text} key built from the string
     * form of the input key. If metadata conversion is enabled and the datum
     * has metadata, the datum's metadata map is replaced by a copy in which
     * every {@link UTF8} key has been turned into a {@link Text} key.
     *
     * @param writableComparable input key; only its <code>toString()</code> value is used
     * @param writable           input value; cast to {@link CrawlDatum} when metadata
     *                           conversion is enabled, and always emitted as the output value
     * @param outputCollector    receives the (Text key, value) pair
     * @param reporter           unused
     * @throws IOException if the collector fails
     */
    public void map(WritableComparable writableComparable, Writable writable, OutputCollector outputCollector, Reporter reporter) throws IOException {
        this.newKey.set(writableComparable.toString());
        if (this.withMetadata) {
            CrawlDatum datum = (CrawlDatum) writable;
            MapWritable oldMeta = datum.getMetaData();
            if (oldMeta.size() > 0) {
                // Build a fresh map instead of mutating the one being iterated.
                MapWritable convertedMeta = new MapWritable();
                for (Writable metaKey : oldMeta.keySet()) {
                    // Look the value up with the ORIGINAL key before converting it.
                    Writable metaValue = oldMeta.get(metaKey);
                    Writable convertedKey = metaKey;
                    if (metaKey instanceof UTF8) {
                        convertedKey = new Text(metaKey.toString());
                    }
                    convertedMeta.put(convertedKey, metaValue);
                }
                datum.setMetaData(convertedMeta);
            }
        }
        outputCollector.collect(this.newKey, writable);
    }

    /** Nothing to release. */
    public void close() throws IOException {
    }

    /**
     * Command-line entry point; delegates to {@link #run(String[])} through
     * <code>doMain</code> with a configuration from {@link NutchConfiguration#create()}.
     *
     * @param strArr command-line arguments, see {@link #run(String[])}
     * @throws Exception propagated from <code>doMain</code>
     */
    public static void main(String[] strArr) throws Exception {
        new CrawlDbConverter().doMain(NutchConfiguration.create(), strArr);
    }

    /**
     * Configures and runs the conversion job, then hands the result to
     * {@link CrawlDb#install} together with the new db path.
     *
     * @param strArr <code>&lt;oldDb&gt; &lt;newDb&gt; [-withMetadata]</code>; both
     *               paths are required
     * @return 0 on success; -1 if arguments are missing, the old db's
     *         <code>current</code> directory does not exist, or the job or
     *         install step throws
     * @throws Exception declared by the tool contract; failures of the job and
     *                   install step are caught and logged here
     */
    public int run(String[] strArr) throws Exception {
        // Both <oldDb> and <newDb> are mandatory. Checking only for "no
        // arguments" would let a one-argument call run the whole job and then
        // fail on the missing <newDb> at install time.
        if (strArr.length < 2) {
            System.err.println("Usage: CrawlDbConverter <oldDb> <newDb> [-withMetadata]");
            System.err.println("\toldDb\tname of the crawldb that uses UTF8 class.");
            System.err.println("\tnewDb\tname of the output crawldb that will use Text class.");
            System.err.println("\twithMetadata\tconvert also all metadata keys that use UTF8 to Text.");
            return -1;
        }
        NutchJob job = new NutchJob(getConf());
        FileSystem fs = FileSystem.get(getConf());
        Path oldDbCurrent = new Path(strArr[0], "current");
        // Job output goes to a randomly named directory below the old db's
        // "current" directory.
        // NOTE(review): the output directory is nested inside the job's input
        // path; presumably CrawlDb.install moves it into place afterwards - confirm.
        Path jobOutput = new Path(oldDbCurrent, Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
        if (!fs.exists(oldDbCurrent)) {
            LOG.fatal("Old db doesn't exist in '" + strArr[0] + "'");
            return -1;
        }
        boolean convertMetadata = strArr.length > 2 && strArr[2].equalsIgnoreCase("-withMetadata");
        job.setBoolean(CONVERT_META_KEY, convertMetadata);
        job.setInputPath(oldDbCurrent);
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(CrawlDbConverter.class);
        job.setOutputFormat(MapFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(CrawlDatum.class);
        job.setOutputPath(jobOutput);
        try {
            JobClient.runJob(job);
            CrawlDb.install(job, new Path(strArr[1]));
            return 0;
        } catch (Exception e) {
            // Tool boundary: report any failure through the exit code.
            LOG.fatal("Error: " + StringUtils.stringifyException(e));
            return -1;
        }
    }
}
