package org.apache.nutch.crawl;

import java.io.IOException;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Closeable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolBase;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;

/* loaded from: input_file:org/apache/nutch/crawl/LinkDbReader.class */
/**
 * Command-line tool and programmatic reader for a link database.
 *
 * <p>Two modes are offered by {@link #run(String[])}: {@code -dump}, which
 * runs a job writing the contents of the db's {@code current} directory as
 * text, and {@code -url}, which prints the inlinks stored for one URL.
 *
 * <p>Map file readers are opened lazily on the first lookup and are released
 * by {@link #close()}. Access to the cached readers is guarded by this
 * object's monitor.
 */
public class LinkDbReader extends ToolBase implements Closeable {
    public static final Log LOG = LogFactory.getLog(LinkDbReader.class);
    private static final Partitioner PARTITIONER = new HashPartitioner();
    private FileSystem fs;
    private Path directory;
    /** Lazily opened readers; only read or written while holding this object's monitor. */
    private MapFile.Reader[] readers;

    /** Creates an unconfigured reader; the configuration and {@link #init(Path)} must be supplied before lookups. */
    public LinkDbReader() {
    }

    /**
     * Creates a reader bound to the given configuration and link db directory.
     *
     * @param configuration configuration used to obtain the file system
     * @param path root directory of the link db
     * @throws Exception if {@link #init(Path)} fails
     */
    public LinkDbReader(Configuration configuration, Path path) throws Exception {
        setConf(configuration);
        init(path);
    }

    /**
     * Points this reader at a link db directory.
     *
     * <p>Any readers still cached from a previous directory are closed and
     * dropped first, so later lookups never hit the old db.
     *
     * @param path root directory of the link db
     * @throws Exception if the previous readers cannot be closed or the file
     *         system cannot be obtained
     */
    public void init(Path path) throws Exception {
        closeReaders();
        this.fs = FileSystem.get(getConf());
        this.directory = path;
    }

    /**
     * Returns the anchors of the inlinks recorded for a URL.
     *
     * @param text URL to look up
     * @return the anchors, or {@code null} when {@link #getInlinks(Text)} returns {@code null}
     * @throws IOException if the lookup fails
     */
    public String[] getAnchors(Text text) throws IOException {
        Inlinks inlinks = getInlinks(text);
        if (inlinks == null) {
            return null;
        }
        return inlinks.getAnchors();
    }

    /**
     * Looks up the inlinks recorded for a URL, opening the readers on first use.
     *
     * @param text URL to look up
     * @return the entry returned by the map file lookup; callers in this class
     *         treat {@code null} as "no link information"
     * @throws IOException if the readers cannot be opened or the lookup fails
     */
    public Inlinks getInlinks(Text text) throws IOException {
        return (Inlinks) MapFileOutputFormat.getEntry(openReaders(), PARTITIONER, text, new Inlinks());
    }

    /**
     * Closes every cached reader and forgets them, so a later lookup reopens
     * fresh readers instead of touching closed ones.
     *
     * @throws IOException the first failure met while closing; all remaining
     *         readers are still closed before it is rethrown
     */
    public void close() throws IOException {
        closeReaders();
    }

    /** Returns the cached readers, opening them under the lock if no thread has done so yet. */
    private synchronized MapFile.Reader[] openReaders() throws IOException {
        if (this.readers == null) {
            this.readers = MapFileOutputFormat.getReaders(this.fs, new Path(this.directory, "current"), getConf());
        }
        return this.readers;
    }

    /** Closes all cached readers, clears the cache, and rethrows the first close failure, if any. */
    private synchronized void closeReaders() throws IOException {
        MapFile.Reader[] open = this.readers;
        // Clear first so the cache never keeps a partially closed array.
        this.readers = null;
        if (open == null) {
            return;
        }
        IOException firstFailure = null;
        for (int i = 0; i < open.length; i++) {
            try {
                open[i].close();
            } catch (IOException e) {
                // Keep going: one failing reader must not leak the others.
                if (firstFailure == null) {
                    firstFailure = e;
                } else if (LOG.isWarnEnabled()) {
                    LOG.warn("LinkDbReader: " + StringUtils.stringifyException(e));
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /**
     * Runs a job that reads the db's {@code current} directory as a sequence
     * file input and writes it out with a text output format.
     *
     * @param str root directory of the link db
     * @param str2 output directory for the dump
     * @throws IOException if the job fails
     */
    public void processDumpJob(String str, String str2) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info("LinkDb dump: starting");
            LOG.info("LinkDb db: " + str);
        }
        Path path = new Path(str2);
        NutchJob nutchJob = new NutchJob(getConf());
        nutchJob.setJobName("read " + str);
        nutchJob.addInputPath(new Path(str, "current"));
        nutchJob.setInputFormat(SequenceFileInputFormat.class);
        nutchJob.setInputKeyClass(Text.class);
        nutchJob.setInputValueClass(Inlinks.class);
        nutchJob.setOutputPath(path);
        nutchJob.setOutputFormat(TextOutputFormat.class);
        nutchJob.setOutputKeyClass(Text.class);
        nutchJob.setOutputValueClass(Inlinks.class);
        JobClient.runJob(nutchJob);
    }

    /** Command-line entry point; exits with the status returned by the tool run. */
    public static void main(String[] strArr) throws Exception {
        System.exit(new LinkDbReader().doMain(NutchConfiguration.create(), strArr));
    }

    /**
     * Executes the tool.
     *
     * @param strArr {@code <linkdb> -dump <out_dir>} or {@code <linkdb> -url <url>}
     * @return 0 on success, -1 on bad usage or failure
     * @throws Exception declared for the tool contract; failures raised while
     *         processing are logged and reported as -1
     */
    public int run(String[] strArr) throws Exception {
        // Both modes need three arguments: the db, the mode flag, and its operand.
        if (strArr.length < 3) {
            System.err.println("Usage: LinkDbReader <linkdb> {-dump <out_dir> | -url <url>)");
            System.err.println("\t-dump <out_dir>\tdump whole link db to a text file in <out_dir>");
            System.err.println("\t-url <url>\tprint information about <url> to System.out");
            return -1;
        }
        try {
            if (strArr[1].equals("-dump")) {
                processDumpJob(strArr[0], strArr[2]);
                return 0;
            }
            if (!strArr[1].equals("-url")) {
                System.err.println("Error: wrong argument " + strArr[1]);
                return -1;
            }
            init(new Path(strArr[0]));
            try {
                Inlinks inlinks = getInlinks(new Text(strArr[2]));
                if (inlinks == null) {
                    System.out.println(" - no link information.");
                    return 0;
                }
                Iterator<?> it = inlinks.iterator();
                while (it.hasNext()) {
                    System.out.println(it.next().toString());
                }
                return 0;
            } finally {
                // Release the readers opened for this lookup.
                close();
            }
        } catch (Exception e) {
            LOG.fatal("LinkDbReader: " + StringUtils.stringifyException(e));
            return -1;
        }
    }
}
