package org.apache.nutch.parse;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.util.NutchConfiguration;

/* loaded from: input_file:org/apache/nutch/parse/ParserChecker.class */
/**
 * Command-line tool that fetches a single URL, runs it through {@link ParseUtil} and prints the
 * resulting parse data (and, optionally, the parse text) to standard output.
 *
 * <p>Usage: {@code ParserChecker [-dumpText] [-forceAs mimeType] url}
 *
 * <ul>
 *   <li>{@code -dumpText}: also print the parse text after the parse data.
 *   <li>{@code -forceAs mimeType}: overwrite the fetched content's content type with
 *       {@code mimeType} before parsing.
 *   <li>{@code url}: the URL to fetch; it must be the last argument.
 * </ul>
 */
public class ParserChecker {
    public static final Log LOG = LogFactory.getLog(ParserChecker.class);

    /** Usage line printed to stderr whenever the command line is malformed. */
    private static final String USAGE = "Usage: ParserChecker [-dumpText] [-forceAs mimeType] url";

    /**
     * Parses the command line, fetches the URL, parses the content and prints the result.
     *
     * <p>The JVM is terminated via {@link System#exit(int)}: status {@code 0} after the parse
     * output has been printed, status {@code -1} when the arguments are malformed, no content was
     * fetched, or no content type is available.
     *
     * @param args command-line arguments, see the class documentation for the accepted forms
     * @throws Exception if fetching or parsing fails with an exception
     */
    public static void main(String[] args) throws Exception {
        boolean dumpText = false;
        String contentType = null;
        String url = null;

        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            if (arg.equals("-forceAs")) {
                // The flag needs a value; without this check a trailing "-forceAs" would read
                // past the end of the array.
                if (i + 1 >= args.length) {
                    exitWithUsage();
                    return;
                }
                contentType = args[++i];
            } else if (arg.equals("-dumpText")) {
                dumpText = true;
            } else if (i != args.length - 1) {
                // Anything that is not a known flag is only accepted in the last position.
                exitWithUsage();
                return;
            } else {
                url = arg;
            }
        }

        // Covers the empty command line as well as invocations where only flags were given
        // (or where "-forceAs" consumed the final token), so a null URL is never fetched.
        if (url == null) {
            exitWithUsage();
            return;
        }
        boolean forceContentType = contentType != null;

        if (LOG.isInfoEnabled()) {
            LOG.info("fetching: " + url);
        }
        Configuration conf = NutchConfiguration.create();
        Content content = new ProtocolFactory(conf)
                .getProtocol(url)
                .getProtocolOutput(new Text(url), new CrawlDatum())
                .getContent();

        // NOTE(review): defensive check; presumably the protocol output carries no content when
        // the fetch fails -- confirm against ProtocolOutput.
        if (content == null) {
            System.err.println("Could not fetch content for: " + url);
            System.exit(-1);
            return;
        }

        if (forceContentType) {
            content.setContentType(contentType);
        } else {
            contentType = content.getContentType();
        }
        if (contentType == null) {
            System.err.println("Could not determine content type for: " + url);
            System.exit(-1);
            return;
        }

        if (LOG.isInfoEnabled()) {
            LOG.info("parsing: " + url);
            LOG.info("contentType: " + contentType);
        }
        Parse parse = new ParseUtil(conf).parse(content);

        System.out.print("---------\nParseData\n---------\n");
        System.out.print(parse.getData().toString());
        if (dumpText) {
            System.out.print("---------\nParseText\n---------\n");
            System.out.print(parse.getText());
        }
        System.exit(0);
    }

    /** Prints the usage line to stderr and terminates the JVM with status {@code -1}. */
    private static void exitWithUsage() {
        System.err.println(USAGE);
        System.exit(-1);
    }
}
