package org.apache.nutch.parse;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;

/* loaded from: input_file:org/apache/nutch/parse/ParseUtil.class */
/**
 * Convenience wrapper around {@link ParserFactory}: looks up the parser(s)
 * for a piece of {@link Content}, runs them, and returns the first successful
 * {@link Parse}. When no parser succeeds an "empty" parse is returned instead
 * of {@code null}.
 */
public class ParseUtil {
    public static final Log LOG = LogFactory.getLog(ParseUtil.class);
    private final Configuration conf;
    private final ParserFactory parserFactory;

    /**
     * Creates a helper that resolves parsers through a new
     * {@link ParserFactory} built from the given configuration.
     *
     * @param configuration configuration handed to the parser factory and
     *                      later used when building empty parses
     */
    public ParseUtil(Configuration configuration) {
        this.conf = configuration;
        this.parserFactory = new ParserFactory(configuration);
    }

    /**
     * Tries every parser the factory returns for the content's type and URL,
     * in order, and returns the first parse whose status reports success.
     *
     * <p>If none succeeds, a warning is logged and an empty parse is returned.
     * The empty parse is derived from the status of the last attempt when one
     * is available, otherwise from a fresh {@link ParseStatus}.
     *
     * @param content the content to parse
     * @return the first successful parse, or an empty parse if none succeeded
     * @throws ParseException if the factory reports that no parser is available;
     *                        the original {@link ParserNotFound} is attached as cause
     */
    public Parse parse(Content content) throws ParseException {
        try {
            // The factory is always given a non-null URL string.
            String url = content.getUrl() != null ? content.getUrl() : "";
            Parser[] parsers = this.parserFactory.getParsers(content.getContentType(), url);

            Parse lastAttempt = null;
            for (int i = 0; i < parsers.length; i++) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Parsing [" + content.getUrl() + "] with [" + parsers[i] + "]");
                }
                lastAttempt = parsers[i].getParse(content);
                if (isSuccessful(lastAttempt)) {
                    return lastAttempt;
                }
            }

            warnUnableToParse(content);

            // lastAttempt is null when the factory returned no parsers or the last
            // parser produced nothing; statusOf() tolerates that instead of throwing.
            ParseStatus status = statusOf(lastAttempt);
            return status == null ? new ParseStatus().getEmptyParse(this.conf) : status.getEmptyParse(this.conf);
        } catch (ParserNotFound e) {
            throw noParserFound(content, e);
        }
    }

    /**
     * Parses the content with the single parser registered under the given
     * extension id.
     *
     * <p>If that parser does not produce a successful parse, a warning is
     * logged and an empty parse built from a fresh {@link ParseStatus} is
     * returned.
     *
     * @param str     the extension id of the parser to use
     * @param content the content to parse
     * @return the successful parse, or an empty parse otherwise
     * @throws ParseException if the factory reports that no such parser exists;
     *                        the original {@link ParserNotFound} is attached as cause
     */
    public Parse parseByExtensionId(String str, Content content) throws ParseException {
        try {
            Parse parse = this.parserFactory.getParserById(str).getParse(content);
            if (isSuccessful(parse)) {
                return parse;
            }
            warnUnableToParse(content);
            return new ParseStatus().getEmptyParse(this.conf);
        } catch (ParserNotFound e) {
            throw noParserFound(content, e);
        }
    }

    /** Returns the status carried by the parse, or null if the parse or its data is missing. */
    private static ParseStatus statusOf(Parse parse) {
        if (parse == null || parse.getData() == null) {
            return null;
        }
        return parse.getData().getStatus();
    }

    /** Returns true only when the parse carries a status and that status reports success. */
    private static boolean isSuccessful(Parse parse) {
        ParseStatus status = statusOf(parse);
        return status != null && status.isSuccess();
    }

    /** Logs, at warn level, that no parser produced a successful parse for the content. */
    private static void warnUnableToParse(Content content) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("Unable to successfully parse content " + content.getUrl() + " of type " + content.getContentType());
        }
    }

    /** Logs the missing-parser condition and wraps it in a ParseException that keeps the cause. */
    private static ParseException noParserFound(Content content, ParserNotFound e) {
        if (LOG.isWarnEnabled()) {
            LOG.warn("No suitable parser found when trying to parse content " + content.getUrl() + " of type " + content.getContentType());
        }
        ParseException failure = new ParseException(e.getMessage());
        // Keep the original exception (and its stack trace) reachable via getCause().
        failure.initCause(e);
        return failure;
    }
}
