package org.apache.nutch.parse;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.ExtensionPoint;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.plugin.PluginRuntimeException;
import org.apache.nutch.util.LogUtil;
import org.apache.nutch.util.mime.MimeType;
import org.apache.nutch.util.mime.MimeTypeException;

/* loaded from: input_file:org/apache/nutch/parse/ParserFactory.class */
/**
 * Looks up and instantiates {@link Parser} extensions, either for a content
 * type or by plugin id.
 *
 * <p>Both the resolved extension lists (keyed by cleaned content type) and the
 * instantiated parsers (keyed by extension id) are stored in the
 * {@link Configuration} object passed to the constructor, so repeated lookups
 * reuse earlier results.</p>
 */
public final class ParserFactory {
    public static final Log LOG = LogFactory.getLog(ParserFactory.class);

    /** Wildcard content type used to look up the fallback plugin list. */
    public static final String DEFAULT_PLUGIN = "*";

    /**
     * Sentinel stored in the configuration to remember that no extension was
     * found for a content type. It is compared by identity in
     * {@link #getExtensions(String)}.
     */
    private final List EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
    private Configuration conf;
    private ExtensionPoint extensionPoint;
    private ParsePluginList parsePluginList;

    /**
     * Builds a factory bound to the given configuration.
     *
     * <p>The parse plugin list is taken from the configuration's object cache
     * when present; otherwise it is read with {@link ParsePluginsReader} and
     * stored there.</p>
     *
     * @param configuration configuration used to locate the plugin repository
     *                      and to cache the plugin list, extension lists and parsers
     * @throws RuntimeException if the parser extension point is not registered
     *                          or the parse plugin list could not be loaded
     */
    public ParserFactory(Configuration configuration) {
        this.conf = configuration;
        this.extensionPoint = PluginRepository.get(configuration).getExtensionPoint(Parser.X_POINT_ID);

        String cacheKey = ParsePluginList.class.getName();
        ParsePluginList pluginList = (ParsePluginList) configuration.getObject(cacheKey);
        if (pluginList == null) {
            pluginList = new ParsePluginsReader().parse(configuration);
            // Only cache a successful load; a null result is reported below.
            if (pluginList != null) {
                configuration.setObject(cacheKey, pluginList);
            }
        }
        this.parsePluginList = pluginList;

        if (this.extensionPoint == null) {
            throw new RuntimeException("x point " + Parser.X_POINT_ID + " not found.");
        }
        if (this.parsePluginList == null) {
            throw new RuntimeException("Parse Plugins preferences could not be loaded.");
        }
    }

    /**
     * Returns the parsers able to handle the given content type.
     *
     * <p>Parsers that fail to instantiate are logged and skipped, so the
     * returned array may contain fewer entries than there are matching
     * extensions (and may be empty).</p>
     *
     * @param str  the content type to find parsers for
     * @param str2 value passed as the first argument of {@link ParserNotFound}
     *             when nothing matches (presumably the URL being parsed;
     *             confirm against {@code ParserNotFound})
     * @return the parser instances, in the order of the matching extensions
     * @throws ParserNotFound if no extension matches the content type
     */
    public Parser[] getParsers(String str, String str2) throws ParserNotFound {
        List<Extension> extensions = getExtensions(str);
        if (extensions == null) {
            throw new ParserNotFound(str2, str);
        }
        List<Parser> parsers = new ArrayList<Parser>(extensions.size());
        for (Extension extension : extensions) {
            try {
                // Reuse a parser instance cached under the extension id, if any.
                Parser parser = (Parser) this.conf.getObject(extension.getId());
                if (parser == null) {
                    parser = (Parser) extension.getExtensionInstance();
                    this.conf.setObject(extension.getId(), parser);
                }
                parsers.add(parser);
            } catch (PluginRuntimeException e) {
                // Best effort: one broken plugin must not prevent the others from loading.
                if (LOG.isWarnEnabled()) {
                    e.printStackTrace(LogUtil.getWarnStream(LOG));
                    LOG.warn("ParserFactory:PluginRuntimeException when initializing parser plugin " + extension.getDescriptor().getPluginId() + " instance in getParsers function: attempting to continue instantiating parsers");
                }
            }
        }
        return parsers.toArray(new Parser[0]);
    }

    /**
     * Returns the parser whose extension id, or whose alias in the parse
     * plugin list, equals the given id.
     *
     * @param str the extension id or alias to look up; may be {@code null},
     *            in which case only the alias table is consulted
     * @return the cached or newly created parser instance
     * @throws ParserNotFound if no extension matches the id or alias, or if
     *                        the parser cannot be instantiated
     */
    public Parser getParserById(String str) throws ParserNotFound {
        Extension[] extensions = this.extensionPoint.getExtensions();
        Extension extension = null;
        if (str != null) {
            extension = getExtension(extensions, str);
        }
        if (extension == null) {
            extension = getExtensionFromAlias(extensions, str);
        }
        if (extension == null) {
            throw new ParserNotFound("No Parser Found for id [" + str + "]");
        }

        Object cached = this.conf.getObject(extension.getId());
        if (cached != null) {
            return (Parser) cached;
        }
        try {
            Parser parser = (Parser) extension.getExtensionInstance();
            this.conf.setObject(extension.getId(), parser);
            return parser;
        } catch (PluginRuntimeException e) {
            if (LOG.isWarnEnabled()) {
                LOG.warn("Cannot initialize parser " + extension.getDescriptor().getPluginId() + " (cause: " + e.toString() + ")");
            }
            throw new ParserNotFound("Cannot init parser for id [" + str + "]");
        }
    }

    /**
     * Finds the extensions that can parse the given content type, caching the
     * outcome (including "nothing found") in the configuration.
     *
     * @param str the content type; it is cleaned with {@link MimeType#clean}
     *            and used raw if cleaning fails
     * @return the matching extensions, or {@code null} if there are none
     */
    protected List getExtensions(String str) {
        String type;
        try {
            type = MimeType.clean(str);
        } catch (MimeTypeException e) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Could not clean the content-type [" + str + "], Reason is [" + e + "]. Using its raw version...");
            }
            type = str;
        }

        List extensions = (List) this.conf.getObject(type);
        // The sentinel records an earlier unsuccessful lookup for this type.
        if (extensions == this.EMPTY_EXTENSION_LIST) {
            return null;
        }
        if (extensions == null) {
            extensions = findExtensions(type);
            if (extensions != null) {
                this.conf.setObject(type, extensions);
            } else {
                this.conf.setObject(type, this.EMPTY_EXTENSION_LIST);
            }
        }
        return extensions;
    }

    /**
     * Resolves extensions for a content type, falling back to the plugin list
     * registered under {@link #DEFAULT_PLUGIN} when the type itself yields nothing.
     *
     * @param contentType the (cleaned) content type
     * @return the matching extensions, or {@code null} if there are none
     */
    private List findExtensions(String contentType) {
        Extension[] extensions = this.extensionPoint.getExtensions();
        List matched = matchExtensions(this.parsePluginList.getPluginList(contentType), extensions, contentType);
        if (matched != null) {
            return matched;
        }
        return matchExtensions(this.parsePluginList.getPluginList(DEFAULT_PLUGIN), extensions, DEFAULT_PLUGIN);
    }

    /**
     * Selects the extensions to use for a content type.
     *
     * <p>When a plugin id list is given, each id is resolved to an extension:
     * first requiring the extension to declare the content type, then by id
     * alone (with a warning). When no list is given, every extension whose
     * {@code contentType} attribute equals the content type is selected.</p>
     *
     * @param pluginIds   plugin ids mapped to the content type, or {@code null}
     *                    if nothing is mapped
     * @param extensions  all extensions registered at the parser extension point
     * @param contentType the content type being resolved
     * @return a non-empty list of {@link Extension} objects, or {@code null}
     */
    private List matchExtensions(List pluginIds, Extension[] extensions, String contentType) {
        List<Extension> matched = new ArrayList<Extension>();
        if (pluginIds != null) {
            for (Object entry : pluginIds) {
                String pluginId = (String) entry;
                Extension extension = getExtension(extensions, pluginId, contentType);
                if (extension == null) {
                    // Fall back to an id-only match and report why the strict match failed.
                    extension = getExtension(extensions, pluginId);
                    if (LOG.isWarnEnabled()) {
                        if (extension != null) {
                            LOG.warn("ParserFactory:Plugin: " + pluginId + " mapped to contentType " + contentType + " via parse-plugins.xml, but its plugin.xml file does not claim to support contentType: " + contentType);
                        } else {
                            LOG.warn("ParserFactory: Plugin: " + pluginId + " mapped to contentType " + contentType + " via parse-plugins.xml, but not enabled via plugin.includes in nutch-default.xml");
                        }
                    }
                }
                if (extension != null) {
                    matched.add(extension);
                }
            }
        } else {
            // Ids are collected separately for the log message only; the
            // returned list must hold Extension objects because callers
            // iterate it as List<Extension>.
            List<String> claimingIds = new ArrayList<String>();
            for (Extension candidate : extensions) {
                String declaredType = candidate.getAttribute("contentType");
                if (declaredType != null && declaredType.equals(contentType)) {
                    matched.add(candidate);
                    claimingIds.add(candidate.getId());
                }
            }
            if (!matched.isEmpty()) {
                if (LOG.isInfoEnabled()) {
                    LOG.info("The parsing plugins: " + claimingIds + " are enabled via the plugin.includes system property, and all claim to support the content type " + contentType + ", but they are not mapped to it  in the parse-plugins.xml file");
                }
            } else if (LOG.isDebugEnabled()) {
                LOG.debug("ParserFactory:No parse plugins mapped or enabled for contentType " + contentType);
            }
        }
        return matched.isEmpty() ? null : matched;
    }

    /**
     * Tells whether an extension has the given id and either declares the
     * given content type or is being matched against {@link #DEFAULT_PLUGIN}.
     */
    private boolean match(Extension extension, String id, String contentType) {
        if (!id.equals(extension.getId())) {
            return false;
        }
        return contentType.equals(extension.getAttribute("contentType")) || contentType.equals(DEFAULT_PLUGIN);
    }

    /**
     * Returns the first extension that {@link #match} accepts for the id and
     * content type, or {@code null} if there is none.
     */
    private Extension getExtension(Extension[] extensions, String id, String contentType) {
        for (Extension candidate : extensions) {
            if (match(candidate, id, contentType)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Returns the first extension with the given id, or {@code null} if there
     * is none. A {@code null} id never matches, so a missing alias yields
     * {@code null} rather than a {@link NullPointerException}.
     */
    private Extension getExtension(Extension[] extensions, String id) {
        if (id == null) {
            return null;
        }
        for (Extension candidate : extensions) {
            if (id.equals(candidate.getId())) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Resolves an alias from the parse plugin list to an extension id and
     * returns the extension with that id, or {@code null} if the alias is
     * unknown or no extension has the aliased id.
     */
    private Extension getExtensionFromAlias(Extension[] extensions, String alias) {
        String aliasedId = (String) this.parsePluginList.getAliases().get(alias);
        return getExtension(extensions, aliasedId);
    }
}
