package org.apache.nutch.net;

import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Vector;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.ExtensionPoint;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.plugin.PluginRuntimeException;

/* loaded from: input_file:org/apache/nutch/net/URLNormalizers.class */
/**
 * Runs a chain of {@link URLNormalizer} plugins over URL strings.
 *
 * <p>The chain is resolved once, at construction time, for a named scope:
 * <ul>
 *   <li>{@code urlnormalizer.scope.<scope>} (optional) restricts the chain to the listed
 *       implementation class names (whitespace separated);</li>
 *   <li>{@code urlnormalizer.order.<scope>}, falling back to {@code urlnormalizer.order},
 *       lists implementation class names that run first, in the given order; any remaining
 *       normalizers follow in {@link HashMap} iteration order (i.e. unspecified);</li>
 *   <li>{@code urlnormalizer.loop.count} (default 1) is the maximum number of passes
 *       {@link #normalize(String, String)} makes over the chain.</li>
 * </ul>
 *
 * <p>Resolved extension lists and normalizer instances are cached as objects on the
 * {@link Configuration}.
 */
public final class URLNormalizers {
    /** Scope used as a fallback when the requested scope resolves to the empty sentinel. */
    public static final String SCOPE_DEFAULT = "default";
    /** Scope name constant: {@code "partition"}. */
    public static final String SCOPE_PARTITION = "partition";
    /** Scope name constant: {@code "generate_host_count"}. */
    public static final String SCOPE_GENERATE_HOST_COUNT = "generate_host_count";
    /** Scope name constant: {@code "fetcher"}. */
    public static final String SCOPE_FETCHER = "fetcher";
    /** Scope name constant: {@code "crawldb"}. */
    public static final String SCOPE_CRAWLDB = "crawldb";
    /** Scope name constant: {@code "linkdb"}. */
    public static final String SCOPE_LINKDB = "linkdb";
    /** Scope name constant: {@code "inject"}. */
    public static final String SCOPE_INJECT = "inject";
    /** Scope name constant: {@code "outlink"}. */
    public static final String SCOPE_OUTLINK = "outlink";

    public static final Log LOG = LogFactory.getLog(URLNormalizers.class);

    /**
     * Sentinel meaning "no extensions known for this scope". It is compared by reference,
     * so it must be a single shared instance ({@link Collections#emptyList()} returns the
     * same singleton as {@code Collections.EMPTY_LIST}).
     */
    private static final List<Extension> EMPTY_EXTENSION_LIST = Collections.emptyList();

    /** Sentinel meaning "no normalizers for this scope"; compared by reference. */
    private static final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];

    private final Configuration conf;
    private final ExtensionPoint extensionPoint;
    private final URLNormalizer[] normalizers;
    private final int loopCount;

    /**
     * Resolves the normalizer chain for {@code scope}.
     *
     * @param conf  configuration supplying the plugin repository, the {@code urlnormalizer.*}
     *              properties and the object cache
     * @param scope name of the scope whose normalizers should be used
     * @throws RuntimeException if the {@link URLNormalizer#X_POINT_ID} extension point is
     *                          not registered in the plugin repository
     */
    public URLNormalizers(Configuration conf, String scope) {
        this.conf = conf;
        this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(URLNormalizer.X_POINT_ID);
        if (this.extensionPoint == null) {
            throw new RuntimeException("x point " + URLNormalizer.X_POINT_ID + " not found.");
        }

        URLNormalizer[] resolved = cachedOrBuiltNormalizers(scope);
        // NOTE(review): findExtensions() never returns null, so this class itself never
        // produces the empty sentinel and this fallback only fires if the sentinel reaches
        // us some other way. Behaviour is intentionally left as-is; confirm whether an
        // *empty* chain was meant to fall back to the default scope as well.
        if (resolved == EMPTY_NORMALIZERS) {
            resolved = cachedOrBuiltNormalizers(SCOPE_DEFAULT);
        }
        this.normalizers = resolved;
        this.loopCount = conf.getInt("urlnormalizer.loop.count", 1);
    }

    /** Returns the normalizer array cached on the configuration for {@code scope}, or builds it. */
    private URLNormalizer[] cachedOrBuiltNormalizers(String scope) {
        URLNormalizer[] cached =
                (URLNormalizer[]) this.conf.getObject(URLNormalizer.X_POINT_ID + "_" + scope);
        return cached != null ? cached : getURLNormalizers(scope);
    }

    /**
     * Instantiates (or fetches from the configuration's object cache) the normalizers
     * configured for {@code scope}.
     *
     * <p>A plugin that fails to instantiate is logged and skipped so the remaining
     * normalizers can still be used.
     *
     * @param scope scope name
     * @return the normalizers in execution order, or the shared empty sentinel array when
     *         the extension list for the scope is the empty sentinel
     */
    URLNormalizer[] getURLNormalizers(String scope) {
        List<Extension> extensions = getExtensions(scope);
        if (extensions == EMPTY_EXTENSION_LIST) {
            return EMPTY_NORMALIZERS;
        }

        List<URLNormalizer> instances = new ArrayList<URLNormalizer>(extensions.size());
        for (Extension extension : extensions) {
            try {
                // Normalizer instances are shared through the configuration, keyed by extension id.
                URLNormalizer normalizer = (URLNormalizer) this.conf.getObject(extension.getId());
                if (normalizer == null) {
                    normalizer = (URLNormalizer) extension.getExtensionInstance();
                    this.conf.setObject(extension.getId(), normalizer);
                }
                instances.add(normalizer);
            } catch (PluginRuntimeException e) {
                // Pass the exception to the logger instead of printing to stderr so the
                // stack trace ends up alongside the warning.
                LOG.warn("URLNormalizers:PluginRuntimeException when initializing url normalizer plugin " + extension.getDescriptor().getPluginId() + " instance in getURLNormalizers function: attempting to continue instantiating plugins", e);
            }
        }
        return instances.toArray(new URLNormalizer[instances.size()]);
    }

    /**
     * Returns the ordered extension list for {@code scope}, consulting the configuration's
     * object cache first and populating it on a miss.
     *
     * @param scope scope name
     * @return the extensions for the scope, or the empty sentinel list
     */
    @SuppressWarnings("unchecked") // the cache entry under this key is only ever written below
    private List<Extension> getExtensions(String scope) {
        String cacheKey = URLNormalizer.X_POINT_ID + "_x_" + scope;
        List<Extension> extensions = (List<Extension>) this.conf.getObject(cacheKey);

        // Reference comparison on purpose: the sentinel records "nothing to find".
        if (extensions == EMPTY_EXTENSION_LIST) {
            return EMPTY_EXTENSION_LIST;
        }
        if (extensions == null) {
            extensions = findExtensions(scope);
            if (extensions == null) {
                extensions = EMPTY_EXTENSION_LIST;
            }
            this.conf.setObject(cacheKey, extensions);
        }
        return extensions;
    }

    /**
     * Computes the ordered list of normalizer extensions for {@code scope} from the
     * {@code urlnormalizer.order[.<scope>]} and {@code urlnormalizer.scope.<scope>} properties.
     *
     * @param scope scope name
     * @return a new, possibly empty, list; never {@code null}
     */
    private List<Extension> findExtensions(String scope) {
        String orderSpec = this.conf.get("urlnormalizer.order." + scope);
        if (orderSpec == null) {
            orderSpec = this.conf.get("urlnormalizer.order");
        }
        String[] order = splitOnWhitespace(orderSpec);

        String[] allowedNames = splitOnWhitespace(this.conf.get("urlnormalizer.scope." + scope));
        HashSet<String> allowed =
                allowedNames == null ? null : new HashSet<String>(Arrays.asList(allowedNames));

        // Index the candidate extensions by implementation class name.
        HashMap<String, Extension> byClass = new HashMap<String, Extension>();
        for (Extension extension : this.extensionPoint.getExtensions()) {
            if (allowed == null || allowed.contains(extension.getClazz())) {
                byClass.put(extension.getClazz(), extension);
            }
        }

        List<Extension> ordered = new ArrayList<Extension>();
        if (order != null) {
            // Explicitly ordered normalizers first; remove() also guards against duplicates
            // in the order list.
            for (String className : order) {
                Extension extension = byClass.remove(className);
                if (extension != null) {
                    ordered.add(extension);
                }
            }
        }
        // Whatever is left runs afterwards, in unspecified (map iteration) order.
        ordered.addAll(byClass.values());
        return ordered;
    }

    /**
     * Splits a property value on runs of whitespace.
     *
     * @return the tokens, or {@code null} when the value is {@code null} or blank
     */
    private static String[] splitOnWhitespace(String value) {
        if (value == null || value.trim().equals("")) {
            return null;
        }
        return value.split("\\s+");
    }

    /**
     * Applies every configured normalizer to {@code urlString}, repeating the whole chain up
     * to {@code urlnormalizer.loop.count} times or until a pass leaves the URL unchanged.
     *
     * @param urlString the URL to normalize; may be {@code null}
     * @param scope     scope name handed to each {@link URLNormalizer#normalize} call
     * @return the normalized URL, or {@code null} if the input is {@code null} or any
     *         normalizer returns {@code null}
     * @throws MalformedURLException if a normalizer rejects the URL
     */
    public String normalize(String urlString, String scope) throws MalformedURLException {
        // Previously a null input with an empty chain hit a NullPointerException on the
        // fixed-point check below; a null input with a non-empty chain already returned null.
        if (urlString == null) {
            return null;
        }

        String current = urlString;
        for (int pass = 0; pass < this.loopCount; pass++) {
            String beforePass = current;
            for (URLNormalizer normalizer : this.normalizers) {
                current = normalizer.normalize(current, scope);
                if (current == null) {
                    return null;
                }
            }
            if (beforePass.equals(current)) {
                break; // fixed point reached; further passes would be no-ops
            }
        }
        return current;
    }
}
