package de.dfki.km.leech.parser.filter;

import de.dfki.km.leech.config.CrawlerContext;
import de.dfki.km.leech.metadata.LeechMetadata;
import de.dfki.km.leech.parser.CrawlerParser;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.logging.Logger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/km/leech/parser/filter/URLFilteringParser.class */
/**
 * Parser decorator that only delegates to the wrapped parser when the values
 * stored under a configurable set of metadata keys are all accepted by the
 * URL filter of the current {@link CrawlerContext}.
 *
 * <p>If any of the checked metadata values is rejected, {@code parse} returns
 * without invoking the wrapped parser.
 */
public class URLFilteringParser extends ParserDecorator {
    private static final long serialVersionUID = 7864760975795972594L;

    /** Metadata keys whose values are checked against the URL filter. */
    Set<String> m_hsMetadataKeys;

    /**
     * Creates a filtering parser that checks the value of the {@code "source"}
     * metadata entry.
     *
     * @param parser the parser to delegate to when the filter accepts the entity
     */
    public URLFilteringParser(Parser parser) {
        this(parser, "source");
    }

    /**
     * Creates a filtering parser that checks the values of the given metadata
     * entries.
     *
     * @param parser the parser to delegate to when the filter accepts the entity
     * @param strArr the metadata keys whose values must all be accepted by the
     *               URL filter; duplicates are collapsed
     */
    public URLFilteringParser(Parser parser, String... strArr) {
        super(parser);
        this.m_hsMetadataKeys = new HashSet<>(Arrays.asList(strArr));
    }

    /**
     * Delegates to the wrapped parser unless the URL filter rejects the value
     * of one of the configured metadata keys.
     *
     * <p>The {@link CrawlerContext} is taken from {@code parseContext}; when
     * none is registered there, a default-constructed one is used. On
     * rejection the method returns without parsing, and an info message is
     * logged when the context's verbose flag is set.
     *
     * @param inputStream    the stream handed to the wrapped parser
     * @param contentHandler the handler handed to the wrapped parser
     * @param metadata       the metadata holding the values to check
     * @param parseContext   the parse context, optionally carrying a
     *                       {@link CrawlerContext}
     * @throws IOException   if thrown by the wrapped parser
     * @throws SAXException  if thrown by the wrapped parser
     * @throws TikaException if thrown by the wrapped parser
     */
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        CrawlerContext crawlerContext = (CrawlerContext) parseContext.get(CrawlerContext.class);
        if (crawlerContext == null) {
            crawlerContext = new CrawlerContext();
        }

        for (String metadataKey : this.m_hsMetadataKeys) {
            if (!crawlerContext.getURLFilter().accept(metadata.get(metadataKey))) {
                if (crawlerContext.getVerbose().booleanValue()) {
                    // The logger name intentionally stays that of CrawlerParser, as before.
                    Logger.getLogger(CrawlerParser.class.getName()).info("Data entity " + describeEntity(metadata) + " is outside the URL constraints for this data source. Skipping.");
                }
                // First rejected value wins: skip the wrapped parser entirely.
                return;
            }
        }

        getWrappedParser().parse(inputStream, contentHandler, metadata, parseContext);
    }

    /**
     * Returns the name used for the entity in log output: the {@code "source"}
     * metadata value, or the resource name entry when no source is set.
     */
    private static String describeEntity(Metadata metadata) {
        String source = metadata.get("source");
        return source != null ? source : metadata.get(LeechMetadata.RESOURCE_NAME_KEY);
    }
}
