package de.dfki.km.leech.parser;

import de.dfki.inquisition.collections.MultiValueHashMap;
import de.dfki.km.leech.Leech;
import de.dfki.km.leech.io.URLStreamProvider;
import java.io.InputStream;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Scanner;
import java.util.Set;
import java.util.logging.Logger;
import javax.mail.URLName;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:de/dfki/km/leech/parser/UrlListCrawlerParser.class */
/**
 * Crawler parser for the {@code application/leechUrlList} media type.
 *
 * <p>The input is read as UTF-8 text with one URL per line. Lines starting with {@code //} are
 * treated as comments and blank lines are ignored. Every remaining line becomes a sub data entity
 * that is opened through {@link URLStreamProvider} and handed to a {@link Leech} parser.
 */
public class UrlListCrawlerParser extends CrawlerParser {
    private static final long serialVersionUID = -1061129792080490892L;

    private static final Logger LOGGER = Logger.getLogger(UrlListCrawlerParser.class.getName());

    /** Lazily created in {@link #processSubDataEntity}; reused for all following sub entities. */
    protected Leech m_leech;

    /**
     * Reads the URL list from {@code inputStream} and returns one attribute map per URL.
     *
     * <p>Each returned map holds the URL string under the key {@code "url"}. The stream is consumed
     * and closed by this method (closing the scanner closes the underlying stream).
     *
     * @param inputStream    the URL list, decoded as UTF-8
     * @param contentHandler unused here
     * @param metadata       unused here
     * @param parseContext   unused here
     * @return an iterator over the collected entries, in file order
     * @throws Exception if reading from {@code inputStream} failed
     */
    @Override // de.dfki.km.leech.parser.CrawlerParser
    protected Iterator<MultiValueHashMap<String, Object>> getSubDataEntitiesInformation(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        LinkedList<MultiValueHashMap<String, Object>> entries = new LinkedList<>();

        try (Scanner lines = new Scanner(inputStream, "UTF-8").useDelimiter("\n")) {
            while (lines.hasNext()) {
                // trim() drops the '\r' left over from CRLF line endings as well as stray padding
                String url = lines.next().trim();
                if (url.isEmpty() || url.startsWith("//")) {
                    continue;
                }

                MultiValueHashMap<String, Object> entry = new MultiValueHashMap<>();
                entry.add("url", url);
                entries.add(entry);
            }

            // Scanner swallows I/O errors of the underlying stream and merely stops iterating;
            // surface them instead of silently returning a truncated URL list.
            Exception readFailure = lines.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        }

        return entries.iterator();
    }

    /**
     * @return a singleton set containing the media type {@code application/leechUrlList}
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return Collections.singleton(new MediaType("application", "leechUrlList"));
    }

    /**
     * Does nothing: this parser only acts on the sub entities (the listed URLs).
     */
    @Override // de.dfki.km.leech.parser.CrawlerParser
    protected void processCurrentDataEntity(InputStream inputStream, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
    }

    /**
     * Opens the URL stored under {@code "url"} in the given entry and parses it with the Leech parser.
     *
     * @param multiValueHashMap entry produced by {@link #getSubDataEntitiesInformation}
     * @param metadata          metadata passed to the stream provider to obtain the entity's metadata
     * @param contentHandler    receives the parse events of the referenced document
     * @param parseContext      forwarded to the stream provider and the parser
     * @throws Exception if the stream cannot be opened or parsing fails
     */
    @Override // de.dfki.km.leech.parser.CrawlerParser
    protected void processSubDataEntity(MultiValueHashMap<String, Object> multiValueHashMap, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
        String url = (String) multiValueHashMap.getFirst("url");
        URLName urlName = new URLName(url);
        Metadata entityMetadata = URLStreamProvider.getURLStreamProvider(urlName).addFirstMetadata(urlName, metadata, parseContext);

        // try-with-resources closes the stream exactly once and keeps a parse failure as the
        // primary exception (a failing close() is attached as suppressed).
        try (TikaInputStream stream = URLStreamProvider.getURLStreamProvider(urlName).getStream(urlName, entityMetadata, parseContext)) {
            if (this.m_leech == null) {
                this.m_leech = new Leech();
            }
            LOGGER.info("Will crawl " + url);
            this.m_leech.getParser().parse(stream, contentHandler, entityMetadata, parseContext);
        }
    }
}
