package de.dfki.leech.foresight;

import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.processes.StopWatch;
import de.dfki.km.leech.Leech;
import de.dfki.km.leech.io.URLStreamProvider;
import de.dfki.km.leech.parser.CrawlerParser;
import de.dfki.km.leech.sax.PrintlnContentHandler;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import javax.mail.URLName;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:de/dfki/leech/foresight/SpringerLinkJournalListParser.class */
/**
 * Crawler parser for SpringerLink journal issue listings (media type
 * {@code application/vnd.fs.springerlinkJournal}).
 *
 * <p>The listing page itself yields no content; each article link found in its table of
 * contents is reported as a sub data entity and then parsed with a fresh {@link Leech}
 * parser. After every {@link #iSleepIntervall} processed sub entities the calling thread
 * pauses for {@link #iSleepDuration} milliseconds.
 *
 * <p>The throttling counter is a plain static field shared by all instances and is not
 * synchronized.
 */
public class SpringerLinkJournalListParser extends CrawlerParser {
    private static final long serialVersionUID = 2806053179884244671L;

    /** Number of sub entities processed since the last pause. */
    protected static int iCurrentItemIndex = 0;

    /** Length of the pause in milliseconds (passed to {@link Thread#sleep(long)}). */
    protected static int iSleepDuration = 120000;

    /** Number of processed sub entities after which a pause is inserted. */
    protected static int iSleepIntervall = 100;

    private static final Set<MediaType> SUPPORTED_TYPES =
            Collections.singleton(MediaType.application("vnd.fs.springerlinkJournal"));

    /**
     * Manual smoke test: crawls one hard-coded journal issue page and prints everything the
     * crawl produces through a {@link PrintlnContentHandler} with full verbosity.
     *
     * @param strArr ignored
     * @throws Throwable whatever the crawl throws
     */
    public static void main(String[] strArr) throws Throwable {
        new Leech().parse("http://rd.springer.com/journal/12273/7/6/page/1", new PrintlnContentHandler(PrintlnContentHandler.Verbosity.all), new ParseContext());
    }

    /**
     * Collects the article links of the listing page named by the {@code "source"} metadata
     * entry.
     *
     * @param inputStream stream of the listing page; closed immediately because the page is
     *     fetched again through jsoup
     * @param contentHandler not used here
     * @param metadata must carry the listing URL under the key {@code "source"}
     * @param parseContext not used here
     * @return iterator over one map per article, each holding the absolute article URL under
     *     {@code "source"}
     * @throws Exception if closing the stream or fetching the page fails
     */
    protected Iterator<MultiValueHashMap<String, Object>> getSubDataEntitiesInformation(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        // The supplied stream is never read: jsoup downloads the page itself below.
        inputStream.close();

        LinkedList<MultiValueHashMap<String, Object>> subEntities = new LinkedList<>();
        Elements tocItems = Jsoup.connect(metadata.get("source")).get().select(".toc > ol:nth-child(2) > li");
        for (Element tocItem : tocItems) {
            Elements titleLinks = tocItem.select("div:nth-child(1) > h3:nth-child(2) > a:nth-child(1)[href]");
            if (titleLinks.isEmpty()) {
                continue;
            }
            String articleUrl = titleLinks.attr("abs:href");
            // jsoup yields "" when the href cannot be turned into an absolute URL; such an
            // entry could not be fetched later, so it is skipped here.
            if (articleUrl.isEmpty()) {
                continue;
            }
            MultiValueHashMap<String, Object> subEntity = new MultiValueHashMap<>();
            subEntity.add("source", articleUrl);
            subEntities.add(subEntity);
        }
        return subEntities.iterator();
    }

    /**
     * @param parseContext not used
     * @return the single media type handled by this parser
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /** Intentionally empty: the listing page itself produces no output. */
    protected void processCurrentDataEntity(InputStream inputStream, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
    }

    /**
     * Fetches and parses one article, then applies the request throttling.
     *
     * @param multiValueHashMap sub entity description; its first {@code "source"} value is
     *     the article URL
     * @param metadata metadata handed to the stream provider when building the article's
     *     metadata
     * @param contentHandler receives the parse events of the article
     * @param parseContext parse context forwarded to the stream provider and the parser
     * @throws Exception if fetching or parsing fails, or if the pause is interrupted
     */
    protected void processSubDataEntity(MultiValueHashMap<String, Object> multiValueHashMap, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
        URLName articleUrl = new URLName(multiValueHashMap.getFirst("source", new Object[0]).toString());
        Metadata articleMetadata = URLStreamProvider.getURLStreamProvider(articleUrl).addFirstMetadata(articleUrl, metadata, parseContext);

        // try-with-resources closes the stream on every path and, unlike a manual close in a
        // catch block, keeps the original failure as the primary exception.
        try (TikaInputStream stream = URLStreamProvider.getURLStreamProvider(articleUrl).getStream(articleUrl, articleMetadata, parseContext)) {
            new Leech().getParser().parse(stream, contentHandler, articleMetadata, parseContext);
        }

        // Pause after every iSleepIntervall successfully parsed articles.
        iCurrentItemIndex++;
        if (iCurrentItemIndex >= iSleepIntervall) {
            iCurrentItemIndex = 0;
            LoggerFactory.getLogger(SpringerLinkJournalListParser.class.getName()).info("will sleep " + StopWatch.formatTimeDistance(iSleepDuration) + " in order to be not blocked by springer for the next 10 minutes.");
            Thread.sleep(iSleepDuration);
        }
    }
}
