package de.dfki.km.leech.parser;

import de.dfki.inquisition.collections.MultiValueHashMap;
import de.dfki.km.leech.config.CrawlerContext;
import de.dfki.km.leech.parser.incremental.IncrementalCrawlingParser;
import de.dfki.km.leech.util.ExceptionUtils;
import de.dfki.km.leech.util.TikaUtils;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedList;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/km/leech/parser/CrawlerParser.class */
/**
 * Base class for Tika parsers that crawl a data entity together with the sub data entities it
 * refers to.
 *
 * <p>{@link #parse(InputStream, ContentHandler, Metadata, ParseContext)} implements the fixed
 * crawling workflow; subclasses supply the entity-specific parts through the three abstract
 * methods.
 */
public abstract class CrawlerParser implements Parser {
    private static final long serialVersionUID = -6707880965147815349L;

    /** Metadata key under which the crawling depth of an entity is read and written. */
    public static final String CURRENT_CRAWLING_DEPTH = "currentCrawlingDepth";

    /** Key looked up in a sub entity's information map to name the entity in error handling. */
    public static final String SOURCEID = "sourceId";

    /** Number of processed sub entities after which a garbage collection is suggested to the VM. */
    private static final int GC_HINT_INTERVAL = 10000;

    /**
     * Delivers one information map per sub data entity of the entity currently being parsed.
     * Only invoked by {@code parse} when the next crawling depth is still within the configured
     * limit.
     *
     * @param inputStream the stream originally passed to {@code parse}
     * @param contentHandler the handler originally passed to {@code parse}
     * @param metadata a copy of the current entity's metadata
     * @param parseContext the parse context originally passed to {@code parse}
     * @return an iterator over the sub entity information maps; each element is handed to
     *         {@link #processSubDataEntity}
     * @throws Exception on any failure; {@code parse} reports it as a {@link TikaException}
     */
    protected abstract Iterator<MultiValueHashMap<String, Object>> getSubDataEntitiesInformation(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception;

    /**
     * Processes the current data entity and then crawls its sub data entities.
     *
     * <ol>
     *   <li>Unless the metadata flags the entity as unmodified, the entity is re-opened from its
     *       path and handed to {@link #processCurrentDataEntity}.</li>
     *   <li>If the next depth does not exceed the crawler context's crawling depth, every sub
     *       entity is handed to {@link #processSubDataEntity} until the iterator is exhausted or
     *       a stop was requested. A failure of a single sub entity is passed to
     *       {@link ExceptionUtils#handleException} and does not by itself end the loop.</li>
     *   <li>On the root level (depth 0) the metadata is cleared afterwards, and - if a stop was
     *       requested - threads waiting on the stop flag object are notified.</li>
     * </ol>
     *
     * @param inputStream stream of the entity to crawl
     * @param contentHandler handler passed on to {@link #getSubDataEntitiesInformation}
     * @param metadata metadata of the entity; it is cleared and reused for every sub entity
     * @param parseContext context; a {@link CrawlerContext} is read from it, a default one is
     *        used when none is present
     * @throws IOException if closing the re-opened entity stream fails
     * @throws SAXException declared by the parser interface
     * @throws TikaException on any crawling failure; exceptions of other types are wrapped and
     *         kept as cause
     */
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        CrawlerContext crawlerContext = (CrawlerContext) parseContext.get(CrawlerContext.class);
        if (crawlerContext == null) {
            crawlerContext = new CrawlerContext();
        }
        final String source = metadata.get("source");
        int currentDepth = 0;
        InputStream entityStream = null;
        try {
            // Parsed inside the try block so that a malformed depth value is reported as a
            // TikaException like every other crawling failure.
            String depthValue = metadata.get(CURRENT_CRAWLING_DEPTH);
            if (depthValue != null) {
                currentDepth = Integer.parseInt(depthValue);
            }

            String modificationState = metadata.get(IncrementalCrawlingParser.DATA_ENTITY_MODIFICATION_STATE);
            if (!IncrementalCrawlingParser.UNMODIFIED.equals(modificationState)) {
                // A second, independent stream on the same path is opened here; the caller's
                // stream is still handed to getSubDataEntitiesInformation below.
                entityStream = TikaInputStream.get(TikaInputStream.get(inputStream).getPath());
                processCurrentDataEntity(entityStream, metadata, TikaUtils.createContentHandler4SubCrawl(crawlerContext), parseContext);
            }

            Iterator<MultiValueHashMap<String, Object>> subEntities;
            if (currentDepth + 1 > crawlerContext.getCrawlingDepth()) {
                // Depth limit reached: do not even ask for the sub entities.
                subEntities = new LinkedList<MultiValueHashMap<String, Object>>().iterator();
            } else {
                subEntities = getSubDataEntitiesInformation(inputStream, contentHandler, TikaUtils.copyMetadata(metadata), parseContext);
            }

            int processedCount = 0;
            while (subEntities.hasNext() && !crawlerContext.stopRequested().booleanValue()) {
                MultiValueHashMap<String, Object> subEntityInfo = subEntities.next();
                ContentHandler subHandler = TikaUtils.createContentHandler4SubCrawl(crawlerContext);
                try {
                    TikaUtils.clearMetadata(metadata);
                    metadata.set(CURRENT_CRAWLING_DEPTH, String.valueOf(currentDepth + 1));
                    processSubDataEntity(subEntityInfo, metadata, subHandler, parseContext);
                } catch (Throwable t) {
                    // One failing sub entity is delegated to the central exception handling
                    // instead of aborting the crawl directly.
                    Object sourceId = subEntityInfo.getFirst(SOURCEID);
                    ExceptionUtils.handleException(t, sourceId == null ? "noSourceId" : sourceId.toString(), metadata, crawlerContext, parseContext, currentDepth, subHandler);
                }
                processedCount++;
                if (processedCount % GC_HINT_INTERVAL == 0) {
                    // NOTE(review): the double call is kept from the original; presumably meant
                    // to limit memory growth during very large crawls - confirm it is still needed.
                    System.gc();
                    System.gc();
                }
            }

            if (currentDepth == 0) {
                TikaUtils.clearMetadata(metadata);
            }
        } catch (TikaException e) {
            throw e;
        } catch (Exception e) {
            throw new TikaException("Error while crawling '" + source + "'", e);
        } finally {
            try {
                if (entityStream != null) {
                    entityStream.close();
                }
            } finally {
                // Runs even when close() fails, so that waiting threads are not left hanging.
                // NOTE(review): the monitor is the Boolean returned by the crawler context;
                // presumably the stop-requesting side waits on that same object - it must stay
                // the lock object here.
                Boolean stopRequested = crawlerContext.stopRequested();
                synchronized (stopRequested) {
                    if (stopRequested.booleanValue() && currentDepth == 0) {
                        stopRequested.notifyAll();
                    }
                }
            }
        }
    }

    /**
     * Processes the data entity currently being parsed. Invoked by {@code parse} only when the
     * entity is not flagged as unmodified.
     *
     * @param inputStream a stream freshly opened on the entity's path; closed by {@code parse}
     * @param metadata the metadata passed to {@code parse}
     * @param contentHandler a handler created for this entity from the crawler context
     * @param parseContext the parse context passed to {@code parse}
     * @throws Exception on any failure; {@code parse} reports it as a {@link TikaException}
     */
    protected abstract void processCurrentDataEntity(InputStream inputStream, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception;

    /**
     * Processes a single sub data entity delivered by {@link #getSubDataEntitiesInformation}.
     *
     * @param multiValueHashMap the information map describing the sub entity
     * @param metadata cleared metadata that only carries the sub entity's crawling depth under
     *        {@link #CURRENT_CRAWLING_DEPTH}
     * @param contentHandler a handler created for this sub entity from the crawler context
     * @param parseContext the parse context passed to {@code parse}
     * @throws Exception on any failure; {@code parse} hands it to
     *         {@link ExceptionUtils#handleException}
     */
    protected abstract void processSubDataEntity(MultiValueHashMap<String, Object> multiValueHashMap, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception;
}
