package dfki.km.medico.aperture.crawler;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Set;
import org.ontoware.rdf2go.RDF2Go;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.node.URI;
import org.semanticdesktop.aperture.accessor.DataObject;
import org.semanticdesktop.aperture.accessor.FileDataObject;
import org.semanticdesktop.aperture.accessor.RDFContainerFactory;
import org.semanticdesktop.aperture.crawler.Crawler;
import org.semanticdesktop.aperture.crawler.CrawlerHandler;
import org.semanticdesktop.aperture.crawler.ExitCode;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.extractor.ExtractorFactory;
import org.semanticdesktop.aperture.extractor.ExtractorRegistry;
import org.semanticdesktop.aperture.extractor.FileExtractor;
import org.semanticdesktop.aperture.extractor.FileExtractorFactory;
import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry;
import org.semanticdesktop.aperture.mime.identifier.MimeTypeIdentifier;
import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerException;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerFactory;
import org.semanticdesktop.aperture.subcrawler.SubCrawlerRegistry;
import org.semanticdesktop.aperture.util.IOUtil;
import org.semanticdesktop.aperture.vocabulary.NIE;

/* loaded from: input_file:dfki/km/medico/aperture/crawler/CrawlerHandlerBase.class */
/**
 * Base {@link CrawlerHandler} whose notification callbacks are no-ops (except
 * {@link #objectNew(Crawler, DataObject)}, which disposes the object) and which
 * offers {@link #processBinary(Crawler, DataObject)} as a helper for MIME type
 * identification and content extraction on {@link FileDataObject}s.
 *
 * <p>Subclasses are expected to override the callbacks they care about and may
 * call {@code processBinary} from them.
 */
public class CrawlerHandlerBase implements CrawlerHandler {
    /** When {@code false}, {@link #processBinary(Crawler, DataObject)} does nothing. */
    protected boolean extractingContents;
    /** Identifies the MIME type from the leading bytes of a file's content. */
    protected MimeTypeIdentifier mimeTypeIdentifier;
    /** Source of stream- and file-based extractors, looked up by MIME type; may be {@code null}. */
    protected ExtractorRegistry extractorRegistry;
    /** Source of sub-crawlers, looked up by MIME type; may be {@code null}. */
    protected SubCrawlerRegistry subCrawlerRegistry;

    /**
     * Creates a handler with content extraction enabled, a
     * {@link DefaultExtractorRegistry} and a {@link MagicMimeTypeIdentifier}.
     * The sub-crawler registry is left {@code null}, so no sub-crawlers are run.
     */
    public CrawlerHandlerBase() {
        this.extractingContents = true;
        this.extractorRegistry = new DefaultExtractorRegistry();
        this.mimeTypeIdentifier = new MagicMimeTypeIdentifier();
    }

    /**
     * Creates a handler with content extraction enabled and the given collaborators.
     *
     * @param mimeTypeIdentifier identifier used by {@code processBinary}; a {@code null}
     *                           value makes {@code processBinary} throw while extraction is enabled
     * @param extractorRegistry  extractor registry, or {@code null} to skip extractors
     * @param subCrawlerRegistry sub-crawler registry, or {@code null} to skip sub-crawlers
     */
    public CrawlerHandlerBase(MimeTypeIdentifier mimeTypeIdentifier, ExtractorRegistry extractorRegistry, SubCrawlerRegistry subCrawlerRegistry) {
        this.extractingContents = true;
        this.mimeTypeIdentifier = mimeTypeIdentifier;
        this.extractorRegistry = extractorRegistry;
        this.subCrawlerRegistry = subCrawlerRegistry;
    }

    /**
     * Returns a factory that backs every requested container with a freshly
     * created and opened RDF2Go model.
     *
     * @param crawler the requesting crawler (unused)
     * @param str     the URL of the object being accessed (unused)
     * @return a new factory instance on every call
     */
    public RDFContainerFactory getRDFContainerFactory(Crawler crawler, String str) {
        return new RDFContainerFactory() {
            public RDFContainer getRDFContainer(URI uri) {
                Model model = RDF2Go.getModelFactory().createModel();
                // The model is handed over to the container in the open state.
                model.open();
                return new RDFContainerImpl(model, uri);
            }
        };
    }

    /** No-op. */
    public void accessingObject(Crawler crawler, String str) {
    }

    /** No-op. */
    public void clearFinished(Crawler crawler, ExitCode exitCode) {
    }

    /** No-op. */
    public void clearingObject(Crawler crawler, String str) {
    }

    /** No-op. */
    public void clearStarted(Crawler crawler) {
    }

    /** No-op. */
    public void crawlStarted(Crawler crawler) {
    }

    /** No-op. */
    public void crawlStopped(Crawler crawler, ExitCode exitCode) {
    }

    /**
     * No-op.
     *
     * <p>NOTE(review): unlike {@link #objectNew(Crawler, DataObject)} this does not
     * dispose {@code dataObject}; confirm whether that asymmetry is intended.
     */
    public void objectChanged(Crawler crawler, DataObject dataObject) {
    }

    /**
     * Disposes the newly reported data object without processing it.
     *
     * @param crawler    the reporting crawler (unused)
     * @param dataObject the new object; must not be {@code null}
     */
    public void objectNew(Crawler crawler, DataObject dataObject) {
        dataObject.dispose();
    }

    /** No-op. */
    public void objectNotModified(Crawler crawler, String str) {
    }

    /** No-op. */
    public void objectRemoved(Crawler crawler, String str) {
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Identifies the MIME type of a {@link FileDataObject} and enriches its metadata.
     *
     * <p>Does nothing when content extraction is disabled or when {@code dataObject}
     * is not a {@link FileDataObject}. Otherwise the identified MIME type is added to
     * the metadata as {@code NIE.mimeType} and then the first applicable step runs:
     * <ol>
     *   <li>the first stream-based extractor registered for the MIME type;</li>
     *   <li>the first file-based extractor registered for the MIME type, using the
     *       object's own file or, if it has none, a downloaded copy that is deleted
     *       afterwards;</li>
     *   <li>the first sub-crawler registered for the MIME type.</li>
     * </ol>
     * If the MIME type cannot be identified nothing is added and nothing is run.
     *
     * @param crawler    crawler used to run a sub-crawler, if one applies
     * @param dataObject the object to process
     * @throws IllegalStateException if extraction is enabled and no MIME type identifier is set
     * @throws NullPointerException  if extraction is enabled and {@code dataObject} is {@code null}
     * @throws IOException           if reading the content fails
     * @throws ExtractorException    if the selected extractor fails
     * @throws SubCrawlerException   if the selected sub-crawler fails
     */
    public void processBinary(Crawler crawler, DataObject dataObject) throws IOException, ExtractorException, SubCrawlerException {
        if (!this.extractingContents) {
            return;
        }
        if (this.mimeTypeIdentifier == null) {
            throw new IllegalStateException("MimeTypeIdentifier is not set. ");
        }
        if (dataObject == null) {
            throw new NullPointerException("dataObject is null. This parameter must be set.");
        }
        if (!(dataObject instanceof FileDataObject)) {
            return;
        }

        FileDataObject fileDataObject = (FileDataObject) dataObject;
        URI id = fileDataObject.getID();
        int minArrayLength = this.mimeTypeIdentifier.getMinArrayLength();

        // NOTE(review): this stream is not closed here; presumably the data object owns
        // its content stream and releases it on dispose() - confirm against the caller.
        BufferedInputStream contentStream = new BufferedInputStream(fileDataObject.getContent(), Math.max(minArrayLength, 8192));
        // Remember the start so the bytes consumed for identification can be replayed.
        contentStream.mark(minArrayLength + 10);
        String mimeType = this.mimeTypeIdentifier.identify(IOUtil.readBytes(contentStream, minArrayLength), (String) null, id);
        if (mimeType == null) {
            return;
        }

        RDFContainer metadata = fileDataObject.getMetadata();
        metadata.add(NIE.mimeType, mimeType);
        contentStream.reset();

        if (this.extractorRegistry != null) {
            Set<?> extractorFactories = this.extractorRegistry.get(mimeType);
            if (!extractorFactories.isEmpty()) {
                ((ExtractorFactory) extractorFactories.iterator().next()).get().extract(id, contentStream, (Charset) null, mimeType, metadata);
                return;
            }
            Set<?> fileExtractorFactories = this.extractorRegistry.getFileExtractorFactories(mimeType);
            if (!fileExtractorFactories.isEmpty()) {
                FileExtractor fileExtractor = ((FileExtractorFactory) fileExtractorFactories.iterator().next()).get();
                File file = fileDataObject.getFile();
                if (file != null) {
                    fileExtractor.extract(id, file, (Charset) null, mimeType, metadata);
                    return;
                }
                File downloaded = fileDataObject.downloadContent();
                try {
                    fileExtractor.extract(id, downloaded, (Charset) null, mimeType, metadata);
                } finally {
                    // Remove the temporary copy even when extraction fails; if it cannot
                    // be removed right now, ask the JVM to remove it on exit.
                    if (downloaded != null && !downloaded.delete()) {
                        downloaded.deleteOnExit();
                    }
                }
                return;
            }
        }

        if (this.subCrawlerRegistry != null) {
            Iterator<?> subCrawlerFactories = this.subCrawlerRegistry.get(mimeType).iterator();
            if (subCrawlerFactories.hasNext()) {
                crawler.runSubCrawler(((SubCrawlerFactory) subCrawlerFactories.next()).get(), dataObject, contentStream, (Charset) null, mimeType);
            }
        }
    }

    /**
     * @return {@code true} if {@link #processBinary(Crawler, DataObject)} is enabled
     */
    public boolean isExtractingContents() {
        return this.extractingContents;
    }

    /**
     * Enables or disables {@link #processBinary(Crawler, DataObject)}.
     *
     * @param z {@code true} to enable content processing, {@code false} to make it a no-op
     */
    public void setExtractingContents(boolean z) {
        this.extractingContents = z;
    }

    /**
     * @return the extractor registry in use, possibly {@code null}
     */
    public ExtractorRegistry getExtractorRegistry() {
        return this.extractorRegistry;
    }

    /**
     * Replaces the extractor registry.
     *
     * @param extractorRegistry the new registry, or {@code null} to skip extractors
     */
    public void setExtractorRegistry(ExtractorRegistry extractorRegistry) {
        this.extractorRegistry = extractorRegistry;
    }
}
