package dfki.km.medico.aperture;

import dfki.km.medico.aperture.properties.DICOMFileExtractorProperties;
import java.io.FileInputStream;
import java.util.ArrayList;
import org.apache.log4j.PropertyConfigurator;
import org.ontoware.rdf2go.RDF2Go;
import org.ontoware.rdf2go.model.Model;
import org.ontoware.rdf2go.model.ModelSet;
import org.ontoware.rdf2go.model.node.impl.URIImpl;
import org.semanticdesktop.aperture.accessor.impl.DefaultDataAccessorRegistry;
import org.semanticdesktop.aperture.crawler.web.WebCrawler;
import org.semanticdesktop.aperture.datasource.config.DomainBoundaries;
import org.semanticdesktop.aperture.datasource.config.RegExpPattern;
import org.semanticdesktop.aperture.datasource.web.WebDataSource;
import org.semanticdesktop.aperture.extractor.impl.DefaultExtractorRegistry;
import org.semanticdesktop.aperture.hypertext.linkextractor.impl.DefaultLinkExtractorRegistry;
import org.semanticdesktop.aperture.mime.identifier.magic.MagicMimeTypeIdentifier;
import org.semanticdesktop.aperture.rdf.impl.RDFContainerImpl;

/* loaded from: input_file:dfki/km/medico/aperture/WebCrawlerExample.class */
/**
 * Command-line example that crawls a web location with Aperture's {@link WebCrawler}
 * and hands every crawled resource to a {@link ConfigCrawlerHandler}.
 *
 * <p>Usage: pass exactly one argument, the root URL at which crawling starts. Crawling is
 * restricted to URLs matching {@link #DOMAIN_INCLUDE_PATTERN}.
 */
public class WebCrawlerExample {
    /** Location of the log4j configuration, relative to the working directory. */
    private static final String LOG4J_CONFIG = "src/main/resources/config/log4j.properties";

    /** Trust store handed to JSSE through the {@code javax.net.ssl.trustStore} system property. */
    private static final String TRUST_STORE = "src/main/resources/jssecacerts";

    /** XML file from which the extractor registry is built. */
    private static final String EXTRACTOR_REGISTRY_CONFIG = "src/main/resources/config/ExtractorRegistry.xml";

    /** Regular expression used as the only include pattern of the crawl's domain boundaries. */
    private static final String DOMAIN_INCLUDE_PATTERN = "(http://www.dfki.uni-kl.de/~moeller/dicoms/).*";

    /**
     * Program entry point.
     *
     * @param strArr command-line arguments; exactly one is expected, the root URL to crawl
     * @throws Exception if configuration or crawling fails
     */
    public static void main(String[] strArr) throws Exception {
        // Validate the arguments first so that a usage error does not trigger any configuration work.
        if (strArr.length != 1) {
            System.err.println("Specify the root folder");
            System.exit(-1);
        }
        PropertyConfigurator.configure(LOG4J_CONFIG);
        System.setProperty("javax.net.ssl.trustStore", TRUST_STORE);
        new WebCrawlerExample().doCrawling(strArr[0]);
    }

    /**
     * Configures a {@link WebDataSource} rooted at the given URL and crawls it.
     *
     * <p>The model set obtained from {@link DICOMFileExtractorProperties#getModelSet()} is cleared
     * via {@code removeAll()} before it is passed to the crawler handler.
     *
     * @param str root URL at which crawling starts
     * @throws Exception if the extractor registry cannot be read or crawling fails
     */
    public void doCrawling(String str) throws Exception {
        Model createModel = RDF2Go.getModelFactory().createModel();
        createModel.open();
        try {
            RDFContainerImpl rDFContainerImpl = new RDFContainerImpl(createModel, new URIImpl("source:dicomWebdavExtraction"), false);
            WebDataSource webDataSource = new WebDataSource();
            webDataSource.setConfiguration(rDFContainerImpl);
            webDataSource.setRootUrl(str);

            // Raw lists are kept on purpose: the generic signature of DomainBoundaries is not
            // visible from this file, so no element type is assumed here.
            ArrayList arrayList = new ArrayList();
            arrayList.add(new RegExpPattern(DOMAIN_INCLUDE_PATTERN));
            webDataSource.setDomainBoundaries(new DomainBoundaries(arrayList, new ArrayList()));
            webDataSource.setIncludeEmbeddedResources(true);

            ModelSet modelSet = DICOMFileExtractorProperties.getModelSet();
            modelSet.removeAll();
            ConfigCrawlerHandler configCrawlerHandler = new ConfigCrawlerHandler(modelSet, new URIImpl("source:dicomExtraction:uri"));

            // Close the configuration stream once the registry has been constructed from it;
            // previously the file handle was never released.
            FileInputStream registryStream = new FileInputStream(EXTRACTOR_REGISTRY_CONFIG);
            try {
                configCrawlerHandler.setExtractorRegistry(new DefaultExtractorRegistry(registryStream));
            } finally {
                registryStream.close();
            }

            WebCrawler webCrawler = new WebCrawler();
            webCrawler.setDataSource(webDataSource);
            webCrawler.setDataAccessorRegistry(new DefaultDataAccessorRegistry());
            webCrawler.setMimeTypeIdentifier(new MagicMimeTypeIdentifier());
            webCrawler.setLinkExtractorRegistry(new DefaultLinkExtractorRegistry());
            webCrawler.setCrawlerHandler(configCrawlerHandler);
            webCrawler.crawl();
        } finally {
            // Release the configuration model opened above, including when crawling fails.
            createModel.close();
        }
    }
}
