package de.dfki.km.leech;

import de.dfki.inquisition.text.StringUtils;
import de.dfki.km.leech.config.CrawlerContext;
import de.dfki.km.leech.config.LeechConfig;
import de.dfki.km.leech.io.URLStreamProvider;
import de.dfki.km.leech.parser.DirectoryCrawlerParser;
import de.dfki.km.leech.parser.filter.URLFilteringParser;
import de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory;
import de.dfki.km.leech.parser.incremental.IncrementalCrawlingParser;
import de.dfki.km.leech.sax.CrawlReportContentHandler;
import de.dfki.km.leech.sax.DataSinkContentHandler;
import de.dfki.km.leech.sax.PrintlnContentHandler;
import de.dfki.km.leech.util.ExceptionUtils;
import de.dfki.km.leech.util.UrlUtil;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.rmi.server.UID;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.mail.URLName;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/km/leech/Leech.class */
/**
 * Tika facade whose sources are addressed with {@link URLName} objects (or strings / files
 * converted to them) instead of {@link URL} objects.
 *
 * <p>Streams and initial metadata for a source are obtained from {@link URLStreamProvider}. The
 * parser returned by {@link #getParser()} wraps the plain Tika parser into an
 * {@link IncrementalCrawlingParser} and a {@link URLFilteringParser}.
 *
 * <p>The {@code java.net.URL} based entry points inherited from {@link Tika} are deliberately
 * unsupported and throw {@link UnsupportedOperationException}.
 */
public class Leech extends Tika {

    private static final Logger LOG = Logger.getLogger(Leech.class.getName());

    /** Message of the exception thrown by every {@code java.net.URL} based entry point. */
    private static final String URL_NOT_SUPPORTED = "The java.net.URL class methods are not supported because our mechanism supporting new protocols and the according stream creation differ.\nUse the according URLName method instead";

    /** Usage text logged by {@link #main(String[])}. */
    private static final String USAGE = "Usage: leech <source2crawl_1> <source2crawl_2> ... <source2crawl_N>\n\n"
            + "A source can be an URL for file://, http://, imap:// or -maybe in future- other urls (e.g. for databases, webDAV, etc...).\n"
            + "In the case the string is no correct url string, the method will use the string as file path and then generates an\n"
            + "according URL. Examples: 'file://myDataDir', 'file://bla.pdf', 'http://reuschling.github.com/leech/',\n"
            + "'imap://usr:pswd@myImapServer.de:993/inbox', 'imaps://usr:pswd@myImapServer.de:993/inbox;uid=22'\n\n"
            + "This executable crawls all data and simply shows the metadata on the screen. Because leech is designed to be used as a\n"
            + "java library, this exec is for quick testing purposes.\n\n";

    /**
     * Command line entry point for quick tests: logs the usage text, then crawls every given
     * source and prints what was found through a {@link PrintlnContentHandler} wrapped into a
     * {@link CrawlReportContentHandler}.
     *
     * @param strArr the sources to crawl (URL strings or file paths)
     * @throws IOException   propagated from the crawl
     * @throws SAXException  propagated from the crawl
     * @throws TikaException propagated from the crawl
     */
    public static void main(String[] strArr) throws IOException, SAXException, TikaException {
        LOG.info(USAGE);
        Leech leech = new Leech();
        CrawlerContext crawlerContext = new CrawlerContext();
        ContentHandler reportingHandler = new CrawlReportContentHandler(new PrintlnContentHandler(PrintlnContentHandler.Verbosity.all)).setCyclicReportPrintln(7000L);
        for (String source : strArr) {
            LOG.info("Will start crawling " + source + '\n');
            // A fresh ParseContext is created from the shared CrawlerContext for every source.
            leech.parse(source, reportingHandler, crawlerContext.createParseContext());
        }
    }

    /** Creates a Leech instance backed by {@link LeechConfig#getDefaultLeechConfig()}. */
    public Leech() {
        super(LeechConfig.getDefaultLeechConfig());
    }

    /**
     * Creates a Leech instance backed by the given configuration.
     *
     * @param leechConfig the configuration handed to the Tika facade
     */
    public Leech(LeechConfig leechConfig) {
        super(leechConfig);
    }

    /**
     * Detects the media type of a file by delegating to {@link #detect(URLName)}.
     *
     * @param file the file to inspect
     * @return the detected type, or {@code null} if detection failed (see {@link #detect(URLName)})
     * @throws IOException if the file URL cannot be built or the stream cannot be closed
     */
    public String detect(File file) throws IOException {
        return detect(new URLName(file.toURI().toURL()));
    }

    /**
     * Unsupported; use {@link #detect(URLName)} instead.
     *
     * @param url ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public String detect(URL url) {
        throw new UnsupportedOperationException(URL_NOT_SUPPORTED);
    }

    /**
     * Detects the media type of the data behind the given URL. The stream and its initial
     * metadata come from the {@link URLStreamProvider} registered for the URL.
     *
     * @param uRLName the source to inspect (used as given, without normalization)
     * @return the detected type, or {@code null} if anything went wrong; the error is logged
     * @throws IOException if closing the stream fails
     */
    public String detect(URLName uRLName) throws IOException {
        InputStream stream = null;
        try {
            Metadata metadata = URLStreamProvider.getURLStreamProvider(uRLName).addFirstMetadata(uRLName, null, null);
            stream = URLStreamProvider.getURLStreamProvider(uRLName).getStream(uRLName, metadata, null);
            return detect(stream, metadata);
        } catch (Throwable t) {
            // Best effort: detection problems are reported through the log and a null result.
            LOG.log(Level.SEVERE, "Error", t);
            return null;
        } finally {
            if (stream != null) {
                stream.close();
            }
        }
    }

    /**
     * Detects the media type of a source given as string (URL or file path), converted with
     * {@link UrlUtil#sourceString2URL}.
     *
     * @param str the source string
     * @return the detected type, or {@code null} if anything went wrong; the error is logged
     */
    public String detect(String str) {
        try {
            return detect(UrlUtil.sourceString2URL(str));
        } catch (Throwable t) {
            LOG.log(Level.SEVERE, "Error", t);
            return null;
        }
    }

    /**
     * Determines the content handler configured in the {@link CrawlerContext} of the given
     * parse context. A configured handler class name takes precedence over a configured handler
     * instance; if instantiating that class fails, the failure is logged and the configured
     * instance (if any) is used.
     *
     * @param parseContext the context that has to carry a {@link CrawlerContext}
     * @return the handler to use, never {@code null}
     * @throws IllegalStateException if no crawler context or no usable handler is configured
     */
    protected ContentHandler getContentHandler(ParseContext parseContext) {
        CrawlerContext crawlerContext = (CrawlerContext) parseContext.get(CrawlerContext.class);
        if (crawlerContext == null) {
            throw new IllegalStateException("no crawlerContext was set. Set a CrawlerContext with a configured handler or use another method with directly specifying a handler.");
        }
        ContentHandler handler = crawlerContext.getContentHandler();
        String handlerClassName = crawlerContext.getContentHandlerClassName();
        if (!StringUtils.nullOrWhitespace(handlerClassName)) {
            try {
                handler = (ContentHandler) Class.forName(handlerClassName).getDeclaredConstructor().newInstance();
            } catch (Throwable t) {
                // Deliberately not fatal: fall back to the handler instance of the context.
                LOG.log(Level.SEVERE, "Error during the instantiation of the configured content handler " + handlerClassName, t);
            }
        }
        if (handler == null) {
            throw new IllegalStateException("no contentHandler was set. Have a look into the class CrawlerContext");
        }
        return handler;
    }

    /**
     * Returns the plain Tika parser wrapped into an {@link IncrementalCrawlingParser} which in
     * turn is wrapped into a {@link URLFilteringParser}. A new wrapper pair is created on every
     * call.
     *
     * @return the decorated parser used for top-level parse calls
     */
    public Parser getParser() {
        return new URLFilteringParser(new IncrementalCrawlingParser(super.getParser()));
    }

    /**
     * Parses a file and returns its text as reader, delegating to {@link #parse(URLName)}.
     *
     * @param file the file to parse
     * @return the text reader, or {@code null} if parsing could not be started
     * @throws IOException if the file URL cannot be built
     */
    public Reader parse(File file) throws IOException {
        return parse(new URLName(file.toURI().toURL()));
    }

    /**
     * Crawls a file with a fresh parse context and reports to the given handler. Equivalent to
     * {@link #parse(File, ContentHandler, ParseContext)} with an empty {@link ParseContext}.
     *
     * @param file           the file or directory to crawl
     * @param contentHandler the handler receiving the crawl events
     * @throws IOException   propagated from error handling or cleanup
     * @throws SAXException  propagated from error handling or cleanup
     * @throws TikaException propagated from error handling or cleanup
     */
    public void parse(File file, ContentHandler contentHandler) throws IOException, SAXException, TikaException {
        parse(file, contentHandler, new ParseContext());
    }

    /**
     * Crawls a file and reports to the given handler. The handler is stored in the
     * {@link CrawlerContext} of the parse context; a crawler context is created if the parse
     * context has none.
     *
     * @param file           the file or directory to crawl
     * @param contentHandler the handler receiving the crawl events
     * @param parseContext   the context to crawl with; it is modified by this call
     * @throws IOException   propagated from error handling or cleanup
     * @throws SAXException  propagated from error handling or cleanup
     * @throws TikaException propagated from error handling or cleanup
     */
    public void parse(File file, ContentHandler contentHandler, ParseContext parseContext) throws IOException, SAXException, TikaException {
        registerHandler(contentHandler, parseContext);
        crawlFile(file, contentHandler, parseContext);
    }

    /**
     * Crawls a file and reports to the handler configured in the {@link CrawlerContext} of the
     * parse context (see {@link #getContentHandler(ParseContext)}).
     *
     * @param file         the file or directory to crawl
     * @param parseContext the context to crawl with; it has to carry a crawler context
     * @throws IllegalStateException if no crawler context or handler is configured
     * @throws IOException           propagated from error handling or cleanup
     * @throws SAXException          propagated from error handling or cleanup
     * @throws TikaException         propagated from error handling or cleanup
     */
    public void parse(File file, ParseContext parseContext) throws IOException, SAXException, TikaException {
        parseContext.set(Parser.class, super.getParser());
        crawlFile(file, getContentHandler(parseContext), parseContext);
    }

    /**
     * Crawls a source given as string, see {@link #parse(URLName, ContentHandler)}.
     *
     * @param str            the source (URL string or file path)
     * @param contentHandler the handler receiving the crawl events
     * @throws IOException   propagated from the crawl
     * @throws SAXException  propagated from the crawl
     * @throws TikaException propagated from the crawl
     */
    public void parse(String str, ContentHandler contentHandler) throws IOException, SAXException, TikaException {
        parse(UrlUtil.sourceString2URL(str), contentHandler);
    }

    /**
     * Crawls a source given as string, see {@link #parse(URLName, ContentHandler, ParseContext)}.
     *
     * @param str            the source (URL string or file path)
     * @param contentHandler the handler receiving the crawl events
     * @param parseContext   the context to crawl with
     * @throws IOException   propagated from the crawl
     * @throws SAXException  propagated from the crawl
     * @throws TikaException propagated from the crawl
     */
    public void parse(String str, ContentHandler contentHandler, ParseContext parseContext) throws IOException, SAXException, TikaException {
        parse(UrlUtil.sourceString2URL(str), contentHandler, parseContext);
    }

    /**
     * Crawls several sources given as strings in one run, see
     * {@link #parse(URLName[], ContentHandler, ParseContext)}.
     *
     * @param strArr         the sources (URL strings or file paths)
     * @param contentHandler the handler receiving the crawl events
     * @param parseContext   the context to crawl with
     * @throws IOException   propagated from the crawl
     * @throws SAXException  propagated from the crawl
     * @throws TikaException propagated from the crawl
     */
    public void parse(String[] strArr, ContentHandler contentHandler, ParseContext parseContext) throws IOException, SAXException, TikaException {
        URLName[] urls = new URLName[strArr.length];
        for (int i = 0; i < strArr.length; i++) {
            urls[i] = UrlUtil.sourceString2URL(strArr[i]);
        }
        parse(urls, contentHandler, parseContext);
    }

    /**
     * Crawls a source given as string, see {@link #parse(URLName, ParseContext)}.
     *
     * @param str          the source (URL string or file path)
     * @param parseContext the context to crawl with; it has to carry a crawler context
     * @throws IOException   propagated from the crawl
     * @throws SAXException  propagated from the crawl
     * @throws TikaException propagated from the crawl
     */
    public void parse(String str, ParseContext parseContext) throws IOException, SAXException, TikaException {
        parse(UrlUtil.sourceString2URL(str), parseContext);
    }

    /**
     * Unsupported; use {@link #parse(URLName)} instead.
     *
     * @param url ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public Reader parse(URL url) throws IOException {
        throw new UnsupportedOperationException(URL_NOT_SUPPORTED);
    }

    /**
     * Parses the data behind the (normalized) URL and returns its text as reader.
     *
     * @param uRLName the source to parse
     * @return the text reader, or {@code null} if parsing could not be started; the error is
     *         logged
     * @throws IOException declared for compatibility with the other entry points
     */
    public Reader parse(URLName uRLName) throws IOException {
        URLName url = UrlUtil.normalizeURL(uRLName);
        InputStream stream = null;
        try {
            Metadata metadata = URLStreamProvider.getURLStreamProvider(url).addFirstMetadata(url, null, null);
            stream = URLStreamProvider.getURLStreamProvider(url).getStream(url, metadata, null);
            return parse(stream, metadata);
        } catch (Throwable t) {
            LOG.log(Level.SEVERE, "Error", t);
            // No reader took ownership of the stream, so release it here instead of leaking it.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException closeError) {
                    LOG.log(Level.WARNING, "Error while closing the stream", closeError);
                }
            }
            return null;
        }
    }

    /**
     * Crawls a URL with a fresh parse context and reports to the given handler. Equivalent to
     * {@link #parse(URLName, ContentHandler, ParseContext)} with an empty {@link ParseContext}.
     *
     * @param uRLName        the source to crawl
     * @param contentHandler the handler receiving the crawl events
     * @throws IOException   propagated from error handling or cleanup
     * @throws SAXException  propagated from error handling or cleanup
     * @throws TikaException propagated from error handling or cleanup
     */
    public void parse(URLName uRLName, ContentHandler contentHandler) throws IOException, SAXException, TikaException {
        parse(uRLName, contentHandler, new ParseContext());
    }

    /**
     * Crawls the (normalized) URL and reports to the given handler. The handler is stored in the
     * {@link CrawlerContext} of the parse context; a crawler context is created if the parse
     * context has none.
     *
     * @param uRLName        the source to crawl
     * @param contentHandler the handler receiving the crawl events
     * @param parseContext   the context to crawl with; it is modified by this call
     * @throws IOException   propagated from error handling or cleanup
     * @throws SAXException  propagated from error handling or cleanup
     * @throws TikaException propagated from error handling or cleanup
     */
    public void parse(URLName uRLName, ContentHandler contentHandler, ParseContext parseContext) throws IOException, SAXException, TikaException {
        URLName url = UrlUtil.normalizeURL(uRLName);
        registerHandler(contentHandler, parseContext);
        crawlUrl(url, contentHandler, parseContext);
    }

    /**
     * Crawls several URLs in one run. The normalized URLs are written, one per line, into an
     * in-memory document of type {@code application/leechUrlList} which is then parsed like any
     * other source. Because that document adds one level, a crawling depth below
     * {@link Integer#MAX_VALUE} is increased by one on the crawler context.
     *
     * <p>The given array is not modified; normalization works on a copy.
     *
     * @param uRLNameArr     the sources to crawl
     * @param contentHandler the handler receiving the crawl events
     * @param parseContext   the context to crawl with; it is modified by this call
     * @throws IOException   propagated from error handling or cleanup
     * @throws SAXException  propagated from error handling or cleanup
     * @throws TikaException propagated from error handling or cleanup
     */
    public void parse(URLName[] uRLNameArr, ContentHandler contentHandler, ParseContext parseContext) throws IOException, SAXException, TikaException {
        URLName[] urls = new URLName[uRLNameArr.length];
        for (int i = 0; i < uRLNameArr.length; i++) {
            urls[i] = UrlUtil.normalizeURL(uRLNameArr[i]);
        }
        CrawlerContext crawlerContext = registerHandler(contentHandler, parseContext);
        Metadata metadata = initialMetadata(contentHandler);
        InputStream urlListStream = null;
        try {
            // A unique id keeps the synthetic list document distinct from any earlier run.
            String uid = new UID().toString();
            String listName = uid + "_leechUrlList.urlList";
            metadata.add("resourceName", "leechUrlList " + uid);
            metadata.add(DublinCore.SOURCE, listName);
            metadata.add(IncrementalCrawlingHistory.dataEntityId, listName);
            metadata.add(IncrementalCrawlingHistory.dataEntityContentFingerprint, listName);
            metadata.add("Content-Type", "application/leechUrlList");
            StringBuilder urlList = new StringBuilder();
            for (URLName url : urls) {
                urlList.append(url.toString()).append("\n");
            }
            // NOTE(review): platform default charset, kept as is because the consumer of the
            // list document is not visible here - confirm both sides agree before changing it.
            urlListStream = new ByteArrayInputStream(urlList.toString().getBytes());
            // NOTE(review): the increased depth stays on the crawler context after this call,
            // so reusing the context for another URL list increases it again - confirm intended.
            int crawlingDepth = crawlerContext.getCrawlingDepth();
            if (crawlingDepth < Integer.MAX_VALUE) {
                crawlerContext.setCrawlingDepth(crawlingDepth + 1);
            }
            getParser().parse(urlListStream, contentHandler, metadata, parseContext);
        } catch (Throwable t) {
            ExceptionUtils.handleException(t, null, metadata, (CrawlerContext) parseContext.get(CrawlerContext.class), parseContext, 0, contentHandler);
        } finally {
            finishCrawl(contentHandler, urlListStream);
        }
    }

    /**
     * Crawls the (normalized) URL and reports to the handler configured in the
     * {@link CrawlerContext} of the parse context (see {@link #getContentHandler(ParseContext)}).
     *
     * @param uRLName      the source to crawl
     * @param parseContext the context to crawl with; it has to carry a crawler context
     * @throws IllegalStateException if no crawler context or handler is configured
     * @throws IOException           propagated from error handling or cleanup
     * @throws SAXException          propagated from error handling or cleanup
     * @throws TikaException         propagated from error handling or cleanup
     */
    public void parse(URLName uRLName, ParseContext parseContext) throws IOException, SAXException, TikaException {
        URLName url = UrlUtil.normalizeURL(uRLName);
        parseContext.set(Parser.class, super.getParser());
        crawlUrl(url, getContentHandler(parseContext), parseContext);
    }

    /**
     * Parses a file and returns its text, delegating to {@link #parseToString(URLName)}.
     *
     * @param file the file to parse
     * @return the extracted text
     * @throws IOException   if the file URL cannot be built
     * @throws TikaException if parsing fails
     */
    public String parseToString(File file) throws IOException, TikaException {
        return parseToString(new URLName(file.toURI().toURL()));
    }

    /**
     * Unsupported; use {@link #parseToString(URLName)} instead.
     *
     * @param url ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public String parseToString(URL url) throws IOException, TikaException {
        throw new UnsupportedOperationException(URL_NOT_SUPPORTED);
    }

    /**
     * Parses the data behind the (normalized) URL and returns its text.
     *
     * @param uRLName the source to parse
     * @return the extracted text
     * @throws IOException   declared for compatibility with the other entry points
     * @throws TikaException wrapping any problem that occurred while opening or parsing the
     *                       source
     */
    public String parseToString(URLName uRLName) throws IOException, TikaException {
        URLName url = UrlUtil.normalizeURL(uRLName);
        try {
            Metadata metadata = URLStreamProvider.getURLStreamProvider(url).addFirstMetadata(url, null, null);
            return parseToString(URLStreamProvider.getURLStreamProvider(url).getStream(url, metadata, null), metadata);
        } catch (Throwable t) {
            throw new TikaException("Error while parsing " + url.getFile(), t);
        }
    }

    /**
     * Registers the plain Tika parser and the handler in the parse context, creating the
     * {@link CrawlerContext} if the parse context has none.
     *
     * @return the crawler context now carrying the handler
     */
    private CrawlerContext registerHandler(ContentHandler handler, ParseContext context) {
        // Nested parse calls find the undecorated parser in the context; only the top-level
        // call goes through the decorated getParser().
        context.set(Parser.class, super.getParser());
        CrawlerContext crawlerContext = (CrawlerContext) context.get(CrawlerContext.class);
        if (crawlerContext == null) {
            crawlerContext = new CrawlerContext();
            context.set(CrawlerContext.class, crawlerContext);
        }
        crawlerContext.setContentHandler(handler);
        return crawlerContext;
    }

    /** Returns the metadata object of a data sink handler, or a new metadata object otherwise. */
    private static Metadata initialMetadata(ContentHandler handler) {
        if (handler instanceof DataSinkContentHandler) {
            return ((DataSinkContentHandler) handler).getMetaData();
        }
        return new Metadata();
    }

    /** Crawls a file; the file URL is built inside the guarded region so failures are handled too. */
    private void crawlFile(File file, ContentHandler handler, ParseContext context) throws IOException, SAXException, TikaException {
        Metadata metadata = initialMetadata(handler);
        InputStream stream = null;
        try {
            URLName url = new URLName(file.toURI().toURL());
            metadata = URLStreamProvider.getURLStreamProvider(url).addFirstMetadata(url, metadata, context);
            stream = URLStreamProvider.getURLStreamProvider(url).getStream(url, metadata, context);
            getParser().parse(stream, handler, metadata, context);
        } catch (Throwable t) {
            ExceptionUtils.handleException(t, null, metadata, (CrawlerContext) context.get(CrawlerContext.class), context, 0, handler);
        } finally {
            finishCrawl(handler, stream);
        }
    }

    /** Crawls an already normalized URL and hands every failure to the exception handling. */
    private void crawlUrl(URLName url, ContentHandler handler, ParseContext context) throws IOException, SAXException, TikaException {
        Metadata metadata = initialMetadata(handler);
        InputStream stream = null;
        try {
            metadata = URLStreamProvider.getURLStreamProvider(url).addFirstMetadata(url, metadata, context);
            stream = URLStreamProvider.getURLStreamProvider(url).getStream(url, metadata, context);
            getParser().parse(stream, handler, metadata, context);
        } catch (Throwable t) {
            ExceptionUtils.handleException(t, null, metadata, (CrawlerContext) context.get(CrawlerContext.class), context, 0, handler);
        } finally {
            finishCrawl(handler, stream);
        }
    }

    /** Notifies a data sink handler that the crawl is over and closes the stream, exactly once. */
    private static void finishCrawl(ContentHandler handler, InputStream stream) throws IOException, SAXException, TikaException {
        try {
            if (handler instanceof DataSinkContentHandler) {
                ((DataSinkContentHandler) handler).crawlFinished();
            }
        } finally {
            // Close even if the sink notification failed.
            if (stream != null) {
                stream.close();
            }
        }
    }
}
