package de.dfki.km.leech.parser;

import de.dfki.km.leech.Leech;
import de.dfki.km.leech.config.CrawlerContext;
import de.dfki.km.leech.config.DirectoryCrawlerContext;
import de.dfki.km.leech.detect.DatasourceMediaTypes;
import de.dfki.km.leech.io.URLStreamProvider;
import de.dfki.km.leech.util.MultiValueHashMap;
import de.dfki.km.leech.util.OSUtils;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import java.util.concurrent.SynchronousQueue;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.mail.URLName;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;

/**
 * Crawler parser for directories ({@link DatasourceMediaTypes#DIRECTORY}).
 *
 * <p>The entries of the directory named by the {@code "source"} metadata value are enumerated on a
 * background thread and handed over one at a time through a {@link OneAfterOneIterator}. Each entry
 * that passes {@link #checkIfInConstraints(File, CrawlerContext, DirectoryCrawlerContext)} is parsed
 * with the parser of a lazily created {@link Leech} instance.
 */
public class DirectoryCrawlerParser extends CrawlerParser {
    private static final long serialVersionUID = 1824851369780822093L;

    /** Created on first use in {@link #processSubDataEntity}; subclasses may pre-assign it. */
    protected Leech m_leech;

    /**
     * Producer task that lists one directory and pushes every accepted entry into a
     * {@link OneAfterOneIterator}. (Originally the anonymous class {@code DirectoryCrawlerParser$1}.)
     */
    class AnonymousClass1 implements Runnable {
        /** Set as soon as the file filter observes a stop request from the crawler context. */
        boolean m_bStopWasRequested = false;
        private final File m_directory;
        private final CrawlerContext m_crawlerContext;
        private final DirectoryCrawlerContext m_directoryCrawlerContext;
        private final OneAfterOneIterator m_oneAfterOneIterator;

        AnonymousClass1(File file, CrawlerContext crawlerContext, DirectoryCrawlerContext directoryCrawlerContext, OneAfterOneIterator oneAfterOneIterator) {
            this.m_directory = file;
            this.m_crawlerContext = crawlerContext;
            this.m_directoryCrawlerContext = directoryCrawlerContext;
            this.m_oneAfterOneIterator = oneAfterOneIterator;
        }

        /**
         * Walks the directory through a {@link FileFilter} that forwards each accepted entry to the
         * iterator and always rejects it, so {@code listFiles} never collects a result array.
         * Unless a stop was requested, the end marker is queued afterwards.
         */
        @Override
        public void run() {
            try {
                this.m_directory.listFiles(new FileFilter() {
                    @Override
                    public boolean accept(File file) {
                        if (m_crawlerContext.stopRequested().booleanValue()) {
                            m_bStopWasRequested = true;
                            return false;
                        }
                        File accepted = DirectoryCrawlerParser.this.checkIfInConstraints(file, m_crawlerContext, m_directoryCrawlerContext);
                        if (accepted != null) {
                            MultiValueHashMap<String, Object> entry = new MultiValueHashMap<>();
                            entry.add("fileObject", accepted);
                            entry.add(CrawlerParser.SOURCEID, accepted.getAbsolutePath());
                            // Blocks until the consumer has taken the entry (SynchronousQueue hand-off).
                            m_oneAfterOneIterator.addNextElement(entry);
                        }
                        // Always reject: the entry was already handed over, no result array is needed.
                        return false;
                    }
                });
            } finally {
                // Queue the end marker even when listing failed with an exception; otherwise a
                // consumer waiting in hasNext() would block forever. After a stop request no
                // marker is queued (unchanged behavior).
                // NOTE(review): presumably the consumer stops polling after a stop request - confirm.
                if (!this.m_bStopWasRequested && !this.m_crawlerContext.stopRequested().booleanValue()) {
                    this.m_oneAfterOneIterator.addNextElement(OneAfterOneIterator.m_noMoreLeftMarker);
                }
            }
        }
    }

    /**
     * Iterator fed by a producer thread through a {@link SynchronousQueue}: every element passed to
     * {@link #addNextElement} is handed directly to the thread calling {@link #hasNext()}. The
     * producer signals the end by adding {@link #m_noMoreLeftMarker}.
     */
    protected static class OneAfterOneIterator implements Iterator<MultiValueHashMap<String, Object>> {
        /** Sentinel (compared by identity) that marks the end of the iteration. */
        static final MultiValueHashMap<String, Object> m_noMoreLeftMarker = new MultiValueHashMap<>();

        /** Element fetched by {@link #hasNext()} and not yet returned by {@link #next()}, or {@code null}. */
        protected MultiValueHashMap<String, Object> m_nextElement;
        protected SynchronousQueue<MultiValueHashMap<String, Object>> m_synchronousQueue = new SynchronousQueue<>();

        protected OneAfterOneIterator() {
        }

        /**
         * Hands an element to the consumer, blocking until it has been taken. If the calling
         * thread is interrupted while waiting, the element is dropped, the error is logged and the
         * interrupt status is restored.
         *
         * @param multiValueHashMap the element to hand over, or {@link #m_noMoreLeftMarker} to end the iteration
         */
        public void addNextElement(MultiValueHashMap<String, Object> multiValueHashMap) {
            try {
                this.m_synchronousQueue.put(multiValueHashMap);
            } catch (InterruptedException e) {
                Logger.getLogger(OneAfterOneIterator.class.getName()).log(Level.SEVERE, "Error", (Throwable) e);
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Waits for the next element unless one is already buffered. Calling this method several
         * times without {@link #next()} in between does not consume additional elements, and once
         * the end marker was received it keeps returning {@code false} without blocking.
         *
         * @return {@code true} if an element is available; {@code false} at the end of the
         *         iteration or if the waiting thread was interrupted
         */
        @Override
        public boolean hasNext() {
            if (this.m_nextElement == null) {
                try {
                    this.m_nextElement = this.m_synchronousQueue.take();
                } catch (InterruptedException e) {
                    Logger.getLogger(OneAfterOneIterator.class.getName()).log(Level.SEVERE, "Error", (Throwable) e);
                    Thread.currentThread().interrupt();
                    return false;
                }
            }
            return m_noMoreLeftMarker != this.m_nextElement;
        }

        /**
         * Returns the buffered element, fetching it first if {@link #hasNext()} was not called.
         *
         * @return the next element
         * @throws java.util.NoSuchElementException if the iteration has no more elements
         */
        @Override
        public MultiValueHashMap<String, Object> next() {
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            MultiValueHashMap<String, Object> current = this.m_nextElement;
            // Clear the buffer so the following hasNext() fetches a fresh element.
            this.m_nextElement = null;
            return current;
        }

        /**
         * Not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Decides whether a file or directory should be crawled.
     *
     * <p>An entry is rejected when symbolic links are not followed and its absolute path differs
     * from its canonical path, when the URL filter of the crawler context does not accept its URI,
     * when it is hidden and hidden files are ignored, when {@link OSUtils#isMacOSXBundle(File)}
     * reports {@code true}, or when it cannot be read. Reasons are logged only in verbose mode.
     *
     * @param file the candidate entry
     * @param crawlerContext supplies the URL filter and the verbose flag
     * @param directoryCrawlerContext supplies the symbolic-link and hidden-file settings
     * @return the entry in canonical form (the original {@code file} if the canonical path could
     *         not be resolved), or {@code null} if the entry must be skipped
     */
    protected File checkIfInConstraints(File file, CrawlerContext crawlerContext, DirectoryCrawlerContext directoryCrawlerContext) {
        File resolved = file;
        try {
            String absolutePath = file.getAbsolutePath();
            String canonicalPath = file.getCanonicalPath();
            if (!directoryCrawlerContext.getFollowSymbolicLinks() && !absolutePath.equals(canonicalPath)) {
                if (crawlerContext.getVerbose().booleanValue()) {
                    Logger.getLogger(DirectoryCrawlerParser.class.getName()).info("File " + file.toURI() + " is a symbolic link that should be ignored. Skipping.");
                }
                return null;
            }
            resolved = new File(canonicalPath);
        } catch (IOException e) {
            // Fall back to the unresolved file; the remaining checks still apply to it.
            Logger.getLogger(DirectoryCrawlerParser.class.getName()).log(Level.WARNING, "Unable to resolve file to its canonical form, continuing with original file: " + file, (Throwable) e);
        }

        if (!crawlerContext.getURLFilter().accept(resolved.toURI().toString())) {
            if (crawlerContext.getVerbose().booleanValue()) {
                Logger.getLogger(CrawlerParser.class.getName()).info("File " + resolved.toURI() + " is outside the URL constraints for this data source. Skipping.");
            }
            return null;
        }
        if ((directoryCrawlerContext.getIgnoreHiddenFiles() && resolved.isHidden()) || OSUtils.isMacOSXBundle(resolved)) {
            return null;
        }
        if (resolved.canRead()) {
            return resolved;
        }
        if (crawlerContext.getVerbose().booleanValue()) {
            Logger.getLogger(DirectoryCrawlerParser.class.getName()).info("Can't read file " + resolved.toURI() + ". Skipping.");
        }
        return null;
    }

    /**
     * Starts a background thread that lists the directory given by the {@code "source"} metadata
     * entry and returns an iterator over the entries that thread delivers.
     *
     * @param inputStream not used by this implementation
     * @param contentHandler not used by this implementation
     * @param metadata must contain the directory URL under the key {@code "source"}
     * @param parseContext may carry a {@link CrawlerContext} and a {@link DirectoryCrawlerContext};
     *        freshly constructed instances are used when absent
     * @return an iterator over the directory entries, or an empty iterator if the directory itself
     *         is rejected by {@link #checkIfInConstraints}
     * @throws IllegalStateException if the source does not denote a directory
     * @throws Exception if the source cannot be converted into a file
     */
    @Override // de.dfki.km.leech.parser.CrawlerParser
    protected Iterator<MultiValueHashMap<String, Object>> getSubDataEntitiesInformation(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        String source = metadata.get("source");
        CrawlerContext crawlerContext = parseContext.get(CrawlerContext.class, new CrawlerContext());
        DirectoryCrawlerContext directoryCrawlerContext = parseContext.get(DirectoryCrawlerContext.class, new DirectoryCrawlerContext());

        File directory = new File(new URL(source).toURI());
        if (!directory.isDirectory()) {
            throw new IllegalStateException("' " + source + "' is no directory");
        }

        File acceptedDirectory = checkIfInConstraints(directory, crawlerContext, directoryCrawlerContext);
        if (acceptedDirectory == null) {
            return Collections.<MultiValueHashMap<String, Object>>emptyIterator();
        }

        OneAfterOneIterator oneAfterOneIterator = new OneAfterOneIterator();
        Runnable listingTask = new AnonymousClass1(acceptedDirectory, crawlerContext, directoryCrawlerContext, oneAfterOneIterator);
        new Thread(listingTask, "DirectoryCrawlserParser listFiles").start();
        return oneAfterOneIterator;
    }

    /**
     * @param parseContext not used by this implementation
     * @return the singleton set containing {@link DatasourceMediaTypes#DIRECTORY}
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return Collections.singleton(DatasourceMediaTypes.DIRECTORY);
    }

    /** Intentionally empty: this parser does nothing for the directory entity itself. */
    @Override // de.dfki.km.leech.parser.CrawlerParser
    protected void processCurrentDataEntity(InputStream inputStream, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
    }

    /**
     * Parses one directory entry: opens a stream for the file stored under {@code "fileObject"} and
     * feeds it to the parser of {@link #m_leech}, creating the {@link Leech} instance on first use.
     * The stream is always closed; a failure while closing no longer hides a parse failure.
     *
     * @param multiValueHashMap entry information produced by the directory listing; must contain
     *        the {@link File} under {@code "fileObject"}
     * @param metadata metadata passed on to the stream provider
     * @param contentHandler receives the parse events
     * @param parseContext context passed on to the stream provider and the parser
     * @throws Exception if the stream cannot be opened or parsing fails
     */
    @Override // de.dfki.km.leech.parser.CrawlerParser
    protected void processSubDataEntity(MultiValueHashMap<String, Object> multiValueHashMap, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
        File entry = (File) multiValueHashMap.getFirst("fileObject");
        URLName entryUrl = new URLName(entry.toURI().toURL());
        Metadata entryMetadata = URLStreamProvider.getURLStreamProvider(entryUrl).addFirstMetadata(entryUrl, metadata, parseContext);
        try (TikaInputStream stream = URLStreamProvider.getURLStreamProvider(entryUrl).getStream(entryUrl, entryMetadata, parseContext)) {
            if (this.m_leech == null) {
                this.m_leech = new Leech();
            }
            this.m_leech.getParser().parse(stream, contentHandler, entryMetadata, parseContext);
        }
    }
}
