package de.dfki.leech.foresight;

import de.dfki.km.leech.Leech;
import de.dfki.km.leech.SubDataEntityContentHandler;
import de.dfki.km.leech.sax.PrintlnContentHandler;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/leech/foresight/ScienceDirectDocumentParser.class */
public class ScienceDirectDocumentParser extends AbstractParser {
    private static final long serialVersionUID = -4358818721569908234L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet(Arrays.asList(MediaType.application("vnd.fs.sciencedirectDocument"))));

    public static void main(String[] strArr) throws Exception {
        new Leech().parse("http://www.sciencedirect.com/science/article/pii/S1569190X11001973", new PrintlnContentHandler(PrintlnContentHandler.Verbosity.all), new ParseContext());
    }

    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        inputStream.close();
        String str = metadata.get("source");
        LogFactory.getFactory().setAttribute("org.apache.commons.logging.Log", "org.apache.commons.logging.impl.NoOpLog");
        Logger.getLogger("com.gargoylesoftware.htmlunit").setLevel(Level.OFF);
        Logger.getLogger("org.apache.commons.httpclient").setLevel(Level.OFF);
        HtmlUnitDriver htmlUnitDriver = new HtmlUnitDriver(true);
        try {
            htmlUnitDriver.manage().timeouts().implicitlyWait(10L, TimeUnit.SECONDS);
            htmlUnitDriver.get(str);
            List findElements = htmlUnitDriver.findElements(By.cssSelector(".volIssue"));
            if (!findElements.isEmpty()) {
                for (String str2 : ((WebElement) findElements.get(0)).getText().split(",")) {
                    if (!str2.toLowerCase().contains("volume") && !str2.toLowerCase().contains("issue") && !str2.toLowerCase().contains("pages")) {
                        metadata.remove("modified");
                        metadata.add("modified", str2.trim());
                    }
                }
            }
            List findElements2 = htmlUnitDriver.findElements(By.cssSelector("title"));
            if (!findElements2.isEmpty()) {
                metadata.remove("title");
                metadata.add("title", ((WebElement) findElements2.get(0)).getText());
            }
            List findElements3 = htmlUnitDriver.findElements(By.cssSelector(".authorName"));
            if (!findElements3.isEmpty()) {
                metadata.remove("creator");
                Iterator it = findElements3.iterator();
                while (it.hasNext()) {
                    metadata.add("creator", ((WebElement) it.next()).getText());
                }
            }
            String str3 = "";
            List findElements4 = htmlUnitDriver.findElements(By.cssSelector("div.abstract"));
            if (!findElements4.isEmpty()) {
                List findElements5 = ((WebElement) findElements4.get(0)).findElements(By.cssSelector("p"));
                if (!findElements5.isEmpty()) {
                    str3 = ((WebElement) findElements5.get(0)).getText();
                }
            }
            new SubDataEntityContentHandler(contentHandler, metadata, str3).triggerSubDataEntityHandling();
            htmlUnitDriver.quit();
        } catch (Throwable th) {
            htmlUnitDriver.quit();
            throw th;
        }
    }
}
