package de.dfki.leech.foresight;

import de.dfki.km.leech.Leech;
import de.dfki.km.leech.SubDataEntityContentHandler;
import de.dfki.km.leech.sax.PrintlnContentHandler;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/leech/foresight/SagePubDocumentParser.class */
public class SagePubDocumentParser extends AbstractParser {
    private static final long serialVersionUID = -4358818721569908234L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet(Arrays.asList(MediaType.application("vnd.fs.sagepubDocument"))));

    public static void main(String[] strArr) throws Exception {
        new Leech().parse("http://erg.sagepub.com/content/17/3/14.abstract", new PrintlnContentHandler(PrintlnContentHandler.Verbosity.all), new ParseContext());
    }

    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        inputStream.close();
        String str = metadata.get("source");
        Document document = Jsoup.connect(str).get();
        Elements select = document.select(".slug-pub-date");
        if (select.isEmpty()) {
            select = document.select(".slug-ahead-of-print-date");
        }
        if (!select.isEmpty()) {
            String text = select.text();
            metadata.remove("modified");
            metadata.add("modified", text);
        }
        boolean z = false;
        Elements select2 = document.select("#article-title-1");
        if (select2.isEmpty() && !document.select("#hw-login-msg-box-text").isEmpty()) {
            select2 = document.select(".cit-title");
            z = true;
        }
        if (!select2.isEmpty()) {
            metadata.remove("title");
            metadata.add("title", select2.text());
        }
        Elements select3 = document.select(".contributor-list");
        if (!select3.isEmpty()) {
            Elements select4 = select3.select(".name-search");
            metadata.remove("creator");
            Iterator it = select4.iterator();
            while (it.hasNext()) {
                metadata.add("creator", ((Element) it.next()).text());
            }
        }
        Elements select5 = document.select(".abstract");
        String text2 = select5.isEmpty() ? "" : select5.text();
        if (text2.startsWith("Abstract ")) {
            text2 = text2.replaceFirst("Abstract ", "");
        }
        HashSet hashSet = new HashSet();
        Elements select6 = document.select(".cit-auth");
        if (select6.isEmpty()) {
            Document document2 = Jsoup.connect(str.replaceFirst("\\D*$", ".refs")).get();
            if (document2.select("#hw-login-msg-box-text").isEmpty()) {
                select6 = document2.select(".cit-auth");
            }
        }
        if (!select6.isEmpty()) {
            metadata.remove("referenceAuthor");
            Iterator it2 = select6.iterator();
            while (it2.hasNext()) {
                hashSet.add(((Element) it2.next()).text());
            }
            Iterator it3 = hashSet.iterator();
            while (it3.hasNext()) {
                String str2 = (String) it3.next();
                if (z && metadata.get("creator") == null) {
                    metadata.add("creator", str2);
                } else {
                    metadata.add("referenceAuthor", str2);
                }
            }
        }
        new SubDataEntityContentHandler(contentHandler, metadata, text2).triggerSubDataEntityHandling();
    }
}
