package de.dfki.catwiesel.synchronizer.importer.xml;

import de.dfki.catwiesel.categorymanager.CategoryManager;
import de.dfki.catwiesel.index.AllTypesMultiValueMap;
import de.dfki.catwiesel.index.IndexManagerQueue;
import de.dfki.catwiesel.synchronizer.importer.EnhancedRawData;
import de.dfki.catwiesel.synchronizer.importer.ImportConfiguration;
import de.dfki.catwiesel.synchronizer.importer.Importer;
import de.dfki.catwiesel.synchronizer.importer.ImporterException;
import de.dfki.catwiesel.synchronizer.importer.ImporterHelper;
import de.dfki.catwiesel.util.Catwiesel;
import de.dfki.catwiesel.util.DateParser;
import de.dfki.catwiesel.util.FileHandling;
import de.dfki.catwiesel.util.MetaData;
import de.dfki.catwiesel.util.SimpleMultiValueMap;
import de.dfki.catwiesel.vocabulary.AttributeURIs;
import de.dfki.catwiesel.vocabulary.StringConstants;
import de.dfki.inquisition.collections.ConfigurationException;
import de.dfki.inquisition.collections.MultiValueConfiguration;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.model.node.impl.URIImpl;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/catwiesel/synchronizer/importer/xml/RssFeedImporter.class */
public class RssFeedImporter implements Importer {
    /** Tag name of the element that holds the feed-level data and the items. */
    public static final String CHANNEL_TAGNAME = "channel";
    /** Tag name of a single feed entry. */
    public static final String ITEM_TAGNAME = "item";
    /** Import configurations offered by this importer, keyed by import type. */
    private Map<String, ImportConfiguration> m_importCapabilities;
    /** Helper used to build {@link EnhancedRawData} objects and to resolve URIs from source strings. */
    private ImporterHelper m_importerHelper;
    /** Link targets collected by {@code getTextOfHtmlTag}; replaced with a fresh set in {@code addAttributeIfTranslatable}. */
    private HashSet<String> m_containedLinks;
    /** For each container tag (channel / item): the mapping from child tag names to attribute URIs. */
    private static final Map<String, SimpleMultiValueMap<String, URI>> m_attributeMappingTable = createAttributeMappingTable();
    private static final Logger m_logger = Logger.getLogger(RssFeedImporter.class.getName());
    /** Threads for which a stop was requested; {@code stopImport} and {@code reset} synchronize on this map. */
    private final HashMap<Thread, Boolean> m_stopRequests = new HashMap<>();
    /** Queue that receives the attribute maps built for feeds and feed items. */
    private final IndexManagerQueue m_indexManagerQueue = IndexManagerQueue.getInstance();
    /** Factory used by {@code createParser}, which synchronizes on it. */
    private final DocumentBuilderFactory m_documentBuilderFactory = DocumentBuilderFactory.newInstance();
    /** Import types accepted by {@code process}. */
    private final List<String> m_typeList = new LinkedList<>();

    /**
     * Creates an importer that accepts the RSS feed and RSS feed item import types.
     *
     * @param multiValueConfiguration not used by this constructor
     * @param importerHelper helper stored for later creation of raw data objects and URI lookups
     * @param categoryManager not used by this constructor
     * @throws ImporterException declared for callers; not thrown by the visible code
     */
    public RssFeedImporter(MultiValueConfiguration multiValueConfiguration, ImporterHelper importerHelper, CategoryManager categoryManager) throws ImporterException {
        m_typeList.add(StringConstants.IMPORT_TYPE_RSS_FEED);
        m_typeList.add(StringConstants.IMPORT_TYPE_RSS_FEED_ITEM);
        m_importerHelper = importerHelper;
        createImportCapabilities();
    }

    /**
     * Builds the translation table from RSS tag names to attribute URIs.
     * The outer key is the container tag ({@link #CHANNEL_TAGNAME} or
     * {@link #ITEM_TAGNAME}); the inner map translates the name of a child
     * tag into the attribute URIs its text is stored under.
     *
     * @return the freshly built mapping table
     */
    private static Map<String, SimpleMultiValueMap<String, URI>> createAttributeMappingTable() {
        // Typed locals instead of raw collections so the compiler checks keys and values.
        Map<String, SimpleMultiValueMap<String, URI>> table = new HashMap<>();

        SimpleMultiValueMap<String, URI> channelMapping = new SimpleMultiValueMap<>();
        channelMapping.add("title", AttributeURIs.CATEGORY_NAME);
        channelMapping.add("link", AttributeURIs.SOURCE);
        table.put(CHANNEL_TAGNAME, channelMapping);

        SimpleMultiValueMap<String, URI> itemMapping = new SimpleMultiValueMap<>();
        itemMapping.add("title", AttributeURIs.TITLE);
        itemMapping.add("link", AttributeURIs.SOURCE);
        itemMapping.add("description", AttributeURIs.ABSTRACT);
        itemMapping.add("content:encoded", AttributeURIs.CONTENT);
        // Several date-carrying tags all end up in the modification date attribute.
        itemMapping.add("pubDate", AttributeURIs.MODIFICATION_DATE);
        itemMapping.add(AttributeURIs.DATE.toString(), AttributeURIs.MODIFICATION_DATE);
        itemMapping.add("dc:date", AttributeURIs.MODIFICATION_DATE);
        itemMapping.add("dc:creator", AttributeURIs.CREATOR);
        itemMapping.add("author", AttributeURIs.CREATOR);
        table.put(ITEM_TAGNAME, itemMapping);

        return table;
    }

    /**
     * Initialises the capability map with a single configuration for the RSS
     * feed import type, describing its source and parent URI keys plus the
     * default import attributes.
     */
    private void createImportCapabilities() {
        m_importCapabilities = new HashMap<>();

        ImportConfiguration rssFeedConfiguration = new ImportConfiguration(StringConstants.IMPORT_TYPE_RSS_FEED);
        rssFeedConfiguration.add(ImportConfiguration.SOURCE_KEY, "The pathname of the rss feed file");
        rssFeedConfiguration.add(ImportConfiguration.PARENT_URI_KEY, "The URI of the parent category.");
        rssFeedConfiguration.addDefaultImportAttributes();

        m_importCapabilities.put(StringConstants.IMPORT_TYPE_RSS_FEED, rssFeedConfiguration);
    }

    /**
     * Dispatches the given raw data to the matching handler, depending on
     * whether its payload is an {@link InputStream}, a {@link File} or a DOM
     * {@link Node}.
     *
     * @param enhancedRawData the data to import
     * @return {@code false} if the import type is not handled by this importer;
     *         {@code true} if a stop was requested for the current thread or
     *         the data was processed
     * @throws ImporterException if the payload has an unsupported class or a handler fails
     */
    @Override // de.dfki.catwiesel.synchronizer.importer.Importer
    public boolean process(EnhancedRawData enhancedRawData) throws ImporterException {
        if (!this.m_typeList.contains(enhancedRawData.getImportType())) {
            return false;
        }
        // stopImport()/reset() modify the map from other threads while holding its
        // monitor, so the lookup has to take the same lock.
        boolean stopRequested;
        synchronized (this.m_stopRequests) {
            stopRequested = this.m_stopRequests.containsKey(Thread.currentThread());
        }
        if (stopRequested) {
            getLogger().info("Got stop request");
            return true;
        }
        Object data = enhancedRawData.getData();
        getLogger().fine("processing data " + enhancedRawData);
        if (data instanceof InputStream) {
            return processRssFeedStream(enhancedRawData);
        }
        if (data instanceof File) {
            return processRssFeedFile(enhancedRawData);
        }
        if (data instanceof Node) {
            return processRssFeedItem(enhancedRawData);
        }
        getLogger().log(Level.WARNING, "Data is not an instance of a known class");
        throw new ImporterException("Data is not an instance of a known class");
    }

    /**
     * Turns a single feed item node into an attribute map and hands it to the
     * index manager queue.
     *
     * @param enhancedRawData raw data whose payload is the item's DOM node
     * @return always {@code true}
     * @throws ImporterException if the item has no unique source or no content
     */
    private boolean processRssFeedItem(EnhancedRawData enhancedRawData) throws ImporterException {
        Node itemNode = (Node) enhancedRawData.getData();
        NodeList children = itemNode.getChildNodes();

        AllTypesMultiValueMap itemAttributes = AllTypesMultiValueMap.createForDocument(enhancedRawData);
        AllTypesMultiValueMap.addAdditionalMetaAttributes(itemAttributes, enhancedRawData.getMetaAttributeIterator());
        itemAttributes.add(AttributeURIs.MIME_TYPE, StringConstants.IMPORT_TYPE_HTML);
        // The source is taken from the item's own tags below, so drop the preset one.
        itemAttributes.removeAll(AttributeURIs.SOURCE);

        int index = 0;
        while (index < children.getLength()) {
            addAttributeIfTranslatable(ITEM_TAGNAME, children.item(index), itemAttributes);
            index++;
        }

        replaceUriWithCorrectUriFromSourceAttribute(itemAttributes);
        ensureCorrectTranslation(itemAttributes, enhancedRawData);
        itemAttributes.add(StringConstants.INDEX_TYPE_STRUCTURE, AttributeURIs.PARENT_URI, enhancedRawData.getParentURI());
        MetaData.checkValidityOfMapAndAutocorrect(itemAttributes);

        String className = getClass().getName();
        getLogger().finer(className + " is putting map \"" + itemAttributes + "\" of rss feed item into the IndexManagerQueue");
        this.m_indexManagerQueue.insert(itemAttributes);
        return true;
    }

    /**
     * Overwrites the URI of the given item map with the URI that the importer
     * helper derives from the map's single source attribute.
     *
     * @param allTypesMultiValueMap attribute map of a feed item
     * @throws ImporterException if the map does not hold exactly one source value
     */
    private void replaceUriWithCorrectUriFromSourceAttribute(AllTypesMultiValueMap allTypesMultiValueMap) throws ImporterException {
        Set<Object> sources = allTypesMultiValueMap.get(StringConstants.INDEX_TYPE_TEXT, AttributeURIs.SOURCE);
        // Treat a missing value set like an empty one (the channel variant of this
        // method does the same) instead of failing with a NullPointerException.
        if (sources == null || sources.size() != 1) {
            throw new ImporterException("The source attribute of a feed item could not be found");
        }
        allTypesMultiValueMap.overwriteURI(this.m_importerHelper.getUriBySource((String) sources.iterator().next(), false));
    }

    /**
     * Makes sure the map carries a content attribute. If it has none, the
     * abstract values are moved into the content attribute; if there is no
     * abstract either, the item is rejected.
     *
     * @param allTypesMultiValueMap attribute map of a feed item
     * @param enhancedRawData raw data of the item, used for the error message
     * @throws ImporterException if neither content nor abstract is present
     */
    private void ensureCorrectTranslation(AllTypesMultiValueMap allTypesMultiValueMap, EnhancedRawData enhancedRawData) throws ImporterException {
        if (allTypesMultiValueMap.containsAttribute(AttributeURIs.CONTENT)) {
            return;
        }
        if (allTypesMultiValueMap.containsAttribute(AttributeURIs.ABSTRACT)) {
            // Promote the abstract to content, then drop the abstract attribute.
            Set<Object> abstractValues = allTypesMultiValueMap.get(StringConstants.INDEX_TYPE_TEXT, AttributeURIs.ABSTRACT);
            allTypesMultiValueMap.addAll(StringConstants.INDEX_TYPE_TEXT, AttributeURIs.CONTENT, abstractValues);
            allTypesMultiValueMap.removeAll(AttributeURIs.ABSTRACT);
            return;
        }
        getLogger().log(Level.WARNING, "Could not find content of item '" + enhancedRawData.getSource() + "'");
        throw new ImporterException("Could not find content of item '" + enhancedRawData.getSource() + "'");
    }

    /**
     * Opens the feed file carried by the raw data and processes its content as
     * a feed stream. The stream is closed once processing has finished or failed.
     *
     * @param enhancedRawData raw data whose payload is the feed {@link File}
     * @return the result of processing the stream
     * @throws ImporterException if the file cannot be found or read, or the feed cannot be processed
     */
    private boolean processRssFeedFile(EnhancedRawData enhancedRawData) throws ImporterException {
        File file = (File) enhancedRawData.getData();
        // Start with the plain path so error messages stay meaningful even when
        // resolving the canonical path itself fails.
        String path = file.getPath();
        try {
            path = file.getCanonicalPath();
            // try-with-resources: the feed is parsed completely inside
            // processRssFeedStream, so the stream can be released afterwards.
            try (InputStream feedStream = new FileInputStream(file)) {
                EnhancedRawData streamData = this.m_importerHelper.getEnhancedRawData((Object) feedStream, path, enhancedRawData.getParentURI(), enhancedRawData.getImportType(), false);
                EnhancedRawData.addAdditionalMetaAttributes(streamData, enhancedRawData.getMetaAttributeIterator());
                return processRssFeedStream(streamData);
            }
        } catch (FileNotFoundException e) {
            getLogger().log(Level.WARNING, "Could not find file '" + path + "'", e);
            throw new ImporterException("Could not find file '" + path + "'", e);
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error while processing file '" + path + "'", e);
            throw new ImporterException("Error while processing file '" + path + "'", e);
        }
    }

    /**
     * Parses the feed from the stream carried by the raw data, queues the
     * channel's attribute map and then processes every item of the channel.
     *
     * @param enhancedRawData raw data whose payload is the feed {@link InputStream}
     * @return always {@code true}
     * @throws ImporterException wrapping anything thrown while the feed is processed
     */
    private boolean processRssFeedStream(EnhancedRawData enhancedRawData) throws ImporterException {
        try {
            InputStream feedStream = (InputStream) enhancedRawData.getData();

            AllTypesMultiValueMap channelAttributes = AllTypesMultiValueMap.createForCategory(enhancedRawData);
            AllTypesMultiValueMap.addAdditionalMetaAttributes(channelAttributes, enhancedRawData.getMetaAttributeIterator());
            channelAttributes.add(AttributeURIs.MIME_TYPE, StringConstants.IMPORT_TYPE_HTML);
            channelAttributes.removeAll(AttributeURIs.SOURCE);

            // Channel-level tags go into channelAttributes; the item nodes are returned.
            Document feedDocument = parseStream(feedStream);
            List<Node> itemNodes = extractTagsOfInterest(feedDocument, channelAttributes);

            replaceFeedFileUriWithCorrectUriFromTitleAttribute(channelAttributes);
            channelAttributes.add(StringConstants.INDEX_TYPE_STRUCTURE, AttributeURIs.PARENT_URI, enhancedRawData.getParentURI());
            String className = getClass().getName();
            getLogger().finer(className + " is putting map \"" + channelAttributes + "\" of rss feed stream into the IndexManagerQueue");
            this.m_indexManagerQueue.insert(channelAttributes);

            for (Node itemNode : itemNodes) {
                // Each item becomes a child of the channel and runs through process() again.
                EnhancedRawData itemData = this.m_importerHelper.getEnhancedRawData((Object) itemNode, "toBeExtracted", channelAttributes.getURI(), StringConstants.IMPORT_TYPE_RSS_FEED_ITEM, false);
                EnhancedRawData.addAdditionalMetaAttributes(itemData, enhancedRawData.getMetaAttributeIterator());
                process(itemData);
            }
            return true;
        } catch (Throwable failure) {
            getLogger().log(Level.WARNING, "Error while processing stream '" + enhancedRawData.getSource() + "'", failure);
            throw new ImporterException("Error while processing stream '" + enhancedRawData.getSource() + "'", failure);
        }
    }

    /**
     * Overwrites the URI of the given channel map with the URI that the
     * importer helper derives from the map's single source attribute.
     *
     * @param allTypesMultiValueMap attribute map of the channel
     * @throws ImporterException if the map does not hold exactly one source value
     */
    private void replaceFeedFileUriWithCorrectUriFromTitleAttribute(AllTypesMultiValueMap allTypesMultiValueMap) throws ImporterException {
        Set<Object> sources = allTypesMultiValueMap.get(StringConstants.INDEX_TYPE_TEXT, AttributeURIs.SOURCE);
        boolean exactlyOneSource = sources != null && sources.size() == 1;
        if (!exactlyOneSource) {
            throw new ImporterException("The source attribute of a channel item could not be found");
        }
        String source = (String) sources.iterator().next();
        allTypesMultiValueMap.overwriteURI(this.m_importerHelper.getUriBySource(source, false));
    }

    /**
     * Walks the children of the document's single channel element. Item
     * elements are collected and returned; every other child is offered to
     * {@code addAttributeIfTranslatable} as a channel-level tag.
     *
     * @param document the parsed feed
     * @param allTypesMultiValueMap map that receives the channel-level attributes
     * @return the item nodes in document order
     * @throws ImporterException if the document does not contain exactly one channel element
     */
    private List<Node> extractTagsOfInterest(Document document, AllTypesMultiValueMap allTypesMultiValueMap) throws ImporterException {
        NodeList channels = document.getElementsByTagName(CHANNEL_TAGNAME);
        if (channels.getLength() != 1) {
            throw new ImporterException("Error: could not find 'channel' element");
        }

        List<Node> itemNodes = new LinkedList<>();
        Node channel = channels.item(0);
        if (!CHANNEL_TAGNAME.equals(channel.getNodeName())) {
            return itemNodes;
        }

        NodeList children = channel.getChildNodes();
        int index = 0;
        while (index < children.getLength()) {
            Node child = children.item(index);
            if (ITEM_TAGNAME.equals(child.getNodeName())) {
                itemNodes.add(child);
            } else {
                addAttributeIfTranslatable(CHANNEL_TAGNAME, child, allTypesMultiValueMap);
            }
            index++;
        }
        return itemNodes;
    }

    /**
     * Stores the text of the given node in the map under every attribute URI
     * that the mapping table lists for the node's tag name. Tags without a
     * mapping are ignored. Links found while the text is extracted are added
     * as links-in-content values.
     *
     * @param str container tag name that selects the mapping (channel or item)
     * @param node the tag to translate
     * @param allTypesMultiValueMap map that receives the attributes
     * @throws ImporterException if the node text cannot be extracted
     */
    private void addAttributeIfTranslatable(String str, Node node, AllTypesMultiValueMap allTypesMultiValueMap) throws ImporterException {
        SimpleMultiValueMap<String, URI> tagMapping = getAttributeMappingTable().get(str);
        if (tagMapping == null) {
            return;
        }
        Set<URI> targetAttributes = tagMapping.get(node.getNodeName());
        if (targetAttributes == null) {
            return;
        }

        for (URI targetAttribute : targetAttributes) {
            // getNodeText() reports links through this instance field, so it is reset
            // before every extraction.
            // NOTE(review): shared instance state - confirm that one importer
            // instance is never used by two threads at the same time.
            this.m_containedLinks = new HashSet<>();
            String nodeText = getNodeText(node);
            for (String link : this.m_containedLinks) {
                allTypesMultiValueMap.add(AttributeURIs.LINKS_IN_CONTENT, link);
            }

            if (targetAttribute.equals(AttributeURIs.SOURCE)) {
                // A source is stored both as URI string and in human readable form.
                URIImpl sourceUri = new URIImpl(nodeText);
                String sourceString = sourceUri.toString();
                String humanReadableSource = Catwiesel.getHumanReadableSourceString(sourceUri);
                allTypesMultiValueMap.add(AttributeURIs.SOURCE, sourceString);
                allTypesMultiValueMap.add(AttributeURIs.SOURCE_HUMAN_READABLE, humanReadableSource);
                continue;
            }
            if (MetaData.isRepresentingADateObject(targetAttribute)) {
                allTypesMultiValueMap.add(targetAttribute, DateParser.parseDateString(nodeText));
                continue;
            }
            allTypesMultiValueMap.add(targetAttribute, nodeText);
        }
    }

    /**
     * Concatenates the text of the node's direct children. Text children are
     * taken as they are, CDATA children are run through the HTML parser first,
     * all other children are skipped.
     *
     * @param node the node whose text is wanted
     * @return the trimmed, concatenated text
     * @throws ImporterException if a CDATA section cannot be parsed
     */
    private String getNodeText(Node node) throws ImporterException {
        StringBuilder text = new StringBuilder();
        NodeList children = node.getChildNodes();
        for (int index = 0; index < children.getLength(); index++) {
            Node child = children.item(index);
            String childName = child.getNodeName();
            switch (childName) {
                case "#text":
                    text.append(child.getNodeValue());
                    break;
                case "#cdata-section":
                    String rawHtml = child.getNodeValue();
                    String plainText = parseHtmlString(rawHtml);
                    text.append(plainText);
                    if (getLogger().isLoggable(Level.FINEST)) {
                        getLogger().finest("Translated cdata string '" + rawHtml + "'\n-->\n'" + plainText + "'\n***");
                    }
                    break;
                default:
                    getLogger().finest("Node with name '" + childName + "' was not translated");
                    break;
            }
        }
        return text.toString().trim();
    }

    /**
     * Wraps the given HTML fragment into a minimal document, parses it with
     * Tidy and returns the text found in the resulting body.
     *
     * @param str the HTML fragment
     * @return the text of the parsed body, as produced by {@code getHtmlBody}
     * @throws ImporterException if the fragment cannot be encoded
     */
    private String parseHtmlString(String str) throws ImporterException {
        try {
            String html = "<head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"><title></title></head><body>" + str + "</body>";
            byte[] encodedHtml = html.getBytes(FileHandling.ENCODING_TYPE_UTF8);
            ByteArrayInputStream htmlStream = new ByteArrayInputStream(encodedHtml);

            Tidy tidy = new Tidy();
            // NOTE(review): 3 is presumably Tidy's UTF-8 encoding constant - confirm.
            tidy.setCharEncoding(3);
            tidy.setMakeClean(true);
            tidy.setRawOut(false);

            Document parsedHtml = tidy.parseDOM(htmlStream, (OutputStream) null);
            Element root = parsedHtml.getDocumentElement();
            return getHtmlBody(root);
        } catch (UnsupportedEncodingException e) {
            throw new ImporterException("Could not create data with correct encoding", e);
        }
    }

    /**
     * Returns the text of the first body element below the given root element.
     *
     * @param element root element of a parsed HTML document, may be {@code null}
     * @return the body text, or an empty string if there is no root element or no body
     */
    private String getHtmlBody(Element element) {
        if (element == null) {
            // The caller concatenates the result; returning null would put the
            // literal text "null" into the extracted content.
            return "";
        }
        NodeList bodies = element.getElementsByTagName("body");
        return bodies.getLength() > 0 ? getTextOfHtmlTag(bodies.item(0)) : "";
    }

    /**
     * Collects the text below the given HTML node. Text children are appended
     * as they are; element children are processed recursively and followed by
     * a single space. The {@code href} / {@code HREF} values of anchor elements
     * are added to the set of contained links.
     *
     * @param node the HTML node whose text is wanted
     * @return the concatenated text of the node's descendants
     */
    protected String getTextOfHtmlTag(Node node) {
        // The link set is normally created by the caller; create it here as well so
        // that a direct call (e.g. from a subclass) does not fail.
        if (this.m_containedLinks == null) {
            this.m_containedLinks = new HashSet<>();
        }
        StringBuilder text = new StringBuilder();
        NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            short nodeType = child.getNodeType();
            if (nodeType == Node.ELEMENT_NODE) {
                // Only elements carry attributes; other node types return null from
                // getAttributes(), so the anchor check is limited to elements.
                if ("a".equalsIgnoreCase(child.getNodeName())) {
                    NamedNodeMap attributes = child.getAttributes();
                    if (attributes != null) {
                        for (String attributeName : new String[] {"href", "HREF"}) {
                            Node href = attributes.getNamedItem(attributeName);
                            if (href != null) {
                                this.m_containedLinks.add(href.getNodeValue());
                            }
                        }
                    }
                }
                text.append(getTextOfHtmlTag(child));
                text.append(" ");
            } else if (nodeType == Node.TEXT_NODE) {
                text.append(((Text) child).getData());
            }
        }
        return text.toString();
    }

    /**
     * Parses the given stream into a DOM document with a newly created parser.
     *
     * @param inputStream the XML input
     * @return the parsed document
     * @throws IOException if reading the stream fails
     * @throws ParserConfigurationException if no parser can be created
     * @throws SAXException if the input cannot be parsed
     */
    private Document parseStream(InputStream inputStream) throws IOException, ParserConfigurationException, SAXException {
        DocumentBuilder parser = createParser();
        return parser.parse(inputStream);
    }

    /**
     * @return the shared table that maps container tag names to their
     *         tag-name-to-attribute-URI mappings
     */
    private Map<String, SimpleMultiValueMap<String, URI>> getAttributeMappingTable() {
        return RssFeedImporter.m_attributeMappingTable;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v1, types: [javax.xml.parsers.DocumentBuilderFactory] */
    /* JADX WARN: Type inference failed for: r0v2, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v5, types: [javax.xml.parsers.DocumentBuilder] */
    private DocumentBuilder createParser() throws ParserConfigurationException {
        ?? r0 = this.m_documentBuilderFactory;
        synchronized (r0) {
            r0 = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        }
        return r0;
    }

    /**
     * Starts the import of the RSS feed file named in the given configuration.
     *
     * @param importConfiguration configuration holding the import type, the
     *        source path and the parent URI
     * @return the URI of the raw data created for the feed, or {@code null} if
     *         the configuration's import type is not the RSS feed type
     * @throws ImporterException if the file cannot be read or the feed cannot be processed
     * @throws ConfigurationException passed on unchanged when thrown while the configuration is read
     */
    @Override // de.dfki.catwiesel.synchronizer.importer.Importer
    public URI startImport(ImportConfiguration importConfiguration) throws ImporterException, ConfigurationException {
        getLogger().finer("starting import");
        String importType = importConfiguration.getImportType();
        // Constant first, so a missing import type is rejected instead of raising an NPE.
        if (!StringConstants.IMPORT_TYPE_RSS_FEED.equals(importType)) {
            return null;
        }
        try {
            File file = new File(FileHandling.getNormalizedPath(importConfiguration.getUniqueAsString(ImportConfiguration.SOURCE_KEY)));
            String parentUriString = importConfiguration.getUniqueAsString(ImportConfiguration.PARENT_URI_KEY);
            // The feed is read completely inside process(), so the stream can be
            // closed as soon as that call returns.
            try (InputStream feedStream = new FileInputStream(file)) {
                EnhancedRawData enhancedRawData = this.m_importerHelper.getEnhancedRawData((Object) feedStream, file.getCanonicalPath(), Catwiesel.VIRTUAL_ROOT_STRING.equals(parentUriString) ? Catwiesel.VIRTUAL_ROOT_URI : new URIImpl(parentUriString), importType, false);
                enhancedRawData.addFixedAttributesIfAny(importConfiguration);
                process(enhancedRawData);
                return enhancedRawData.getURI();
            }
        } catch (ConfigurationException e) {
            // Must precede the general handler, otherwise this clause is unreachable.
            throw e;
        } catch (Exception e) {
            getLogger().log(Level.WARNING, "Cannot read file", e);
            throw new ImporterException(e);
        }
    }

    /**
     * @return the map of import configurations offered by this importer,
     *         keyed by import type (the internal map itself, not a copy)
     */
    @Override // de.dfki.catwiesel.synchronizer.importer.Importer
    public Map<String, ImportConfiguration> getImportCapabilities() {
        return m_importCapabilities;
    }

    /**
     * @param str the import type to look up
     * @return the import configuration registered for that type, or
     *         {@code null} if none is registered
     */
    @Override // de.dfki.catwiesel.synchronizer.importer.Importer
    public ImportConfiguration getImportCapabilities(String str) {
        ImportConfiguration configuration = m_importCapabilities.get(str);
        return configuration;
    }

    /**
     * @return the logger shared by all instances of this importer
     */
    public static Logger getLogger() {
        return RssFeedImporter.m_logger;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.util.HashMap<java.lang.Thread, java.lang.Boolean>] */
    /* JADX WARN: Type inference failed for: r0v2, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v6 */
    @Override // de.dfki.catwiesel.synchronizer.importer.Importer
    public void stopImport(Thread thread) {
        ?? r0 = this.m_stopRequests;
        synchronized (r0) {
            this.m_stopRequests.put(thread, Boolean.TRUE);
            r0 = r0;
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v1, types: [java.util.HashMap<java.lang.Thread, java.lang.Boolean>] */
    /* JADX WARN: Type inference failed for: r0v2, types: [java.lang.Throwable] */
    /* JADX WARN: Type inference failed for: r0v6 */
    @Override // de.dfki.catwiesel.synchronizer.importer.Importer
    public void reset(Thread thread) {
        ?? r0 = this.m_stopRequests;
        synchronized (r0) {
            this.m_stopRequests.remove(thread);
            r0 = r0;
        }
    }
}
