package de.dfki.km.exact.koios.example.dblp;

import de.dfki.km.exact.file.EUFile;
import de.dfki.km.exact.koios.impl.voc.PREPROCESS;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.misc.EUString;
import de.dfki.km.exact.sesame.EUTripleStore;
import de.dfki.km.exact.sesame.EUTripleStoreFactory;
import de.dfki.km.exact.sesame.EUTripleStoreWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import javax.xml.parsers.SAXParserFactory;
import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:de/dfki/km/exact/koios/example/dblp/DBLPParser.class */
/**
 * SAX handler that converts a DBLP XML dump into RDF triples.
 *
 * <p>Each recognised record element (article, inproceedings, proceedings, book) is collected
 * into a {@link DBLPDocument}; when the record element ends, the document is written to the
 * underlying {@link EUTripleStore}. Parsing is aborted with a {@link SAXException} as soon as
 * the configured maximum number of triples has been reached.
 *
 * <p>Typical usage: construct, call {@link #parse(String, String, boolean)}, then
 * {@link #close()}.
 */
public class DBLPParser extends DefaultHandler implements DBLP {

    /** Characters that separate the tokens of a label when a URI local name is generated. */
    private static final String URI_DELIMITERS = " …•‚”“„‘«»<>’,|`.;:?!-_'/()[]{}@§$%&=^°*+~#´\"\\";

    /** Datatype attached to the year literal of a publication. */
    private static final URI XSD_INT = new URIImpl("http://www.w3.org/2001/XMLSchema#int");

    /** Name of the XML attribute carrying the record key. */
    private final String keyAttribute = "key";

    /** Element names that start a new record. */
    private final Set<String> types = new HashSet<>();

    /** Element names whose text content is copied into the current record. */
    private final Set<String> attributes = new HashSet<>();

    /** Person name -> URI already minted for it. */
    private final HashMap<String, URI> mPersonMap = new HashMap<>();

    /** Record key (or composite journal key) -> URI already minted for it. */
    private final HashMap<String, URI> mKeyMap = new HashMap<>();

    /** Text collected for the attribute element that is currently open. */
    private final StringBuilder mText = new StringBuilder();

    /** Only records with a year strictly greater than this value are stored. */
    private final int mMinYear;

    /** Parsing stops once at least this many triples have been counted. */
    private final int mMaxTriples;

    private EUTripleStore mStore;
    private String mCurrentAttribute;
    private boolean mNat;
    private String mToStore;
    private DBLPDocument mDocument = null;
    private int mDocCounter = 0;
    private int mTripleCounter = 0;

    /**
     * Creates a parser.
     *
     * @param minYear    records whose year is less than or equal to this value are skipped
     * @param maxTriples triple count at which parsing is aborted
     */
    public DBLPParser(int minYear, int maxTriples) {
        this.mMinYear = minYear;
        this.mMaxTriples = maxTriples;

        this.types.add(DBLP.ARTICLE);
        this.types.add(DBLP.INPROCEEDINGS);
        this.types.add(DBLP.PROCEEDINGS);
        this.types.add(DBLP.BOOK);

        this.attributes.add(DBLP.TITLE);
        this.attributes.add(DBLP.AUTHOR);
        this.attributes.add(DBLP.YEAR);
        this.attributes.add(DBLP.MONTH);
        this.attributes.add(DBLP.JOURNAL);
        this.attributes.add(DBLP.NUMBER);
        this.attributes.add(DBLP.VOLUME);
        this.attributes.add(DBLP.PUBLISHER);
    }

    /**
     * Starts a new record when a record element opens, or begins collecting text when a
     * recognised attribute element opens.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attrs) {
        if (this.types.contains(qName)) {
            this.mDocument = new DBLPDocument(attrs.getValue(this.keyAttribute));
            this.mDocument.setType(qName);
        } else if (this.attributes.contains(qName)) {
            this.mCurrentAttribute = qName;
            this.mText.setLength(0);
        }
    }

    /**
     * Buffers element text. SAX is allowed to report the text of a single element in several
     * chunks, so the value is only applied to the record once the element ends.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) {
        if (this.mDocument == null || this.mCurrentAttribute == null) {
            return;
        }
        this.mText.append(ch, start, length);
    }

    /**
     * Finishes the current record or attribute and enforces the triple limit.
     *
     * @throws SAXException when the number of counted triples has reached the configured maximum
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (this.mDocument != null && this.types.contains(qName)) {
            handleDocument();
            this.mDocument = null;
        } else if (this.mCurrentAttribute != null && this.attributes.contains(qName)) {
            flushCurrentAttribute();
            this.mCurrentAttribute = null;
        }
        if (this.mTripleCounter >= this.mMaxTriples) {
            throw new SAXException("\nLimit reached after " + this.mTripleCounter + " triples.");
        }
    }

    /** Applies the buffered text of the open attribute element to the current record. */
    private void flushCurrentAttribute() {
        // Elements that produced no text leave the record untouched.
        if (this.mDocument != null && this.mText.length() > 0) {
            applyValue(this.mCurrentAttribute, stripSoleTrailingDot(this.mText.toString()));
        }
        this.mText.setLength(0);
    }

    /**
     * Removes a trailing '.' when it is the only dot in the text (i.e. the first dot found is
     * the last character). Text with earlier dots, and empty text, is returned unchanged.
     */
    private static String stripSoleTrailingDot(String text) {
        int last = text.length() - 1;
        if (last >= 0 && text.indexOf('.') == last) {
            return text.substring(0, last);
        }
        return text;
    }

    /**
     * Stores {@code value} in the field of the current record that corresponds to
     * {@code attribute}.
     *
     * <p>NOTE(review): BOOKTITLE, MYURL, ISBN and CROSSREF are handled here but are not part of
     * the attribute set registered in the constructor, so those branches are currently never
     * reached; likewise no editor element is parsed. Confirm whether that is intended.
     */
    private void applyValue(String attribute, String value) {
        if (attribute.equals(DBLP.YEAR)) {
            this.mDocument.setYear(value);
        } else if (attribute.equals(DBLP.MONTH)) {
            this.mDocument.setMonth(value);
        } else if (attribute.equals(DBLP.AUTHOR)) {
            this.mDocument.addAuthor(value);
        } else if (attribute.equals(DBLP.TITLE)) {
            this.mDocument.setTitle(value);
        } else if (attribute.equals(DBLP.JOURNAL)) {
            this.mDocument.setJournal(value);
        } else if (attribute.equals(DBLP.VOLUME)) {
            this.mDocument.setVolume(value);
        } else if (attribute.equals(DBLP.BOOKTITLE)) {
            this.mDocument.setBooktitle(value);
        } else if (attribute.equals(DBLP.PUBLISHER)) {
            this.mDocument.setPublisher(value);
        } else if (attribute.equals(DBLP.MYURL)) {
            this.mDocument.setUrl(value);
        } else if (attribute.equals(DBLP.ISBN)) {
            this.mDocument.setIsbn(value);
        } else if (attribute.equals(DBLP.CROSSREF)) {
            this.mDocument.setCrossref(value);
        } else if (attribute.equals(DBLP.NUMBER)) {
            this.mDocument.setNumber(value);
        }
    }

    /**
     * Builds a URI in the DBLP namespace from a label: the label is split at the delimiter
     * characters and the capitalised tokens are concatenated into the local name.
     */
    private URI generateURI(String label) {
        StringBuilder localName = new StringBuilder();
        for (String token : EUString.split(label, URI_DELIMITERS)) {
            localName.append(EUString.startWithUpperLetter(token));
        }
        return new URIImpl(DBLP.NAMESPACE + localName.toString());
    }

    /** Creates an in-memory store holding the class, property and label statements of the schema. */
    private EUTripleStore getOntology() {
        EUTripleStore ontology = EUTripleStoreFactory.getMemoryStore();

        // Class hierarchy.
        ontology.addStatement(DocumentURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(ArticleURI, RDFS.SUBCLASSOF, DocumentURI);
        ontology.addStatement(InproceedingsURI, RDFS.SUBCLASSOF, DocumentURI);
        ontology.addStatement(BookURI, RDFS.SUBCLASSOF, DocumentURI);
        ontology.addStatement(ProceedingsURI, RDFS.SUBCLASSOF, DocumentURI);
        ontology.addStatement(JournalURI, RDFS.SUBCLASSOF, DocumentURI);

        // Classes.
        ontology.addStatement(PersonURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(JournalURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(ArticleURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(ProceedingsURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(InproceedingsURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(BookURI, RDF.TYPE, RDFS.CLASS);
        ontology.addStatement(WWWURI, RDF.TYPE, RDFS.CLASS);

        // Properties.
        ontology.addStatement(yearURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(volumeURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(numberURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(titleURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(hasCrossrefURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(hasAuthorURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(hasPublisherURI, RDF.TYPE, RDF.PROPERTY);
        ontology.addStatement(hasEditorURI, RDF.TYPE, RDF.PROPERTY);

        // Class labels (Document carries two labels).
        ontology.addStatement(PersonURI, RDFS.LABEL, "Person");
        ontology.addStatement(JournalURI, RDFS.LABEL, "Journal");
        ontology.addStatement(ArticleURI, RDFS.LABEL, "Article");
        ontology.addStatement(ProceedingsURI, RDFS.LABEL, "Proceedings");
        ontology.addStatement(InproceedingsURI, RDFS.LABEL, "Inproceedings");
        ontology.addStatement(BookURI, RDFS.LABEL, "Book");
        ontology.addStatement(WWWURI, RDFS.LABEL, "WWW");
        ontology.addStatement(DocumentURI, RDFS.LABEL, "Document");
        ontology.addStatement(DocumentURI, RDFS.LABEL, "Publication");

        // Property labels.
        ontology.addStatement(yearURI, RDFS.LABEL, DBLP.YEAR);
        ontology.addStatement(volumeURI, RDFS.LABEL, DBLP.VOLUME);
        ontology.addStatement(numberURI, RDFS.LABEL, DBLP.NUMBER);
        ontology.addStatement(titleURI, RDFS.LABEL, DBLP.TITLE);
        ontology.addStatement(hasCrossrefURI, RDFS.LABEL, "has cross reference");
        ontology.addStatement(hasAuthorURI, RDFS.LABEL, "has author");
        ontology.addStatement(hasPublisherURI, RDFS.LABEL, "has publisher");
        ontology.addStatement(hasEditorURI, RDFS.LABEL, "has editor");
        return ontology;
    }

    /**
     * Writes the current record to the store.
     *
     * <p>The record is skipped when its title contains "publications", when it has no key, or
     * when its year is missing, not a number, or not greater than the minimum year.
     */
    private void handleDocument() {
        String title = this.mDocument.getTitle();
        if (title != null && title.toLowerCase().contains("publications")) {
            return;
        }
        String rawYear = this.mDocument.getYear();
        if (rawYear == null) {
            return;
        }
        String year = rawYear.trim();
        int yearValue;
        try {
            yearValue = Integer.parseInt(year);
        } catch (NumberFormatException e) {
            // A malformed year must not abort the whole parse; drop just this record.
            return;
        }
        if (yearValue <= this.mMinYear || this.mDocument.getKey() == null) {
            return;
        }

        URI docUri = handleKeys(this.mDocument.getKey());
        add(docUri, yearURI, new LiteralImpl(year, XSD_INT));
        addAuthors(docUri, this.mDocument.getAuthors());
        addEditors(docUri, this.mDocument.getEditors());

        String type = this.mDocument.getType();
        addType(docUri, type);
        if (type.equals(DBLP.ARTICLE)) {
            // Articles point at a journal issue node instead of another record.
            add(docUri, hasCrossrefURI, handleJournal(
                    this.mDocument.getJournal(), this.mDocument.getVolume(), this.mDocument.getNumber(), year));
        } else {
            add(docUri, hasCrossrefURI, handleKeys(this.mDocument.getCrossref()));
        }
        add(docUri, RDFS.LABEL, title);
        // NOTE(review): the publisher goes through handlePerson and is therefore typed as a
        // Person -- confirm this is intended.
        add(docUri, hasPublisherURI, handlePerson(this.mDocument.getPublisher()));

        this.mDocCounter++;
        if (this.mDocCounter % 10000 == 0) {
            EULogger.info("Current document number: " + this.mDocCounter);
        }
    }

    /**
     * Returns the URI of the journal issue identified by the given parts, creating and
     * describing it on first use.
     *
     * @return the issue URI, or {@code null} when no part is available to build a key from
     */
    private URI handleJournal(String journal, String volume, String number, String year) {
        StringBuilder keyBuilder = new StringBuilder();
        if (journal != null) {
            keyBuilder.append(journal);
        }
        if (volume != null) {
            keyBuilder.append(PREPROCESS.XMLFileSeperator).append(volume);
        }
        if (number != null) {
            keyBuilder.append(PREPROCESS.XMLFileSeperator).append(number);
        }
        if (year != null) {
            keyBuilder.append(PREPROCESS.XMLFileSeperator).append(year);
        }
        String key = keyBuilder.toString();
        if (key.isEmpty()) {
            // Nothing identifies the journal; do not mint a node at the bare namespace URI.
            return null;
        }

        URI journalUri = this.mKeyMap.get(key);
        if (journalUri == null) {
            journalUri = generateURI(key);
            this.mKeyMap.put(key, journalUri);
            add(journalUri, RDF.TYPE, JournalURI);
            add(journalUri, volumeURI, volume);
            add(journalUri, numberURI, number);
            add(journalUri, yearURI, year);
        }
        return journalUri;
    }

    /**
     * Adds the rdf:type statement matching the record type; unknown types add nothing.
     *
     * <p>NOTE(review): WWW is handled here but is not registered as a record type in the
     * constructor, so that branch is currently never reached.
     */
    private void addType(URI subject, String type) {
        if (type.equals(DBLP.ARTICLE)) {
            add(subject, RDF.TYPE, ArticleURI);
        } else if (type.equals(DBLP.INPROCEEDINGS)) {
            add(subject, RDF.TYPE, InproceedingsURI);
        } else if (type.equals(DBLP.PROCEEDINGS)) {
            add(subject, RDF.TYPE, ProceedingsURI);
        } else if (type.equals(DBLP.BOOK)) {
            add(subject, RDF.TYPE, BookURI);
        } else if (type.equals(DBLP.WWW)) {
            add(subject, RDF.TYPE, WWWURI);
        }
    }

    /** Links the subject to every author name via the hasAuthor property. */
    private void addAuthors(URI subject, Set<String> names) {
        for (String name : names) {
            add(subject, hasAuthorURI, handlePerson(name));
        }
    }

    /** Links the subject to every editor name via the hasEditor property. */
    private void addEditors(URI subject, Set<String> names) {
        for (String name : names) {
            add(subject, hasEditorURI, handlePerson(name));
        }
    }

    /** Adds a statement with a plain string object and counts it; a null object is ignored. */
    private void add(URI subject, URI predicate, String object) {
        if (object != null) {
            this.mStore.addStatement(subject, predicate, object);
            this.mTripleCounter++;
        }
    }

    /** Adds a statement with a literal object and counts it; a null object is ignored. */
    private void add(URI subject, URI predicate, Literal object) {
        if (object != null) {
            this.mStore.addStatement(subject, predicate, object);
            this.mTripleCounter++;
        }
    }

    /** Adds a statement with a resource object and counts it; a null object is ignored. */
    private void add(URI subject, URI predicate, URI object) {
        if (object != null) {
            this.mStore.addStatement(subject, predicate, object);
            this.mTripleCounter++;
        }
    }

    /**
     * Returns the URI for a person name, creating the person (name and type statements) on
     * first use.
     *
     * @return the person URI, or {@code null} when {@code name} is {@code null}
     */
    private URI handlePerson(String name) {
        if (name == null) {
            return null;
        }
        URI personUri = this.mPersonMap.get(name);
        if (personUri == null) {
            personUri = generateURI(name);
            this.mPersonMap.put(name, personUri);
            add(personUri, nameURI, name);
            add(personUri, RDF.TYPE, PersonURI);
        }
        return personUri;
    }

    /**
     * Returns the URI for a record key, minting and caching it on first use. No statement is
     * written here, so the triple counter is left untouched.
     *
     * @return the record URI, or {@code null} when {@code key} is {@code null}
     */
    private URI handleKeys(String key) {
        if (key == null) {
            return null;
        }
        URI keyUri = this.mKeyMap.get(key);
        if (keyUri == null) {
            keyUri = generateURI(key);
            this.mKeyMap.put(key, keyUri);
        }
        return keyUri;
    }

    /** Logs the final document and triple counts. */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endDocument() {
        EULogger.info("documents: " + this.mDocCounter);
        EULogger.info("triples: " + this.mTripleCounter);
    }

    /**
     * Parses the XML file into an in-memory store.
     *
     * @param str  path of the DBLP XML file
     * @param str2 target path used when the store is written by {@link #close()}
     */
    public final void parse(String str, String str2) {
        parse(str, str2, false);
    }

    /**
     * Parses the XML file into a native or in-memory store. Any exception raised while parsing,
     * including the one signalling that the triple limit was reached, is logged as a warning
     * and not rethrown.
     *
     * @param str  path of the DBLP XML file
     * @param str2 location of the native store, or target file for the in-memory store
     * @param z    {@code true} to use a native store, {@code false} for an in-memory store
     */
    public final void parse(String str, String str2, boolean z) {
        this.mNat = z;
        this.mToStore = str2;
        if (z) {
            this.mStore = EUTripleStoreFactory.getNativeStore(str2);
        } else {
            this.mStore = EUTripleStoreFactory.getMemoryStore();
        }

        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(false);
        // try-with-resources makes sure the input file is closed even when parsing is aborted.
        try (FileInputStream in = new FileInputStream(new File(str));
                InputStreamReader reader = new InputStreamReader(in, "UTF-8")) {
            factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            InputSource source = new InputSource(reader);
            source.setEncoding("UTF-8");
            factory.newSAXParser().parse(source, this);
        } catch (Exception e) {
            EULogger.warn(getClass(), e);
        }
    }

    /**
     * Adds the schema statements to the store and then either closes it (native store) or
     * writes it out as RDF/XML (in-memory store). Does nothing when no parse has been started.
     */
    public void close() {
        if (this.mStore == null) {
            return;
        }
        this.mStore.addStore(getOntology());
        if (this.mNat) {
            this.mStore.close();
            return;
        }
        try {
            EUTripleStoreWriter.writeRDFXML(this.mToStore, this.mStore);
        } catch (Exception e) {
            EULogger.warn(getClass(), e);
        }
    }

    /** Rebuilds the native example store from the DBLP XML file. */
    public static void main(String[] strArr) {
        EUFile.delete(DBLP.NATIVE_STORE);
        EUFile.delete(DBLP.FILE_STORE);
        DBLPParser parser = new DBLPParser(2008, 10000);
        parser.parse(DBLP.XML, DBLP.NATIVE_STORE, true);
        parser.close();
    }
}
