package de.dfki.km.perspecting.obie.connection.ontology;

import de.dfki.km.perspecting.obie.connection.ontology.RDFTripleParser;
import de.dfki.km.perspecting.obie.utils.SortingFile;
import de.dfki.km.perspecting.obie.utils.logging.ScoobieLogging;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:de/dfki/km/perspecting/obie/connection/ontology/DBSessionDataPreprocessor.class */
/**
 * Builds a fresh session: creates the session database through the
 * {@link DBManager}, applies the bundled SQL schema, lays out the session
 * folder structure on disk, parses the RDF dumps and stores the resulting
 * resource, literal, symbol and relation data in the database.
 *
 * <p>The property-value import runs its tasks on a small thread pool. All
 * tasks use the single {@link Connection} held by this instance.
 * NOTE(review): every worker calls {@code commit()} on that shared connection;
 * confirm the JDBC driver in use tolerates this.
 */
public class DBSessionDataPreprocessor {
    /** Classpath location of the SQL schema applied to every new session database. */
    public static final String DBSCHEMA = "/model/db/dbscheme.sql";
    /** URIs longer than this many characters are truncated before being stored. */
    private static final int MAX_URI_LENGTH = 256;
    private static final String UTF_8 = "UTF-8";
    /** Number of worker threads used for the property-value import. */
    private static final int WORKER_THREADS = 5;
    private static final Logger log = Logger.getLogger(DBSessionDataPreprocessor.class.getName());

    private String sessionPath;
    private String session;
    private final ArrayList<PreparedStatement> statements = new ArrayList<>();
    private Connection connection;
    private HashMap<String, String> prefixes;
    private HashMap<String, String> invPrefixes;
    private final DBManager dbManager;

    /**
     * @param dBManager manager used to create and close the session database
     */
    public DBSessionDataPreprocessor(DBManager dBManager) {
        this.dbManager = dBManager;
    }

    /**
     * Resolves the session directory from the {@code scoobie.sessionpath}
     * system property, wipes any previous content of that directory, creates
     * the session database, applies the schema and creates the folder layout.
     *
     * @throws IllegalStateException if {@code scoobie.sessionpath} is not set
     * @throws IOException if the schema resource is missing or file operations fail
     * @throws Exception on database errors
     */
    private void init() throws Exception {
        String basePath = System.getProperty("scoobie.sessionpath");
        if (basePath == null) {
            throw new IllegalStateException("System property scoobie.sessionpath is not set.");
        }
        if (!basePath.endsWith("/")) {
            basePath = basePath + "/";
        }
        this.sessionPath = basePath + this.session;
        FileUtils.deleteDirectory(new File(this.sessionPath));

        this.connection = this.dbManager.createDatabase(this.session);
        this.connection.setAutoCommit(false);

        String schema = readSchema();
        try (Statement schemaStatement = this.connection.createStatement()) {
            for (String sql : schema.split(";\n")) {
                // Blank chunks (e.g. trailing newlines) are not statements.
                if (!sql.trim().isEmpty()) {
                    schemaStatement.addBatch(sql);
                }
            }
            schemaStatement.executeBatch();
        }
        this.connection.commit();
        initFolderStructure();
    }

    /** Reads the bundled schema script, normalising every line ending to {@code \n}. */
    private static String readSchema() throws IOException {
        InputStream schemaStream = DBSessionDataPreprocessor.class.getResourceAsStream(DBSCHEMA);
        if (schemaStream == null) {
            throw new IOException("File not found: " + DBSCHEMA);
        }
        StringBuilder script = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(schemaStream))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                script.append(line).append('\n');
            }
        }
        return script.toString();
    }

    /**
     * Creates the session directory with its sub-folders and copies the
     * default gazetteers, CRF model and pattern files from the classpath into
     * {@code <session>/models}.
     *
     * @throws IOException if a default resource is missing or copying fails
     */
    private void initFolderStructure() throws IOException {
        if (new File(this.sessionPath).mkdirs()) {
            log.info("Created folder " + this.sessionPath + ".");
        } else {
            log.info("Folder " + this.sessionPath + " already exists.");
        }
        // Order matters: "/models" must exist before its children are created.
        String[] subFolders = {
            "/hierarchies", "/indexes", "/statistics", "/relations", "/symbols",
            "/models", "/models/MalletCRF", "/DocumentCorpus", "/models/PhraseClassification"
        };
        for (String subFolder : subFolders) {
            new File(this.sessionPath + subFolder).mkdir();
        }
        log.info("Created initial folder structure under: " + this.sessionPath);

        // Resolve all sources first so a missing resource fails before anything is copied.
        File gazetteers = resourceDirectory("/model/all/NamedEntitiesGazetteers");
        File malletCrf = resourceDirectory("/model/MalletCRF");
        File patterns = resourceDirectory("/model/all/StructuredEntitiesPatterns");
        File models = new File(this.sessionPath + "/models");
        FileUtils.copyDirectoryToDirectory(gazetteers, models);
        FileUtils.copyDirectoryToDirectory(malletCrf, models);
        FileUtils.copyDirectoryToDirectory(patterns, models);
        log.info("Copied default pattern files and gazetteers to: " + this.sessionPath + "/models");
    }

    /**
     * Maps a classpath resource to a file-system location.
     *
     * <p>{@link URL#getFile()} returns the percent-encoded path, which does not
     * name an existing file when the path contains e.g. spaces, so the URL is
     * converted through its URI first. URLs that cannot be expressed as a file
     * URI fall back to the raw path.
     *
     * @throws IOException if the resource is not on the classpath
     */
    private static File resourceDirectory(String resourceName) throws IOException {
        URL location = DBSessionDataPreprocessor.class.getResource(resourceName);
        if (location == null) {
            throw new IOException("Resource not found: " + resourceName);
        }
        try {
            return new File(location.toURI());
        } catch (URISyntaxException | IllegalArgumentException e) {
            return new File(location.getFile());
        }
    }

    /**
     * Creates the session and imports the given RDF data into it.
     *
     * @param strArr RDF dump inputs, handed to {@code RDFTripleParser.parseTriples}
     * @param str    second argument of {@code RDFTripleParser.parseTriples}
     *               (meaning not visible here — TODO confirm)
     * @param str2   directory that is scanned for {@code *.gaz} gazetteer files
     * @param str3   session name; every non-word character is replaced by {@code _}
     * @param str4   fourth argument of {@code RDFTripleParser.parseTriples}
     *               (meaning not visible here — TODO confirm)
     * @throws IllegalArgumentException if the session name is empty
     * @throws NullPointerException if the session name is {@code null}
     * @throws Exception on any I/O, parsing or database failure
     */
    public void preprocess(String[] strArr, String str, String str2, String str3, String str4) throws Exception {
        // An empty name would make the session directory equal to the session
        // root, which init() deletes recursively.
        if (str3.isEmpty()) {
            throw new IllegalArgumentException("Session name must not be empty.");
        }
        this.session = str3.replaceAll("\\W", "_");
        init();
        log.info("Analysing hierarchies in ontology.");
        indexRDFData(strArr, str, str2, str4);
    }

    /**
     * Parses the RDF dumps, stores the resource and literal indexes, then
     * imports datatype and object property values in parallel and finally
     * closes the session database.
     */
    private void indexRDFData(String[] strArr, String str, String str2, String str3) throws Exception {
        System.out.print("Parsing RDF dump files: ... ");
        long parseStart = System.currentTimeMillis();
        RDFTripleParser.TripleStats stats = RDFTripleParser.parseTriples(strArr, str, this.sessionPath, str3);
        System.out.println("[done] took " + (System.currentTimeMillis() - parseStart) + "ms");

        System.out.print("Storing resource index: ... ");
        long resourceStart = System.currentTimeMillis();
        storeResourceIndex(this.connection, stats.resourceList, stats.invPrefixes);
        this.prefixes = stats.prefixes;
        this.invPrefixes = stats.invPrefixes;
        System.out.println("[done] took " + (System.currentTimeMillis() - resourceStart) + "ms");

        System.out.print("Storing literal index: ... ");
        long literalStart = System.currentTimeMillis();
        storeLiteralIndex(this.connection, stats.literalList, stats.literalLanguageList, str2);
        System.out.println("Starting commit.");
        this.connection.commit();
        System.out.println("[done] took " + (System.currentTimeMillis() - literalStart) + "ms");

        // A pool per run: a shared pool that is shut down after the first run
        // would reject the tasks of every later run.
        ExecutorService workers = Executors.newFixedThreadPool(WORKER_THREADS);
        try {
            System.out.println("Extracting datatype property values: ");
            runAll(workers, readPropertyTasks(new BufferedReader(new FileReader(stats.datatypeProps)), true));
            System.out.println("Starting commit.");
            this.connection.commit();

            System.out.println("Extracting object property values.");
            runAll(workers, readPropertyTasks(new BufferedReader(new FileReader(stats.objectProps)), false));
        } finally {
            // Always release the worker threads, also when a task failed.
            workers.shutdown();
        }
        System.out.println("Starting commit.");
        this.connection.commit();
        close();
        log.info("[finished]");
    }

    /**
     * Reads one property name per line and creates an import task for each.
     * The reader is closed before returning.
     *
     * @param reader             source of property names, one per line
     * @param datatypeProperties {@code true} to create datatype-property tasks,
     *                           {@code false} for object-property tasks
     */
    private ArrayList<Callable<Object>> readPropertyTasks(BufferedReader reader, boolean datatypeProperties) throws IOException {
        ArrayList<Callable<Object>> tasks = new ArrayList<>();
        try (BufferedReader in = reader) {
            for (String property = in.readLine(); property != null; property = in.readLine()) {
                tasks.add(propertyTask(property, datatypeProperties));
            }
        }
        return tasks;
    }

    /** Wraps the import of a single property in a task that reports its run time on stdout. */
    private Callable<Object> propertyTask(final String property, final boolean datatypeProperty) {
        return new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                System.out.println("Starting : " + property);
                long started = System.currentTimeMillis();
                if (datatypeProperty) {
                    initDatatypePropertyValues(property);
                } else {
                    initObjectPropertyValues(property);
                }
                System.out.println("Finished: " + property + " [done] took " + (System.currentTimeMillis() - started) + "ms");
                return null;
            }
        };
    }

    /** Runs all tasks and waits for them; the first task failure is rethrown. */
    private static void runAll(ExecutorService executor, ArrayList<Callable<Object>> tasks) throws Exception {
        for (Future<Object> result : executor.invokeAll(tasks)) {
            result.get();
        }
    }

    /**
     * Stores one row per line of the resource file in {@code index_resources},
     * numbering the rows from 0 in file order.
     *
     * <p>Each line is split at its first {@code ':'}; the part up to and
     * including the colon is looked up in {@code hashMap} and replaced by the
     * mapped value. An unmapped prefix yields the text {@code "null"}.
     *
     * @param connection connection to insert and commit on
     * @param file       resource list, one prefixed resource per line
     * @param hashMap    maps a prefix (including the trailing colon) to its expansion
     */
    private void storeResourceIndex(Connection connection, File file, HashMap<String, String> hashMap) throws Exception {
        int stored = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(file));
                PreparedStatement insert = connection.prepareStatement("INSERT INTO index_resources (index_resources.index, index_resources.uri) VALUES (?, ?)")) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                int colon = line.indexOf(':');
                String uri = String.valueOf(hashMap.get(line.substring(0, colon + 1))) + line.substring(colon + 1);
                // Only URIs that really exceed the limit are shortened (and reported).
                if (uri.length() > MAX_URI_LENGTH) {
                    ScoobieLogging.log(this.session, "TRAINING", "Shrinked long URI: " + uri, log, Level.WARNING);
                    uri = uri.substring(0, MAX_URI_LENGTH);
                }
                insert.setInt(1, stored++);
                insert.setString(2, uri);
                insert.executeUpdate();
                if (stored % 100000 == 0) {
                    System.out.println(" ... added " + stored + " resources.");
                }
            }
            System.out.println(" ... added " + stored + " resources.");
            System.out.println("Starting commit.");
            connection.commit();
        }
        System.out.println(" ... stored " + stored + " resources.");
    }

    /**
     * Merge-walks two line streams in {@link String#CASE_INSENSITIVE_ORDER},
     * writes every line of the second stream that has no equal line in the
     * first to {@code bufferedWriter}, and returns an overlap score.
     *
     * <p>With {@code m} matching lines, {@code a} lines consumed from the first
     * stream and {@code b} lines read from the second, the score is
     * {@code p + (1 - p) * q} where {@code p = m / b} and {@code q = m / a}.
     * A ratio with a zero denominator counts as 0, so the score is exactly 0
     * when nothing matched. Lines of the first stream left over once the
     * second stream is exhausted are not counted in {@code a}.
     *
     * <p>Assumes both streams are already sorted case-insensitively — TODO
     * confirm against the producers of these files.
     *
     * @param bufferedReader  reference lines
     * @param bufferedReader2 candidate lines
     * @param bufferedWriter  receives the unmatched candidate lines
     * @return overlap score; 0 when there is no overlap
     */
    private double compare(BufferedReader bufferedReader, BufferedReader bufferedReader2, BufferedWriter bufferedWriter) throws Exception {
        String candidate = bufferedReader2.readLine();
        String reference = bufferedReader.readLine();
        double matches = 0.0d;
        double referencesSeen = 0.0d;
        double candidatesSeen = 0.0d;
        while (reference != null && candidate != null) {
            int order = String.CASE_INSENSITIVE_ORDER.compare(reference, candidate);
            if (order < 0) {
                reference = bufferedReader.readLine();
                referencesSeen += 1.0d;
            } else if (order > 0) {
                bufferedWriter.write(candidate);
                bufferedWriter.newLine();
                candidate = bufferedReader2.readLine();
                candidatesSeen += 1.0d;
            } else {
                candidate = bufferedReader2.readLine();
                reference = bufferedReader.readLine();
                referencesSeen += 1.0d;
                candidatesSeen += 1.0d;
                matches += 1.0d;
            }
        }
        // Whatever is left of the candidates cannot match any more.
        while (candidate != null) {
            candidatesSeen += 1.0d;
            bufferedWriter.write(candidate);
            bufferedWriter.newLine();
            candidate = bufferedReader2.readLine();
        }
        double candidateShare = ratio(matches, candidatesSeen);
        return candidateShare + ((1.0d - candidateShare) * ratio(matches, referencesSeen));
    }

    /** Returns {@code numerator / denominator}, or 0 instead of NaN for a zero denominator. */
    private static double ratio(double numerator, double denominator) {
        return denominator == 0.0d ? 0.0d : numerator / denominator;
    }

    /**
     * Stores the literal index and the literal language tags.
     *
     * <ol>
     * <li>Every {@code *.gaz} file in directory {@code str} is sorted and
     * compared with the literal file; its unmatched entries go to
     * {@code <gazetteer>.new}, which is deleted again when the overlap score is 0.</li>
     * <li>The literal file and all {@code *.new} files found in {@code str} are
     * concatenated into {@code <literal file>.new} and sorted.</li>
     * <li>Each distinct trimmed line (consecutive duplicates are skipped) is
     * inserted into {@code index_literals}, numbered from 0.</li>
     * <li>Each line of {@code file2} is split at its last {@code '@'} into
     * literal and language and inserted into {@code map_language}.</li>
     * </ol>
     *
     * @param connection connection to insert and commit on
     * @param file       literal list, one literal per line
     * @param file2      literal/language list, lines of the form {@code literal@language}
     * @param str        directory containing the gazetteer files
     */
    private void storeLiteralIndex(Connection connection, File file, File file2, String str) throws Exception {
        for (File gazetteer : FileUtils.listFiles(new File(str), new String[]{"gaz"}, false)) {
            log.info("Found gazetteer: " + gazetteer);
            File sortedGazetteer = SortingFile.externalSort(gazetteer.getAbsolutePath(), this.sessionPath, 1000L);
            File unmatched = new File(gazetteer.getAbsoluteFile() + ".new");
            double score;
            try (BufferedReader literals = new BufferedReader(new FileReader(file));
                    BufferedReader entries = new BufferedReader(new FileReader(sortedGazetteer));
                    BufferedWriter out = new BufferedWriter(new FileWriter(unmatched))) {
                score = compare(literals, entries, out);
            }
            log.info("Gazetteer: " + sortedGazetteer + " " + score);
            if (score == 0.0d) {
                // No overlap with the ontology literals: the gazetteer is not relevant.
                unmatched.delete();
            }
        }

        // List the gazetteer remainders BEFORE the merged file is created, so the
        // merged file can never be picked up as one of its own inputs.
        Collection<File> relevantGazetteers = FileUtils.listFiles(new File(str), new String[]{"new"}, false);
        String mergedPath = file.getAbsoluteFile() + ".new";
        long lineCount = 0;
        try (BufferedWriter merged = new BufferedWriter(new FileWriter(new File(mergedPath)))) {
            lineCount += appendLines(file, merged);
            for (File gazetteer : relevantGazetteers) {
                log.info("Found relevant gazetteer: " + gazetteer);
                lineCount += appendLines(gazetteer, merged);
            }
        }
        log.info("Write literals to : " + new File(mergedPath));
        // NOTE(review): the third argument becomes 0 for fewer than 500 lines —
        // confirm SortingFile.externalSort accepts that.
        File sortedLiterals = SortingFile.externalSort(mergedPath, this.sessionPath, lineCount / 500);
        log.info("Sorted literals  : " + sortedLiterals);

        int storedLiterals = 0;
        try (PreparedStatement insertLiteral = connection.prepareStatement("INSERT INTO index_literals (index, literal, prefix) VALUES (?, ?, ?)");
                BufferedReader reader = new BufferedReader(new FileReader(sortedLiterals))) {
            connection.setAutoCommit(false);
            String previous = "";
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                String literal = line.trim();
                if (literal.equals(previous)) {
                    continue; // the input is sorted, so duplicates are adjacent
                }
                previous = literal;
                try {
                    insertLiteral.setInt(1, storedLiterals++);
                    insertLiteral.setString(2, literal);
                    insertLiteral.setInt(3, getPrefix(literal, 4).hashCode());
                    insertLiteral.executeUpdate();
                } catch (Exception e) {
                    throw new Exception("Invalid value: " + literal, e);
                }
                if (storedLiterals % 100000 == 0) {
                    System.out.println(" Added " + storedLiterals + " literals.");
                }
            }
        }
        System.out.println(" Committing " + storedLiterals + " literals.");
        connection.commit();
        System.out.println(" ... stored " + storedLiterals + " literals.");

        int storedTags = 0;
        try (PreparedStatement insertLanguage = connection.prepareStatement("INSERT INTO map_language (index, language)  SELECT index AS index, ? AS language FROM index_literals WHERE (literal = ?)");
                BufferedReader reader = new BufferedReader(new FileReader(file2))) {
            connection.setAutoCommit(false);
            String language = "";
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                try {
                    language = line.substring(line.lastIndexOf('@') + 1);
                    String literal = line.substring(0, line.length() - (language.length() + 1));
                    insertLanguage.setString(1, language);
                    insertLanguage.setString(2, literal);
                    insertLanguage.executeUpdate();
                    storedTags++;
                    if (storedTags % 100000 == 0) {
                        System.out.println(" Added " + storedTags + " language tags for literals.");
                    }
                } catch (Exception e) {
                    throw new Exception("Invalid language: " + language, e);
                }
            }
        }
        connection.commit();
        System.out.println(" Committet " + storedTags + " language tags for literals.");
    }

    /**
     * Copies every line of {@code source} to {@code target}, each followed by
     * the platform line separator.
     *
     * @return number of lines copied
     */
    private static long appendLines(File source, BufferedWriter target) throws IOException {
        long copied = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(source))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                target.write(line);
                target.newLine();
                copied++;
            }
        }
        return copied;
    }

    /**
     * Returns the lower-cased first {@code i} characters of {@code str}, or the
     * whole lower-cased string when it is shorter. Lower-casing uses the
     * default locale.
     */
    private final String getPrefix(String str, int i) {
        return str.length() < i ? str.toLowerCase() : str.substring(0, i).toLowerCase();
    }

    /**
     * Imports the values of one object property into {@code relations}.
     *
     * <p>Reads {@code <session>/relations/<property with non-word characters
     * removed>}; each line is {@code subject=object} with both parts
     * URL-encoded. Does nothing (apart from a warning) when the file is absent.
     *
     * @param str URI of the property
     * @throws IOException if a line contains no {@code '='}
     * @throws Exception on database errors
     */
    public void initObjectPropertyValues(String str) throws Exception {
        File valueFile = new File(this.sessionPath + "/relations/" + str.replaceAll("[\\W]", ""));
        if (!valueFile.exists()) {
            log.warning(valueFile.getName() + " does not exist.");
            return;
        }
        insertPropertyValues(valueFile, str, "INSERT INTO relations  (subject, predicate, object) SELECT A.index AS subject, B.index AS predicate, C.index AS object FROM index_resources A, index_resources B, index_resources C WHERE(A.uri = ? AND B.uri = ? AND C.uri = ?) ");
    }

    /**
     * Imports the values of one datatype property into {@code symbols}.
     *
     * <p>Reads {@code <session>/symbols/<property with non-word characters
     * removed>}; each line is {@code subject=literal} with both parts
     * URL-encoded. Does nothing (apart from a warning) when the file is absent.
     *
     * @param str URI of the property
     * @throws IOException if a line contains no {@code '='}
     * @throws Exception on database errors
     */
    public void initDatatypePropertyValues(String str) throws Exception {
        File valueFile = new File(this.sessionPath + "/symbols/" + str.replaceAll("[\\W]", ""));
        if (!valueFile.exists()) {
            log.warning(valueFile.getName() + " does not exist.");
            return;
        }
        insertPropertyValues(valueFile, str, "INSERT INTO symbols (subject, predicate, object, belief) SELECT A.index AS subject, B.index AS predicate, C.index AS object, 1.0 AS belief FROM index_resources A, index_resources B, index_literals C WHERE (A.uri = ? AND B.uri = ? AND C.literal = ?) ");
    }

    /**
     * Executes {@code sql} once per line of {@code valueFile}, binding the
     * decoded subject, the property and the decoded value, then commits.
     *
     * @param valueFile file of {@code subject=value} lines, both parts URL-encoded
     * @param property  value bound to the second statement parameter
     * @param sql       insert statement with exactly three parameters
     */
    private void insertPropertyValues(File valueFile, String property, String sql) throws Exception {
        try (BufferedReader reader = new BufferedReader(new FileReader(valueFile));
                PreparedStatement insert = this.connection.prepareStatement(sql)) {
            this.connection.setAutoCommit(false);
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                // Limit -1 keeps a trailing empty value ("subject=") as a second element.
                String[] pair = line.split("=", -1);
                if (pair.length < 2) {
                    throw new IOException("Malformed line in " + valueFile + ": " + line);
                }
                insert.setString(1, URLDecoder.decode(pair[0], UTF_8));
                insert.setString(2, property);
                insert.setString(3, URLDecoder.decode(pair[1], UTF_8));
                insert.executeUpdate();
            }
            this.connection.commit();
        }
    }

    /** Closes all registered statements and then the session database. */
    private void close() throws Exception {
        for (PreparedStatement statement : this.statements) {
            statement.close();
        }
        this.dbManager.close(this.session);
    }
}
