package de.dfki.km.leech.elasticsearch;

import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Predicate;
import de.dfki.inquisitor.collections.CollectionUtilz;
import de.dfki.km.leech.metadata.LeechMetadata;
import de.dfki.km.leech.sax.DataSinkContentHandler;
import de.dfki.km.leech.util.LeechException;
import java.util.HashMap;
import net.minidev.json.JSONValue;
import org.apache.http.client.fluent.Content;
import org.apache.http.client.fluent.Request;
import org.apache.http.entity.ContentType;
import org.apache.tika.metadata.Metadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/dfki/km/leech/elasticsearch/ToElasticSearchContentHandler.class */
/**
 * Data sink that writes crawled documents into an Elasticsearch index through the {@code _bulk} REST endpoint.
 *
 * <p>New, modified and removed documents are translated into NDJSON bulk actions ({@code create},
 * {@code update}, {@code delete}) and collected in an in-memory buffer. The buffer is sent as soon as it holds
 * {@link #bulkSize(int) bulkSize} actions, and once more from {@link #crawlFinished()} for the remainder.
 *
 * <p>The handler is configured fluently, e.g.
 * {@code new ToElasticSearchContentHandler().server("localhost").port(9200).index("docs").leechAttRenames()}.
 *
 * <p>The buffer and counter are plain fields and no synchronization is performed by this class.
 */
public class ToElasticSearchContentHandler extends DataSinkContentHandler {
    private static final Logger LOG = LoggerFactory.getLogger(ToElasticSearchContentHandler.class);

    /** Maps metadata attribute names to the field names used in the index; unmapped names are used as-is. */
    protected HashMap<String, String> m_hsRenameAtts = new HashMap<>();
    /** Number of bulk actions currently held in {@link #m_strbCurrentNdJsonBulk}. */
    protected int m_iCurrentBulkSize = 0;
    /** Number of buffered actions at which the buffer is sent. */
    protected int m_iMaxBulkSize = 100;
    protected int m_iPort = 9200;
    protected String m_strEsSearchIndex = "";
    /** Server host, with or without a URL scheme (e.g. {@code localhost} or {@code https://es.example.org}). */
    protected String m_strEsUrl = "http://localhost";
    /** NDJSON payload of the bulk request that is currently being assembled. */
    protected StringBuilder m_strbCurrentNdJsonBulk = new StringBuilder();

    /**
     * Sets the attribute-name to index-field-name mapping.
     *
     * @param hashMap the rename table; {@code null} is treated as "no renames"
     * @return this handler, for call chaining
     */
    public ToElasticSearchContentHandler attributeRenames(HashMap<String, String> hashMap) {
        // A null table would otherwise fail with a NullPointerException on the next processed document.
        this.m_hsRenameAtts = hashMap != null ? hashMap : new HashMap<>();
        return this;
    }

    /**
     * Sets the number of buffered bulk actions at which the buffer is sent to the server.
     *
     * @param i the maximum bulk size; values of 1 or less send every action immediately
     * @return this handler, for call chaining
     */
    public ToElasticSearchContentHandler bulkSize(int i) {
        this.m_iMaxBulkSize = i;
        return this;
    }

    /**
     * Sends whatever is still buffered. Does nothing when the buffer is empty.
     *
     * @throws RuntimeException if the request fails or Elasticsearch reports errors for the bulk
     */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void crawlFinished() {
        flush();
    }

    /**
     * Sets the name of the target index.
     *
     * @param str the index name
     * @return this handler, for call chaining
     */
    public ToElasticSearchContentHandler index(String str) {
        this.m_strEsSearchIndex = str;
        return this;
    }

    /**
     * Replaces the rename table with the predefined mapping for common leech/Tika attributes
     * (title, creator, dates, source, page count, language, body, location).
     *
     * @return this handler, for call chaining
     */
    public ToElasticSearchContentHandler leechAttRenames() {
        this.m_hsRenameAtts = CollectionUtilz.createHashMap(new Object[]{"dc:title", "tns_title", "dc:creator", "tns_creator", "dcterms:modified", "date_modified", "source", "tk_source", "dcterms:created", "date_created", "xmpTPg:NPages", "i_pageCount", "dc:language", "k_language", LeechMetadata.body, "tns_body", "location", "g_location"});
        return this;
    }

    /**
     * Sets the server port.
     *
     * @param i the port number
     * @return this handler, for call chaining
     */
    public ToElasticSearchContentHandler port(int i) {
        this.m_iPort = i;
        return this;
    }

    /** Intentionally a no-op: error entities are not written to the index. */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void processErrorData(Metadata metadata) {
    }

    /**
     * Queues a partial-document {@code update} action for a modified document.
     *
     * <p>An update cannot be addressed without a document id, so the document is skipped (with a warning)
     * when none of the id attributes is present.
     *
     * @param metadata the document's metadata; every attribute becomes an array-valued field
     * @param str      the document's full text, stored under the (possibly renamed) body field
     * @throws RuntimeException if this action fills the bulk and sending it fails
     */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void processModifiedData(Metadata metadata, String str) {
        String strId = resolveDocumentId(metadata);
        if (strId == null) {
            LOG.warn("Skipping update, no document id found in metadata: {}", metadata);
            return;
        }
        StringBuilder sb = new StringBuilder();
        sb.append(String.format("{\"update\":{\"_id\":\"%s\"}}\n", JSONValue.escape(strId)));
        sb.append("{\"doc\":{");
        appendDocumentFields(sb, metadata, str);
        sb.append("}}\n");
        enqueue(sb);
    }

    /**
     * Queues a {@code create} action for a new document. When no document id can be determined the action
     * carries no {@code _id}, leaving id assignment to Elasticsearch.
     *
     * @param metadata the document's metadata; every attribute becomes an array-valued field
     * @param str      the document's full text, stored under the (possibly renamed) body field
     * @throws RuntimeException if this action fills the bulk and sending it fails
     */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void processNewData(Metadata metadata, String str) {
        String strId = resolveDocumentId(metadata);
        StringBuilder sb = new StringBuilder();
        if (strId != null) {
            sb.append(String.format("{\"create\":{\"_id\":\"%s\"}}\n", JSONValue.escape(strId)));
        } else {
            sb.append("{\"create\":{}}\n");
        }
        sb.append('{');
        appendDocumentFields(sb, metadata, str);
        sb.append("}\n");
        enqueue(sb);
    }

    /** Intentionally a no-op. */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void processProcessedData(Metadata metadata) {
    }

    /**
     * Queues a {@code delete} action for a removed document.
     *
     * <p>The document is skipped (with a warning) when none of the id attributes is present, instead of
     * issuing a delete for the literal id {@code "null"}.
     *
     * @param metadata the removed document's metadata, used only to determine the document id
     * @throws RuntimeException if this action fills the bulk and sending it fails
     */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void processRemovedData(Metadata metadata) {
        String strId = resolveDocumentId(metadata);
        if (strId == null) {
            LOG.warn("Skipping delete, no document id found in metadata: {}", metadata);
            return;
        }
        enqueue(String.format("{\"delete\":{\"_id\":\"%s\"}}\n", JSONValue.escape(strId)));
    }

    /** Intentionally a no-op: unmodified documents need no index change. */
    @Override // de.dfki.km.leech.sax.DataSinkContentHandler
    public void processUnmodifiedData(Metadata metadata) {
    }

    /**
     * Sets the server address.
     *
     * @param str host name or address, optionally with a scheme ({@code localhost}, {@code http://localhost},
     *            {@code https://es.example.org}); {@code http} is assumed when no scheme is given. The
     *            configured port is always appended, so the value must not contain a port itself.
     * @return this handler, for call chaining
     */
    public ToElasticSearchContentHandler server(String str) {
        this.m_strEsUrl = str;
        return this;
    }

    /** Returns the first id attribute present in the metadata, or {@code null} when there is none. */
    private static String resolveDocumentId(Metadata metadata) {
        String strId = metadata.get("dataEntityId");
        if (strId == null) {
            strId = metadata.get(LeechMetadata.id);
        }
        if (strId == null) {
            strId = metadata.get("source");
        }
        if (strId == null) {
            strId = metadata.get(LeechMetadata.RESOURCE_NAME_KEY);
        }
        return strId;
    }

    /**
     * Writes the members of the document JSON object (without the enclosing braces): one array-valued field
     * per metadata attribute, followed by the body text field. A {@code null} body is omitted.
     */
    private void appendDocumentFields(StringBuilder sb, Metadata metadata, String strBody) {
        boolean bFirstField = true;
        for (String strName : metadata.names()) {
            if (!bFirstField) {
                sb.append(',');
            }
            bFirstField = false;
            sb.append('"').append(JSONValue.escape(this.m_hsRenameAtts.getOrDefault(strName, strName))).append("\":[");
            boolean bFirstValue = true;
            for (String strValue : metadata.getValues(strName)) {
                if (!bFirstValue) {
                    sb.append(',');
                }
                bFirstValue = false;
                sb.append('"').append(JSONValue.escape(strValue)).append('"');
            }
            sb.append(']');
        }
        if (strBody == null) {
            // Nothing to store; appending would otherwise write the four-letter string "null" as body text.
            return;
        }
        if (!bFirstField) {
            sb.append(',');
        }
        sb.append('"').append(JSONValue.escape(this.m_hsRenameAtts.getOrDefault(LeechMetadata.body, LeechMetadata.body))).append('"');
        sb.append(":\"").append(JSONValue.escape(strBody)).append('"');
    }

    /** Adds one complete bulk action to the buffer and sends the buffer once it has reached the bulk size. */
    private void enqueue(CharSequence ndJsonAction) {
        this.m_strbCurrentNdJsonBulk.append(ndJsonAction);
        this.m_iCurrentBulkSize++;
        if (this.m_iCurrentBulkSize >= this.m_iMaxBulkSize) {
            flush();
        }
    }

    /**
     * Sends the buffered actions as one bulk request and checks the response's {@code errors} flag.
     *
     * <p>The buffer is cleared as soon as a response has been received, whether or not it reports errors,
     * so that a rejected batch is not re-sent together with every following document. When no response is
     * received (e.g. connection failure) the buffer is kept.
     */
    private void flush() {
        if (this.m_strbCurrentNdJsonBulk.length() == 0) {
            return;
        }
        String strPayload = this.m_strbCurrentNdJsonBulk.toString();
        try {
            Content returnContent = Request.Put(bulkUrl()).bodyString(strPayload, ContentType.APPLICATION_JSON).execute().returnContent();
            this.m_strbCurrentNdJsonBulk = new StringBuilder();
            this.m_iCurrentBulkSize = 0;
            String strResponse = returnContent.toString();
            if (((Boolean) JsonPath.read(strResponse, "$.errors", new Predicate[0])).booleanValue()) {
                throw new LeechException(strResponse);
            }
        } catch (Exception e) {
            LOG.error("Error in Elasticsearch bulk request:\n" + strPayload, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the bulk endpoint URL from server, port and index. A scheme is only added when the configured
     * server has none, and the index segment is left out when no index is configured.
     */
    private String bulkUrl() {
        if (this.m_strEsUrl == null) {
            throw new IllegalStateException("No Elasticsearch server configured");
        }
        String strServer = this.m_strEsUrl.trim();
        while (strServer.endsWith("/")) {
            strServer = strServer.substring(0, strServer.length() - 1);
        }
        if (!strServer.contains("://")) {
            strServer = "http://" + strServer;
        }
        StringBuilder strbUrl = new StringBuilder(strServer).append(':').append(this.m_iPort);
        if (this.m_strEsSearchIndex != null && !this.m_strEsSearchIndex.isEmpty()) {
            strbUrl.append('/').append(this.m_strEsSearchIndex);
        }
        return strbUrl.append("/_bulk").toString();
    }
}
