package de.dfki.leech.repairpedia;

import de.dfki.inquisitor.text.DateParser;
import de.dfki.inquisitor.text.DateUtils;
import de.dfki.inquisitor.text.StringUtils;
import de.dfki.km.leech.sax.DataSinkContentHandler;
import de.dfki.km.leech.sax.DataSinkContentHandlerDecorator;
import de.dfki.km.leech.solr.SolrIndexCreator;
import java.io.File;
import java.util.regex.MatchResult;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;

/* loaded from: input_file:de/dfki/leech/repairpedia/RepairPediaSolrIndexCreator.class */
/**
 * Command-line entry point that runs {@link SolrIndexCreator#createIndex} with a
 * {@link RepairPediaPostprocessor} installed as the post-processing handler.
 */
public class RepairPediaSolrIndexCreator {

    /**
     * Handler decorator that rewrites a document's metadata before passing it on to the
     * decorated handler.
     *
     * <p>Based on the document's {@code resourceName} it derives additional fields
     * (language, manufacturer, sequence number, automotive type, title, doctype, entity id,
     * source file name), converts the creation/modification dates with
     * {@link DateUtils#date2SolrString}, drops a fixed list of metadata keys and
     * re-formats the {@code creator} value.
     */
    public static class RepairPediaPostprocessor extends DataSinkContentHandlerDecorator {

        /** Metadata keys that are removed from every document before it is passed on. */
        private static final String[] DISCARDED_KEYS = {
            "access_permission:extract_for_accessibility",
            "meta:save-date",
            "dcterms:created",
            "Creation-Date",
            "Author",
            "meta:author",
            "date",
            "access_permission:can_modify",
            "access_permission:modify_annotations",
            "access_permission:fill_in_form",
            "dc:format",
            "access_permission:can_print",
            "xmp:CreatorTool",
            "Last-Save-Date",
            "dc:title",
            "access_permission:assemble_document",
            "dcterms:modified",
            "Last-Modified",
            "meta:creation-date",
            "dc:creator",
            "pdf:PDFVersion",
            "X-Parsed-By",
            "xmpTPg:NPages",
            "access_permission:can_print_degraded",
            "pdf:encrypted",
            "access_permission:extract_content",
            "producer",
        };

        /** Creates a postprocessor without a decorated handler. */
        public RepairPediaPostprocessor() {
        }

        /**
         * Creates a postprocessor that decorates the given handler.
         *
         * @param dataSinkContentHandler the handler passed to the superclass constructor
         */
        public RepairPediaPostprocessor(DataSinkContentHandler dataSinkContentHandler) {
            super(dataSinkContentHandler);
        }

        /**
         * Enriches and cleans {@code metadata}, then delegates to the superclass implementation.
         *
         * @param metadata the document metadata; modified in place. Must contain a
         *     {@code resourceName} entry.
         * @param str passed through unchanged to the superclass
         * @throws NullPointerException if {@code metadata} has no {@code resourceName} entry
         * @throws RuntimeException if a {@code /zkf-tipps/} resource name cannot be parsed
         */
        public void processNewData(Metadata metadata, String str) {
            String resourceName = metadata.get("resourceName");
            if (resourceName == null) {
                // Same exception type as the implicit dereference failure, but with a usable message.
                throw new NullPointerException(
                        "metadata contains no \"resourceName\" entry; cannot post-process document");
            }
            String fileName = new File(resourceName).getName();

            if (resourceName.contains("/zkf-tipps/")) {
                addZkfTippsFields(metadata, resourceName);
            } else if (resourceName.contains("/ifl_zkf_web/")) {
                // NOTE(review): the t/hash query parameters are hard-coded and the file name is
                // inserted without URL encoding - confirm both are intended.
                metadata.remove("source");
                metadata.add("source", "http://www.zkf.de/index.php?eID=tx_nawsecuredl&u=2803&file=uploads/media/" + fileName + "&t=1449334349&hash=f2cc50b542ef17209846df26e5732c777e63b12f");
            }

            // The path segment directly below "data/" is used as the document type.
            metadata.add("doctype_s", StringUtils.findFirstMatch(".*/data/(.+?)/.*", resourceName).group(1));
            // The entity id is the resource name starting at its last "data/" segment.
            metadata.remove("dataEntityId");
            metadata.add("dataEntityId", StringUtils.findFirstMatch(".*/(data/.*)", resourceName).group(1));
            metadata.add("sourcefilename_s", fileName);

            normalizeDates(metadata);

            for (String key : DISCARDED_KEYS) {
                metadata.remove(key);
            }

            String creator = metadata.get("creator");
            if (!StringUtils.nullOrWhitespace(creator)) {
                metadata.set("creator", formatCreator(creator));
            }

            super.processNewData(metadata, str);
        }

        /**
         * Parses a {@code /zkf-tipps/} resource name and adds the extracted parts to
         * {@code metadata}; any failure is rethrown as a {@link RuntimeException}.
         */
        private static void addZkfTippsFields(Metadata metadata, String resourceName) {
            try {
                MatchResult match = StringUtils.findFirstMatch(".*zkf-tipps/(.+?)/(.+?)/(\\D+)(\\d+) - (.+?) - (.+)\\....", resourceName);
                String language = match.group(1);
                String manufacturer = match.group(2);
                // Group 3 (the non-digit prefix in front of the sequence number) is not used.
                String sequenceNumber = match.group(4);
                String automotiveType = match.group(5);
                String title = match.group(6);

                metadata.add(TikaCoreProperties.LANGUAGE.getName(), language);
                metadata.add("manufacturer_s", manufacturer);
                metadata.add("sequencenumber_s", sequenceNumber);
                metadata.add("automotivetype", automotiveType);
                metadata.remove("title");
                metadata.add("title", title);
            } catch (Exception e) {
                throw new RuntimeException("Error parsing zkf-tipps filename", e);
            }
        }

        /**
         * Replaces {@code created} with the converted {@code dcterms:created} value (if any) and
         * converts {@code modified} in place (if present).
         */
        private static void normalizeDates(Metadata metadata) {
            metadata.remove("created");
            String created = metadata.get("dcterms:created");
            if (!StringUtils.nullOrWhitespace(created)) {
                metadata.add("created", DateUtils.date2SolrString(DateParser.parseDateString(created)));
            }

            String modified = metadata.get("modified");
            if (!StringUtils.nullOrWhitespace(modified)) {
                // Convert first so a parse failure leaves the existing value untouched.
                String solrModified = DateUtils.date2SolrString(DateParser.parseDateString(modified));
                metadata.remove("modified");
                metadata.add("modified", solrModified);
            }
        }

        /**
         * Inserts a space in front of upper-case letters that directly follow another character
         * and upper-cases the first character of the result.
         *
         * @param creator a non-empty creator value
         * @return the re-formatted value
         */
        private static String formatCreator(String creator) {
            String spaced = creator.replaceAll("(.)(\\p{Upper})", "$1 $2");
            // Plain concatenation instead of replaceFirst(".", ...): the first character must not
            // be interpreted as a regex replacement string ('$' and '\\' are special there), and
            // '.' would skip a leading line terminator. Locale.ROOT keeps the casing independent
            // of the JVM's default locale.
            return spaced.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + spaced.substring(1);
        }
    }

    /**
     * Runs a {@link SolrIndexCreator} whose post-processing handler is a
     * {@link RepairPediaPostprocessor}.
     *
     * @param strArr command-line arguments, passed unchanged to {@code createIndex}
     * @throws Exception whatever {@code createIndex} throws
     */
    public static void main(String[] strArr) throws Exception {
        SolrIndexCreator indexCreator = new SolrIndexCreator() {
            public DataSinkContentHandlerDecorator getPostprocessingHandler() {
                return new RepairPediaPostprocessor();
            }
        };
        indexCreator.createIndex(strArr);
    }
}
