package de.dfki.leech;

import au.com.bytecode.opencsv.CSVReader;
import de.dfki.inquisitor.collections.CollectionUtilz;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.collections.TwoValuesBox;
import de.dfki.inquisitor.collections.ValueBox;
import de.dfki.inquisitor.text.EncryptionUtils;
import de.dfki.inquisitor.text.StringUtils;
import de.dfki.km.leech.SubDataEntityContentHandler;
import de.dfki.km.leech.parser.CrawlerParser;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:de/dfki/leech/AbstractCsvParser.class */
/**
 * Crawler parser that reads a CSV stream and turns every data row into one sub data entity.
 *
 * <p>Each row is converted into a {@link MultiValueHashMap} of attribute names to values. Subclasses tune the
 * behavior by overriding the hook methods of this class (column names, CSV dialect, attribute mappings, id and
 * fingerprint columns, ...). The defaults defined here read the column names from the first line, skip rows whose
 * mapped values are all empty, and map every column to an attribute of the same name.
 */
public class AbstractCsvParser extends CrawlerParser {
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.text("csv"));
    /** Attribute key under which the concatenated body text of a row travels to {@link #processSubDataEntity}. */
    protected static final String bodyMarker = "LeechAbstractCsvParserUniqueBodyMarker";
    private static final long serialVersionUID = 2048603422680874819L;
    /** Separator placed between the per-column values before they are hashed into an id or fingerprint. */
    private static final String KEY_PART_SEPARATOR = "_§$%_";
    /** Column name -> (target attribute name, regular expression) pairs used to extract parts of a cell value. */
    protected MultiValueHashMap<String, TwoValuesBox<String, String>> m_hsAttributeExtractionMappings = new MultiValueHashMap<>();
    /** Column name -> target attribute names; the target name "body" (case-insensitive) marks a body column. */
    protected MultiValueHashMap<String, String> m_hsAttributeMappings = new MultiValueHashMap<>();
    /** Shared empty result returned by the default {@link #getFurtherAttValPairs4Lines} implementation. */
    private final MultiValueHashMap<String, Object> m_hsEmptyAttVals = new MultiValueHashMap<>();

    /* loaded from: input_file:de/dfki/leech/AbstractCsvParser$CsvParserConfig.class */
    /**
     * CSV dialect settings that are handed to the {@link CSVReader} constructor.
     */
    public static class CsvParserConfig {
        /** Escape character passed to the reader. */
        public char escapeChar = '\\';
        /** Passed to the reader as its "ignore leading white space" flag. */
        public boolean ignoreLeadingWhiteSpace = true;
        /** Quote character passed to the reader. */
        public char quoteChar = '\"';
        /** Column separator passed to the reader. */
        public char separator = ',';
        /** Number of leading lines passed to the reader as lines to skip. */
        public int skipLineCount = 0;
        /** Passed to the reader as its "strict quotes" flag. */
        public boolean strictQuotes = false;
    }

    /**
     * Tells whether the first line read from the CSV contains the column names.
     *
     * @return {@code true} by default; when {@code false}, {@link #getColumnNames()} supplies the names
     */
    public boolean firstLineColumnNames() {
        return true;
    }

    /**
     * Returns the column-to-attribute name mappings.
     *
     * @return the live mapping instance held by this parser (not a copy)
     */
    public MultiValueHashMap<String, String> getAttributeMappings() {
        return this.m_hsAttributeMappings;
    }

    /**
     * Returns the per-column extraction rules: for each column, pairs of (target attribute name, regular
     * expression). Group 1 of the expression, matched against the cell value, becomes the attribute value.
     *
     * @return the live mapping instance held by this parser (not a copy)
     */
    public MultiValueHashMap<String, TwoValuesBox<String, String>> getAttributeValueExtractionRegExs() {
        return this.m_hsAttributeExtractionMappings;
    }

    /**
     * Supplies the column names when {@link #firstLineColumnNames()} returns {@code false}.
     *
     * @return an empty array by default
     */
    public String[] getColumnNames() {
        return new String[0];
    }

    /**
     * Supplies the CSV dialect used for reading.
     *
     * @return a fresh {@link CsvParserConfig} with its default values
     */
    public CsvParserConfig getCsvParserConfig() {
        return new CsvParserConfig();
    }

    /**
     * Names the columns whose values are hashed into the {@code dataEntityContentFingerprint} of a row.
     *
     * @return an empty array by default, in which case the parent's fingerprint is used unchanged
     */
    public String[] getDataEntityContentFingerprintColumNames() {
        return new String[0];
    }

    /**
     * Names the columns whose values are hashed into the {@code dataEntityId} of a row.
     *
     * @return an empty array by default, in which case the id is the parent's id plus the row index
     */
    public String[] getDataEntityIdColumNames() {
        return new String[0];
    }

    /**
     * Hook for contributing additional attribute/value pairs to a row; the result is added to the row's map.
     *
     * @param multiValueHashMap the attributes collected for the current row so far
     * @return the additional pairs; the default implementation returns a shared empty map
     */
    public MultiValueHashMap<String, Object> getFurtherAttValPairs4Lines(MultiValueHashMap<String, Object> multiValueHashMap) {
        return this.m_hsEmptyAttVals;
    }

    /**
     * Reads the CSV stream and builds one attribute map per accepted row.
     *
     * <p>Sources whose name ends with {@code .gz} are gunzipped; for sources ending with {@code .zip} the first
     * archive entry is read. Rows with fewer cells than there are column names are skipped, but still count for
     * the row index that is appended to {@code source} (and to {@code dataEntityId} when no id columns are set).
     * Errors while reading the CSV are logged and end the parsing; the rows collected so far are still returned.
     *
     * @param inputStream the raw (possibly compressed) CSV data; closed together with the CSV reader
     * @param contentHandler not used by this implementation
     * @param metadata metadata of the CSV file; {@code source}, {@code Content-Type}, {@code dataEntityId} and
     *        {@code dataEntityContentFingerprint} are read from it
     * @param parseContext not used by this implementation
     * @return an iterator over the attribute maps of all accepted rows
     * @throws Exception if the gzip or zip wrapper cannot be opened
     */
    protected Iterator<MultiValueHashMap<String, Object>> getSubDataEntitiesInformation(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        String source = metadata.get("source");
        String contentType = metadata.get("Content-Type");
        String dataEntityId = metadata.get("dataEntityId");
        String contentFingerprint = metadata.get("dataEntityContentFingerprint");

        InputStream csvStream = inputStream;
        if (source != null && source.endsWith(".gz")) {
            csvStream = new GZIPInputStream(inputStream);
        } else if (source != null && source.endsWith(".zip")) {
            ZipInputStream zipStream = new ZipInputStream(inputStream);
            // A ZipInputStream only delivers data after it has been positioned on an entry; without this call
            // every read reports end-of-stream and no row would ever be parsed. Only the first entry is read.
            if (zipStream.getNextEntry() == null) {
                LoggerFactory.getLogger(AbstractCsvParser.class.getName()).warn("Zip source '{}' contains no entry, nothing to parse", source);
            }
            csvStream = zipStream;
        }

        // Columns mapped to the attribute name "body" are concatenated into the body text instead of becoming attributes.
        Set<String> bodyColumns = new HashSet<>();
        for (Map.Entry<?, ?> mapping : getAttributeMappings().entryList()) {
            if ("body".equalsIgnoreCase((String) mapping.getValue())) {
                bodyColumns.add((String) mapping.getKey());
            }
        }

        Collection<MultiValueHashMap<String, Object>> rows = new LinkedList<>();
        CsvParserConfig config = getCsvParserConfig();
        // try-with-resources closes the reader (and the wrapped stream) on every exit path, including failures.
        try (CSVReader reader = new CSVReader(new InputStreamReader(csvStream, StandardCharsets.UTF_8), config.separator, config.quoteChar, config.escapeChar, config.skipLineCount, config.strictQuotes, config.ignoreLeadingWhiteSpace)) {
            String[] columnNames;
            if (firstLineColumnNames()) {
                String[] header = reader.readNext();
                if (header == null) {
                    // Empty input: there is not even a header line.
                    return rows.iterator();
                }
                columnNames = header.clone();
            } else {
                columnNames = getColumnNames();
            }

            int rowIndex = -1;
            String[] cells;
            while ((cells = reader.readNext()) != null) {
                rowIndex++;
                if (cells.length < 1 || cells.length < columnNames.length) {
                    // Incomplete row; it is skipped but keeps its index.
                    continue;
                }

                MultiValueHashMap<String, Object> attributes = new MultiValueHashMap<>();
                // Unmapped cell values by column name; fallback for id/fingerprint columns that were mapped away.
                MultiValueHashMap<String, String> rawValues = new MultiValueHashMap<>();
                StringBuilder body = new StringBuilder();

                // The guard above ensures cells.length >= columnNames.length, so surplus cells are ignored.
                for (int col = 0; col < columnNames.length; col++) {
                    String columnName = columnNames[col];
                    String cellValue = cells[col];
                    rawValues.add(columnName, cellValue);

                    if (bodyColumns.contains(columnName)) {
                        if (body.length() > 0) {
                            body.append("\n\n");
                        }
                        body.append(cellValue);
                        continue;
                    }

                    Collection<String> targetNames = getAttributeMappings().get(columnName);
                    if (CollectionUtilz.nullOrEmpty(targetNames)) {
                        attributes.add(columnName, cellValue);
                    } else {
                        for (String targetName : targetNames) {
                            attributes.add(targetName, cellValue);
                        }
                    }

                    Collection<TwoValuesBox<String, String>> extractions = getAttributeValueExtractionRegExs().get(columnName);
                    if (!CollectionUtilz.nullOrEmpty(extractions)) {
                        for (TwoValuesBox<String, String> extraction : extractions) {
                            String extracted = StringUtils.findGroup(extraction.getSecond(), cellValue, 1);
                            attributes.add(extraction.getFirst(), extracted == null ? "" : extracted);
                        }
                    }
                }

                // NOTE(review): body columns are not part of 'attributes', so a row that only carries body text
                // counts as empty here — confirm this is intended.
                if (ignoreEmptyLines() && CollectionUtilz.nullOrEmptyOrEntireWhiteSpace(attributes.values())) {
                    continue;
                }

                attributes.add("Content-Type", contentType);
                attributes.add("source", source + "_" + rowIndex);
                attributes.add("csvSource", source);
                attributes.add(bodyMarker, body.toString());
                attributes.addAll(getFurtherAttValPairs4Lines(attributes));

                if (!ignoreHistory()) {
                    String[] idColumns = getDataEntityIdColumNames();
                    if (idColumns.length == 0) {
                        attributes.add("dataEntityId", dataEntityId + "_" + rowIndex);
                    } else {
                        attributes.add("dataEntityId", dataEntityId + "__" + EncryptionUtils.sha1Hash(joinColumnValues(idColumns, attributes, rawValues)));
                    }

                    String[] fingerprintColumns = getDataEntityContentFingerprintColumNames();
                    if (fingerprintColumns.length == 0) {
                        attributes.add("dataEntityContentFingerprint", contentFingerprint);
                    } else {
                        // NOTE(review): the prefix is the parent's dataEntityId, not its fingerprint — confirm this is intended.
                        attributes.add("dataEntityContentFingerprint", dataEntityId + "__" + EncryptionUtils.sha1Hash(joinColumnValues(fingerprintColumns, attributes, rawValues)));
                    }
                }
                rows.add(attributes);
            }
        } catch (Exception e) {
            // Best effort: log the problem and hand back whatever rows were collected before it occurred.
            LoggerFactory.getLogger(AbstractCsvParser.class.getName()).error("Error", e);
        }
        return rows.iterator();
    }

    /**
     * Joins the string forms of the value collections of the given columns, separated by {@link #KEY_PART_SEPARATOR}.
     *
     * <p>For each name the mapped attributes are consulted first; when they hold nothing under that name the raw
     * cell values are used instead.
     */
    private static String joinColumnValues(String[] columnNames, MultiValueHashMap<String, Object> attributes, MultiValueHashMap<String, String> rawValues) {
        return Arrays.stream(columnNames).map(columnName -> {
            Collection<?> values = attributes.get(columnName);
            if (values == null || values.isEmpty()) {
                values = rawValues.get(columnName);
            }
            return String.valueOf(values);
        }).collect(Collectors.joining(KEY_PART_SEPARATOR));
    }

    /**
     * Returns the media types handled by this parser.
     *
     * @param parseContext not used by this implementation
     * @return an immutable set containing only {@code text/csv}
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Tells whether rows whose mapped attribute values are all null, empty or whitespace are skipped.
     *
     * @return {@code true} by default
     */
    public boolean ignoreEmptyLines() {
        return true;
    }

    /**
     * Tells whether id and fingerprint attributes are omitted from the rows and whether sub data entity handling
     * is triggered without the parse context.
     *
     * @return {@code false} by default
     */
    public boolean ignoreHistory() {
        return false;
    }

    /**
     * Does nothing: the CSV file itself produces no entity of its own, only its rows do.
     */
    protected void processCurrentDataEntity(InputStream inputStream, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Copies the attributes of one row into the metadata and triggers the sub data entity handling with the row's
     * body text.
     *
     * @param multiValueHashMap the attributes of the row, as produced by {@link #getSubDataEntitiesInformation}
     * @param metadata receives every attribute except the one stored under {@link #bodyMarker}
     * @param contentHandler the handler the sub data entity content handler is created around
     * @param parseContext passed on to the handling unless {@link #ignoreHistory()} returns {@code true}
     * @throws Exception if the sub data entity handling fails
     */
    public void processSubDataEntity(MultiValueHashMap<String, Object> multiValueHashMap, Metadata metadata, ContentHandler contentHandler, ParseContext parseContext) throws Exception {
        String body = "";
        for (Map.Entry<?, ?> attribute : multiValueHashMap.entryList()) {
            Object value = attribute.getValue();
            if (value == null) {
                // Values taken from absent parent metadata (e.g. Content-Type) may be null; there is nothing to copy.
                continue;
            }
            if (bodyMarker.equals(attribute.getKey())) {
                body = value.toString();
            } else {
                metadata.add((String) attribute.getKey(), value.toString());
            }
        }
        SubDataEntityContentHandler subHandler = new SubDataEntityContentHandler(contentHandler, metadata, body);
        if (ignoreHistory()) {
            subHandler.triggerSubDataEntityHandling();
        } else {
            subHandler.triggerSubDataEntityHandling(parseContext);
        }
    }
}
