package de.dfki.leech;

import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
import de.dfki.inquisitor.collections.CollectionUtilz;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.json.JsonStreamingWithPathUtil;
import de.dfki.inquisitor.text.StringUtils;
import de.dfki.km.leech.SubDataEntityContentHandler;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:de/dfki/leech/AbstractJsonParser.class */
public class AbstractJsonParser extends AbstractParser {
    private static final long serialVersionUID = 6337211184265059251L;
    protected static Set<MediaType> SUPPORTED_TYPES = Set.of(MediaType.text("json"), MediaType.text("json_zip"), MediaType.text("json_gzip"));
    protected String m_strUpperSource;
    protected String m_strUpperContentType;
    protected MultiValueHashMap<String, String> m_hsJsonPath2AttName = new MultiValueHashMap<>();
    public boolean showHandledMetadata = false;
    public boolean skipDoubleEntries4AttValuePairs = true;
    public boolean ignoreHistory = true;

    public String getDefaultAttributeName() {
        return "noAttributeNameSpecified";
    }

    public MultiValueHashMap<String, String> getPath2AttNameMappings() {
        return this.m_hsJsonPath2AttName;
    }

    public String getPath2JsonObjectOrArray2Extract() {
        return "$";
    }

    public int getSkipValuesBeyondExtractPathHops() {
        return 0;
    }

    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    public void handleMetadata(MultiValueHashMap<String, String> multiValueHashMap, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        for (String str : metadata.names()) {
            metadata.remove(str);
        }
        if (!multiValueHashMap.containsKey("source")) {
            metadata.add("source", this.m_strUpperSource);
        }
        if (!multiValueHashMap.containsKey("Content-Type")) {
            metadata.add("Content-Type", this.m_strUpperContentType);
        }
        String str2 = "";
        for (Map.Entry entry : multiValueHashMap.entryList()) {
            String str3 = (String) entry.getKey();
            String str4 = (String) entry.getValue();
            if ("body".equalsIgnoreCase(str3)) {
                str2 = str4;
            } else {
                metadata.add(str3, str4);
            }
        }
        SubDataEntityContentHandler subDataEntityContentHandler = new SubDataEntityContentHandler(contentHandler, metadata, str2);
        if (this.ignoreHistory) {
            subDataEntityContentHandler.triggerSubDataEntityHandling();
        } else {
            subDataEntityContentHandler.triggerSubDataEntityHandling(parseContext);
        }
    }

    public void parse(InputStream inputStream, final ContentHandler contentHandler, final Metadata metadata, final ParseContext parseContext) throws IOException, SAXException, TikaException {
        if (metadata.get("source").endsWith(".gz")) {
            inputStream = new GZIPInputStream(inputStream);
        }
        if (metadata.get("source").endsWith(".zip")) {
            inputStream = new ZipInputStream(inputStream);
        }
        this.m_strUpperSource = metadata.get("source");
        this.m_strUpperContentType = metadata.get("Content-Type");
        JsonReader jsonReader = new JsonReader(new InputStreamReader(inputStream));
        try {
            JsonStreamingWithPathUtil.handleJson(jsonReader, getPath2JsonObjectOrArray2Extract(), (JsonStreamingWithPathUtil.JsonAttSimpleValuePairHandler) null);
            boolean z = JsonToken.BEGIN_ARRAY.equals(jsonReader.peek());
            MultiValueHashMap<String, String> multiValueHashMap = this.skipDoubleEntries4AttValuePairs ? new MultiValueHashMap<>(HashSet.class) : new MultiValueHashMap<>();
            multiValueHashMap.add("containerSource", this.m_strUpperSource);
            multiValueHashMap.add("containerSourceContentType", this.m_strUpperContentType);
            final boolean z2 = z;
            final MultiValueHashMap<String, String> multiValueHashMap2 = multiValueHashMap;
            JsonStreamingWithPathUtil.handleJson(jsonReader, (String) null, new JsonStreamingWithPathUtil.JsonAttSimpleValuePairHandler() { // from class: de.dfki.leech.AbstractJsonParser.1
                protected int m_iLastArrayIndex = 0;

                public void handleAttValuePair(String str, String str2, String str3, JsonToken jsonToken, List<Integer> list) {
                    int intValue;
                    if (AbstractJsonParser.this.showHandledMetadata) {
                        System.out.println(str + " => " + str3);
                    }
                    if (StringUtils.nullOrWhitespace(str2)) {
                        str2 = AbstractJsonParser.this.getDefaultAttributeName();
                    }
                    if (z2 && list.size() > 0 && (intValue = list.get(0).intValue()) != this.m_iLastArrayIndex) {
                        try {
                            AbstractJsonParser.this.handleMetadata(multiValueHashMap2, contentHandler, metadata, parseContext);
                        } catch (Exception e) {
                            Logger.getLogger(AbstractJsonParser.class.getName()).log(Level.SEVERE, "Error handling " + str + " => " + str3, (Throwable) e);
                        }
                        this.m_iLastArrayIndex = intValue;
                        multiValueHashMap2.clear();
                        multiValueHashMap2.add("containerSource", AbstractJsonParser.this.m_strUpperSource);
                        multiValueHashMap2.add("containerSourceContentType", AbstractJsonParser.this.m_strUpperContentType);
                    }
                    Collection collection = AbstractJsonParser.this.getPath2AttNameMappings().get(str.replaceAll("\\[\\d+\\]", "[*]"));
                    if (CollectionUtilz.nullOrEmpty(collection)) {
                        if (StringUtils.countMatches(str.replace(AbstractJsonParser.this.getPath2JsonObjectOrArray2Extract(), ""), ".") - 1 <= AbstractJsonParser.this.getSkipValuesBeyondExtractPathHops()) {
                            multiValueHashMap2.add(str2, str3);
                        }
                    } else {
                        Iterator it = collection.iterator();
                        while (it.hasNext()) {
                            multiValueHashMap2.add((String) it.next(), str3);
                        }
                    }
                }
            });
            try {
                handleMetadata(multiValueHashMap, contentHandler, metadata, parseContext);
                jsonReader.close();
            } catch (Exception e) {
                throw new TikaException("Error: " + e.getMessage(), e);
            }
        } catch (Throwable th) {
            try {
                jsonReader.close();
            } catch (Throwable th2) {
                th.addSuppressed(th2);
            }
            throw th;
        }
    }
}
