package de.dfki.leech.earlyTrendRadar;

import de.dfki.inquisitor.collections.CollectionUtilz;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.text.DateUtils;
import de.dfki.inquisitor.text.StringUtils;
import de.dfki.km.leech.Leech;
import de.dfki.km.leech.sax.PrintlnContentHandler;
import de.dfki.leech.AbstractJsonParser;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:de/dfki/leech/earlyTrendRadar/BrandwatchJsonParser.class */
/**
 * JSON parser for Brandwatch data ({@code application/vnd.etr.brandwatch} and its
 * {@code _zip} / {@code _gzip} variants).
 *
 * <p>The class contributes a static mapping from JSON paths to attribute names and normalizes a
 * few attribute values in {@link #handleMetadata} before handing over to the
 * {@link AbstractJsonParser} implementation.
 */
public class BrandwatchJsonParser extends AbstractJsonParser {
    private static final long serialVersionUID = 2872726190507335529L;

    /** Media types returned by {@link #getSupportedTypes(ParseContext)}. */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
            MediaType.application("vnd.etr.brandwatch"),
            MediaType.application("vnd.etr.brandwatch_zip"),
            MediaType.application("vnd.etr.brandwatch_gzip"))));

    /** Leading host name prefixes removed, in this order, from the {@code globalSource} value. */
    private static final String[] WWW_PREFIXES = {"www.", "www1.", "www2.", "www3."};

    /**
     * Guards {@link #dateFormat}: {@link SimpleDateFormat} is not synchronized, so concurrent
     * {@code parse} calls on the shared instance must be serialized externally.
     */
    private static final Object DATE_FORMAT_LOCK = new Object();

    /** Maps the hyphenated continent codes (e.g. {@code "a-f"}) to their replacement (e.g. {@code "af"}). */
    protected static HashMap<String, String> hsContinentCodeOld2New = new HashMap<>();

    /** JSON path to attribute name mappings; one path may feed several attributes. */
    protected static MultiValueHashMap<String, String> hsPath2AttName = new MultiValueHashMap<>();

    /** Format ({@code yyyy-MM-dd}) used to parse the {@code modified} value. Not thread-safe on its own. */
    protected static SimpleDateFormat dateFormat;

    /**
     * Manual test entry point: parses a hard-coded local file with Leech and prints only the
     * first data entity reported to the content handler.
     *
     * @param strArr ignored
     * @throws Exception whatever {@code Leech.parse} throws
     */
    public static void main(String[] strArr) throws Exception {
        new Leech().parse("/home/reuschling/muell/brandwatch507.bw.json.gz", new PrintlnContentHandler(PrintlnContentHandler.Verbosity.all) {
            // Becomes true once the first entity was printed; every later entity is skipped.
            boolean firstEntityPrinted = false;

            public void processNewData(Metadata metadata, String str) {
                if (this.firstEntityPrinted) {
                    return;
                }
                super.processNewData(metadata, str);
                this.firstEntityPrinted = true;
            }
        }, new ParseContext());
    }

    /** Creates the parser with {@code ignoreHistory} switched on. */
    public BrandwatchJsonParser() {
        this.ignoreHistory = true;
    }

    /**
     * @return the shared static JSON path to attribute name mapping of this class
     */
    @Override // de.dfki.leech.AbstractJsonParser
    public MultiValueHashMap<String, String> getPath2AttNameMappings() {
        return hsPath2AttName;
    }

    /**
     * @return {@code "$"}, i.e. the JSON root
     */
    @Override // de.dfki.leech.AbstractJsonParser
    public String getPath2JsonObjectOrArray2Extract() {
        return "$";
    }

    /**
     * @param parseContext not used
     * @return the unmodifiable set of Brandwatch media types
     */
    @Override // de.dfki.leech.AbstractJsonParser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Normalizes selected attribute values in place and then delegates to the superclass.
     *
     * <ul>
     *   <li>If the first {@code modified} value is not null/whitespace, it is parsed with
     *       {@link #dateFormat}, converted via {@code DateUtils.date2Number}, and the result
     *       replaces all values of both {@code modified} and {@code date}.</li>
     *   <li>If the first {@code continentCode} value has an entry in
     *       {@link #hsContinentCodeOld2New}, it is replaced by the mapped code.</li>
     *   <li>All {@code source} values are additionally added under {@code sourceUrl}.</li>
     *   <li>The first {@code globalSource} value loses a leading {@code www.}, {@code www1.},
     *       {@code www2.} and/or {@code www3.} prefix.</li>
     * </ul>
     *
     * @param multiValueHashMap the attribute values of the current entity; modified in place
     * @param contentHandler passed through to the superclass
     * @param metadata passed through to the superclass
     * @param parseContext passed through to the superclass
     * @throws Exception if the {@code modified} value cannot be parsed as a date, or if the
     *         superclass implementation fails
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // de.dfki.leech.AbstractJsonParser
    public void handleMetadata(MultiValueHashMap<String, String> multiValueHashMap, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        String rawModified = (String) multiValueHashMap.getFirst("modified", new String[0]);
        if (!StringUtils.nullOrWhitespace(rawModified)) {
            final String dateNumber;
            // SimpleDateFormat keeps mutable parse state; serialize access to the shared instance.
            synchronized (DATE_FORMAT_LOCK) {
                dateNumber = String.valueOf(DateUtils.date2Number(dateFormat.parse(rawModified)));
            }
            multiValueHashMap.remove("modified");
            multiValueHashMap.add("modified", dateNumber);
            multiValueHashMap.remove("date");
            multiValueHashMap.add("date", dateNumber);
        }

        Collection<String> continentCodes = multiValueHashMap.get("continentCode");
        if (!continentCodes.isEmpty()) {
            String oldCode = continentCodes.iterator().next();
            String newCode = oldCode == null ? null : hsContinentCodeOld2New.get(oldCode);
            if (newCode != null) {
                multiValueHashMap.remove("continentCode", oldCode);
                multiValueHashMap.add("continentCode", newCode);
            }
        }

        multiValueHashMap.addAll("sourceUrl", multiValueHashMap.get("source"));

        Collection<String> globalSources = multiValueHashMap.get("globalSource");
        if (!globalSources.isEmpty()) {
            String originalSource = globalSources.iterator().next();
            if (originalSource != null) {
                // Remove first, add second: the value is re-added even when nothing was stripped.
                String strippedSource = stripWwwPrefixes(originalSource);
                multiValueHashMap.remove("globalSource", originalSource);
                multiValueHashMap.add("globalSource", strippedSource);
            }
        }

        super.handleMetadata(multiValueHashMap, contentHandler, metadata, parseContext);
    }

    /**
     * Removes each entry of {@link #WWW_PREFIXES}, checked one after the other in array order,
     * from the start of the given host name. Every prefix is removed at most once.
     */
    private static String stripWwwPrefixes(String host) {
        String stripped = host;
        for (String prefix : WWW_PREFIXES) {
            if (stripped.startsWith(prefix)) {
                stripped = stripped.substring(prefix.length());
            }
        }
        return stripped;
    }

    static {
        hsPath2AttName.add("$[*].fulltext", "body");
        hsPath2AttName.add("$[*].date", "date");
        hsPath2AttName.add("$[*].date", "modified");
        hsPath2AttName.add("$[*].date", "dataEntityContentFingerprint");
        hsPath2AttName.add("$[*].url", "source");
        hsPath2AttName.add("$[*].url", "dataEntityId");
        hsPath2AttName.add("$[*].author", "creator");
        hsPath2AttName.add("$[*].title", "title");
        hsPath2AttName.add("$[*].countryCode", "origin");
        hsPath2AttName.add("$[*].countryCode", "countryCode");
        hsPath2AttName.add("$[*].domain", "globalSource");
        hsPath2AttName.add("$[*].domain", "domain");
        hsContinentCodeOld2New.put("a-f", "af");
        hsContinentCodeOld2New.put("a-n", "an");
        hsContinentCodeOld2New.put("a-s", "as");
        hsContinentCodeOld2New.put("a-u", "au");
        hsContinentCodeOld2New.put("n-a", "na");
        hsContinentCodeOld2New.put("s-a", "sa");
        dateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ENGLISH);
    }
}
