package de.dfki.km.email2pimo.gazetteer.populators;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import de.dfki.km.email2pimo.gazetteer.FrequencyGazetteer;
import de.dfki.km.email2pimo.util.SparqlUtil;
import de.dfki.km.email2pimo.util.UmlautString;
import de.dfki.km.email2pimo.vocabularies.DBPedia;
import java.io.File;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.model.node.impl.URIImpl;

/* loaded from: input_file:de/dfki/km/email2pimo/gazetteer/populators/NameFrequencyGazetteerPopulator.class */
/**
 * Populates {@link FrequencyGazetteer}s with given names and surnames of persons found in
 * DBPedia, grouped by language.
 *
 * <p>For a language, the countries declaring that language are looked up; for each country the
 * names of persons born there are queried in slices of {@code yearStep} years between
 * {@code yearStart} (inclusive) and {@code yearEnd} (exclusive), followed by one query without
 * any birth date restriction. Every person URI contributes to a given gazetteer at most once.
 *
 * <p>NOTE(review): the year slicing presumably exists to keep individual SPARQL result sets
 * small; confirm against the endpoint limits.
 */
public class NameFrequencyGazetteerPopulator extends AbstractDBPediaPopulator {
    private static final Logger log = Logger.getLogger(NameFrequencyGazetteerPopulator.class.getName());

    /** Property URI used to query given names. */
    private static final String FOAF_GIVEN_NAME = "http://xmlns.com/foaf/0.1/givenname";

    /** Property URI used to query surnames. */
    private static final String FOAF_SURNAME = "http://xmlns.com/foaf/0.1/surname";

    /** First birth year (inclusive) of the sliced queries. */
    private Integer yearStart = 1850;

    /** Birth year at which the sliced queries stop (exclusive); defaults to the current year. */
    private Integer yearEnd = Integer.valueOf(new GregorianCalendar().get(GregorianCalendar.YEAR));

    /** Width of one query slice in years; must be positive when populating. */
    private Integer yearStep = 1;

    /** Gazetteers keyed by language URI; one gazetteer per name property and language. */
    private ArrayListMultimap<URI, FrequencyGazetteer> gazetteers = ArrayListMultimap.create();

    /** Person URIs already counted, keyed by {@code "<language>:<property>"}. */
    private Map<String, Set<String>> processedPersonURIs = Maps.newHashMap();

    /**
     * Populates the gazetteers for every resource returned by
     * {@code allWithRdfType(DBPedia.Ontology.Language)}.
     */
    public void populate() {
        Iterator<URI> languages = allWithRdfType(DBPedia.Ontology.Language).iterator();
        while (languages.hasNext()) {
            populateByLanguage(languages.next());
        }
    }

    /**
     * Populates both the given-name and the surname gazetteer of one language.
     *
     * @param uri the language resource
     */
    public void populateByLanguage(URI uri) {
        log.info("Populating name frequency gazetteers from DBPedia by language " + uri.toSPARQL() + "...");
        populateByLanguage(uri, new URIImpl(FOAF_GIVEN_NAME));
        populateByLanguage(uri, new URIImpl(FOAF_SURNAME));
    }

    /**
     * Populates the gazetteer of one language and one name property.
     *
     * @param uri  the language resource
     * @param uri2 the name property whose values are collected
     * @throws IllegalStateException if the configured year step is {@code null} or not positive
     */
    public void populateByLanguage(URI uri, URI uri2) {
        int step = requirePositiveYearStep();
        FrequencyGazetteer gazetteer = getGazetteer(uri, uri2);
        Set<String> seenPersons = getProcessedPersonURIs(uri, uri2);
        String property = uri2.toString();
        String propertyName = property.substring(1 + property.lastIndexOf('/'));
        for (URI country : countriesWithLanguage(uri)) {
            log.info("Adding " + propertyName + "s of persons with birthPlace " + country.toSPARQL() + "...");
            int end = this.yearEnd.intValue();
            // One query per slice [year, year + step); the loop ends once yearEnd is reached.
            for (int year = this.yearStart.intValue(); year < end; year += step) {
                addResultSet(
                        namesFromCountry(uri2, country, Integer.valueOf(year), Integer.valueOf(year + step)),
                        gazetteer,
                        seenPersons);
            }
            // Final query without date restriction; persons already seen are skipped in addResultSet.
            addResultSet(namesFromCountry(uri2, country, null, null), gazetteer, seenPersons);
        }
    }

    /**
     * Returns the configured year step, rejecting values that would make the slicing loop
     * fail or never terminate.
     */
    private int requirePositiveYearStep() {
        if (this.yearStep == null || this.yearStep.intValue() <= 0) {
            throw new IllegalStateException("yearStep must be a positive number of years but was " + this.yearStep);
        }
        return this.yearStep.intValue();
    }

    /**
     * Adds the name parts of every not yet processed person in the result set to the gazetteer.
     *
     * @param resultSet          solutions binding {@code ?person} and {@code ?name}
     * @param frequencyGazetteer gazetteer receiving the valid name parts
     * @param set                person URIs already processed; updated in place
     */
    private void addResultSet(ResultSet resultSet, FrequencyGazetteer frequencyGazetteer, Set<String> set) {
        while (resultSet.hasNext()) {
            QuerySolution solution = resultSet.next();
            String person = solution.getResource("?person").toString();
            // Set.add returns false when the person was already counted.
            if (!set.add(person)) {
                continue;
            }
            // Split multi-part names on hyphens and spaces and count each part on its own.
            for (String part : solution.getLiteral("?name").getLexicalForm().split("[- ]")) {
                String converted = UmlautString.convert(part);
                if (isValid(converted)) {
                    frequencyGazetteer.addInstance(converted);
                }
            }
        }
    }

    /**
     * Queries the countries that declare the given language.
     *
     * <p>NOTE(review): unlike {@code namesFromCountry}, this query is not passed through
     * {@code SparqlUtil.rewrite}; confirm that this is intended.
     *
     * @param uri the language resource
     * @return the country resources bound to {@code ?country}
     */
    private Set<URI> countriesWithLanguage(URI uri) {
        Set<URI> countries = Sets.newHashSet();
        ResultSet results = execQuery("SELECT ?country WHERE {   ?country rdf:type dbpo:Country .   ?country dbpo:language " + uri.toSPARQL() + " . }");
        while (results.hasNext()) {
            countries.add(new URIImpl(results.next().getResource("?country").getURI()));
        }
        return countries;
    }

    /**
     * Queries persons with the given birth place together with their value of the name property.
     *
     * @param uri  the name property
     * @param uri2 the birth place (country) resource
     * @param num  lower bound of the birth year (inclusive), or {@code null} for no lower bound
     * @param num2 upper bound of the birth year (exclusive), or {@code null} for no upper bound
     * @return solutions binding {@code ?person} and {@code ?name}
     */
    private ResultSet namesFromCountry(URI uri, URI uri2, Integer num, Integer num2) {
        StringBuilder sb = new StringBuilder();
        sb.append("SELECT DISTINCT ?person ?name WHERE {");
        sb.append("?person rdf:type foaf:Person . ");
        sb.append("?person ").append(uri.toSPARQL()).append(" ?name . ");
        sb.append("?person dbpp:birthPlace ").append(uri2.toSPARQL()).append(" . ");
        if (num != null || num2 != null) {
            sb.append("?person dbpo:birthDate ?date . ");
            sb.append("FILTER (");
            if (num != null) {
                // Use the requested slice bound, not the overall configured range.
                sb.append("?date >= \"").append(num).append("-01-01\"^^xsd:date");
            }
            if (num != null && num2 != null) {
                sb.append(" && ");
            }
            if (num2 != null) {
                sb.append("?date < \"").append(num2).append("-01-01\"^^xsd:date");
            }
            sb.append(")");
        }
        sb.append("}");
        return execQuery(SparqlUtil.rewrite(sb.toString()));
    }

    /**
     * Accepts a name part only if it has at least three characters and consists of letters only.
     */
    private boolean isValid(String str) {
        if (str.length() < 3) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!Character.isLetter(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the gazetteer registered for the language whose concept equals the property URI,
     * creating and registering it on first use.
     */
    private FrequencyGazetteer getGazetteer(URI uri, URI uri2) {
        String concept = uri2.toString();
        for (FrequencyGazetteer candidate : this.gazetteers.get(uri)) {
            if (candidate.getConcept().equals(concept)) {
                return candidate;
            }
        }
        FrequencyGazetteer created = new FrequencyGazetteer(concept);
        this.gazetteers.put(uri, created);
        return created;
    }

    /**
     * Returns the mutable set of already processed person URIs for the language/property pair,
     * creating it on first use.
     */
    private Set<String> getProcessedPersonURIs(URI uri, URI uri2) {
        String key = uri.toString() + ":" + uri2.toString();
        Set<String> persons = this.processedPersonURIs.get(key);
        if (persons == null) {
            persons = Sets.newHashSet();
            this.processedPersonURIs.put(key, persons);
        }
        return persons;
    }

    /** Returns the first birth year (inclusive) of the sliced queries. */
    public int getYearStart() {
        return this.yearStart.intValue();
    }

    /** Sets the first birth year (inclusive) of the sliced queries. */
    public void setYearStart(Integer num) {
        this.yearStart = num;
    }

    /** Returns the birth year at which the sliced queries stop (exclusive). */
    public Integer getYearEnd() {
        return this.yearEnd;
    }

    /** Sets the birth year at which the sliced queries stop (exclusive). */
    public void setYearEnd(Integer num) {
        this.yearEnd = num;
    }

    /** Returns the width of one query slice in years. */
    public Integer getYearStep() {
        return this.yearStep;
    }

    /** Sets the width of one query slice in years; populating requires a positive value. */
    public void setYearStep(Integer num) {
        this.yearStep = num;
    }

    /** Returns the live multimap of gazetteers keyed by language URI. */
    public ArrayListMultimap<URI, FrequencyGazetteer> getGazetteers() {
        return this.gazetteers;
    }

    /**
     * Populates the gazetteers for German, English, French and Spanish in ten-year slices and
     * saves each one to {@code <language>_<property local name>.txt} in the working directory.
     */
    public static void main(String[] strArr) {
        Map<String, URI> languages = Maps.newHashMap();
        languages.put("german", DBPedia.resource("German_language"));
        languages.put("english", DBPedia.resource("English_language"));
        languages.put("french", DBPedia.resource("French_language"));
        languages.put("spanish", DBPedia.resource("Spanish_language"));
        for (Map.Entry<String, URI> language : languages.entrySet()) {
            URI languageUri = language.getValue();
            NameFrequencyGazetteerPopulator populator = new NameFrequencyGazetteerPopulator();
            populator.setYearStep(10);
            populator.populateByLanguage(languageUri);
            for (FrequencyGazetteer gazetteer : populator.getGazetteers().get(languageUri)) {
                String concept = gazetteer.getConcept();
                String localName = concept.substring(concept.lastIndexOf('/') + 1);
                FrequencyGazetteer.saveToFile(gazetteer, new File(language.getKey() + "_" + localName + ".txt"));
            }
        }
    }
}
