package de.dfki.km.perspecting.obie.symbolization;

import de.dfki.km.perspecting.obie.connection.OntologySession;
import de.dfki.km.perspecting.obie.connection.ontology.ResultSetCallback;
import de.dfki.km.perspecting.obie.model.Annotation;
import de.dfki.km.perspecting.obie.model.DataSheet;
import de.dfki.km.perspecting.obie.model.Model;
import de.dfki.km.perspecting.obie.model.Record;
import de.dfki.km.perspecting.obie.model.SuffixArray;
import de.dfki.km.perspecting.obie.model.TextPointer;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.utils.logging.ScoobieLogging;
import de.dfki.km.perspecting.obie.workflow.tasks.ContentSymbolRecognition;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.IndexSearcher;

/* loaded from: input_file:de/dfki/km/perspecting/obie/symbolization/SAContentSymbolRecognition.class */
/**
 * {@link ContentSymbolRecognition} implementation that matches datatype property values from an
 * {@link OntologySession} against the suffix array of a document and rates every match by
 * position, length, term frequency, inverse document frequency, literal-length histogram and
 * inverse property frequency.
 *
 * <p>The instance keeps the currently processed {@link Record} and model in fields, so a single
 * instance must not process several records concurrently.
 */
public class SAContentSymbolRecognition implements ContentSymbolRecognition {
    private static final Logger LOG = Logger.getLogger(SAContentSymbolRecognition.class.getName());
    private SAContentSymbolRecognitionModel model;
    private IndexSearcher searcher;
    private HashMap<String, Double> idfMap;
    private Record record;

    /**
     * Matches datatype property values in the record's text, rates the resulting annotations on the
     * record's data sheet and stores them as the record's content symbols.
     *
     * @param record      the record whose tokens are matched and whose data sheet receives the ratings
     * @param suffixArray suffix array of the record's text
     * @param model       model providing the ontology session; it is also cast to
     *                    {@link SAContentSymbolRecognitionModel} to read the literal length histogram
     * @throws Exception if querying the ontology or matching fails
     */
    @Override // de.dfki.km.perspecting.obie.workflow.tasks.ContentSymbolRecognition
    public void recognizeContentSymbols(Record record, SuffixArray suffixArray, Model<OntologySession> model) throws Exception {
        OntologySession session = model.getModel();
        this.record = record;
        this.model = (SAContentSymbolRecognitionModel) model;

        long started = System.currentTimeMillis();
        ScoobieLogging.log(session.getSession(), record.getDocument().getUri(), "Starting matching of datatype property values", LOG, Level.INFO);
        List<Annotation<TextPointer>> annotations = matchDatatypePropertyValues(record.getTokens(), session, suffixArray, suffixArray.getCommonPrefixStrings());
        ScoobieLogging.log(session.getSession(), record.getDocument().getUri(), "Match of datatype property values  took: " + (System.currentTimeMillis() - started), LOG, Level.INFO);
        ScoobieLogging.log(session.getSession(), record.getDocument().getUri(), "Found " + annotations.size() + " annotations.", LOG, Level.INFO);

        // Standardize every individual rating before fusing them into the overall belief.
        record.getDatasheet().standardize(Annotation.POSITION);
        record.getDatasheet().standardize(Annotation.LENGTH);
        record.getDatasheet().standardize(Annotation.TF);
        record.getDatasheet().standardize(Annotation.IDF);
        record.getDatasheet().standardize(Annotation.HIST_LEN);
        record.getDatasheet().standardize(Annotation.IPF);
        record.getDatasheet().fuseRatings(Annotation.NER_BELIEF, DataSheet.PRODUCT, Annotation.TF, Annotation.IDF, Annotation.LENGTH, Annotation.POSITION);
        record.getDatasheet().standardize(Annotation.NER_BELIEF);
        record.getDatasheet().trim(Annotation.NER_BELIEF);

        // NOTE(review): the previous code collected the annotations with belief > 0 into a list
        // that was never used and then published the unfiltered list. The dead loop was removed and
        // the published list is unchanged; confirm whether only annotations with a positive belief
        // were meant to be published.
        record.setContentSymbols(annotations);
    }

    /**
     * Requests the candidate datatype property values from the ontology, compares them against the
     * suffix array and turns every longest match that ends on a token boundary into an annotation.
     *
     * @param list            tokens of the document, iterated in list order
     * @param ontologySession session used to request the candidate values
     * @param suffixArray     suffix array the candidates are compared against
     * @param set             common prefix keys passed to the ontology query
     * @return the merged and rated annotations
     * @throws Exception any failure, after it has been logged
     */
    private List<Annotation<TextPointer>> matchDatatypePropertyValues(List<Token> list, OntologySession ontologySession, SuffixArray suffixArray, Set<Integer> set) throws Exception {
        List<Annotation<TextPointer>> matches = new ArrayList<>();
        try {
            ScoobieLogging.log(ontologySession.getSession(), SAContentSymbolRecognition.class.getName(), "Request Symbols Candidates", LOG);
            long requestStarted = System.currentTimeMillis();
            ResultSetCallback candidates = ontologySession.getDatatypePropertyValues(-1, null, set);
            List<TextPointer> pointers;
            try {
                ScoobieLogging.log(ontologySession.getSession(), this.record.getDocument().getUri(), "Request Symbols Candidates took: " + (System.currentTimeMillis() - requestStarted), LOG);
                long compareStarted = System.currentTimeMillis();
                ScoobieLogging.log(ontologySession.getSession(), this.record.getDocument().getUri(), "Starting SuffixArrayComparison", LOG);
                pointers = suffixArray.compare(candidates.getRs());
                ScoobieLogging.log(ontologySession.getSession(), this.record.getDocument().getUri(), "SuffixArrayComparison took: " + (System.currentTimeMillis() - compareStarted), LOG);
            } finally {
                // Release the result set even when the comparison fails.
                candidates.close();
            }
            Collections.sort(pointers);
            for (TextPointer pointer : filterLongestMatches(ontologySession, pointers)) {
                List<Token> covered = tokensCoveredBy(list, pointer);
                // Only matches whose end coincides with the end of a token become annotations.
                if (!covered.isEmpty() && covered.get(covered.size() - 1).getEnd() == pointer.getB()) {
                    Annotation<TextPointer> annotation = this.record.getDatasheet().createAnnotation(-pointer.getData(), pointer, covered.toArray(new Token[covered.size()]));
                    annotation.add(Annotation.RDF_PROPERTY, Integer.valueOf(pointer.getDatatypeProperty()));
                    matches.add(annotation);
                }
            }
            return mergeAnnotations(matches);
        } catch (Exception e) {
            ScoobieLogging.log(ontologySession.getSession(), this.record.getDocument().getUri(), e, LOG);
            throw e;
        }
    }

    /**
     * Collects the tokens that start before the end of the pointer, beginning with the first token
     * that does not start before the pointer (or the last token when every token starts earlier).
     * Returns an empty list for an empty token list instead of failing.
     */
    private static List<Token> tokensCoveredBy(List<Token> tokens, TextPointer pointer) {
        List<Token> covered = new ArrayList<>();
        Iterator<Token> it = tokens.iterator();
        if (!it.hasNext()) {
            return covered;
        }
        Token current = it.next();
        while (it.hasNext() && current.getStart() < pointer.getA()) {
            current = it.next();
        }
        while (current.getStart() < pointer.getB()) {
            covered.add(current);
            if (!it.hasNext()) {
                break;
            }
            current = it.next();
        }
        return covered;
    }

    /**
     * Groups the annotations by lower-cased literal and datatype property, creates one annotation
     * per group and attaches the individual ratings (position, length, TF, IDF, length histogram,
     * IPF) to it.
     *
     * @param list the raw per-occurrence annotations
     * @return one rated annotation per (literal, property) group
     */
    private List<Annotation<TextPointer>> mergeAnnotations(List<Annotation<TextPointer>> list) {
        Map<String, Map<Integer, List<Annotation<TextPointer>>>> byLiteral = new HashMap<>();
        Set<String> distinctSpans = new HashSet<>();
        for (Annotation<TextPointer> annotation : list) {
            String literal = annotation.getValue().toString().toLowerCase();
            Map<Integer, List<Annotation<TextPointer>>> byProperty = byLiteral.get(literal);
            if (byProperty == null) {
                byProperty = new HashMap<>();
                byLiteral.put(literal, byProperty);
            }
            Integer property = Integer.valueOf(annotation.getValue().getDatatypeProperty());
            List<Annotation<TextPointer>> group = byProperty.get(property);
            if (group == null) {
                group = new ArrayList<>();
                byProperty.put(property, group);
            }
            group.add(annotation);
            distinctSpans.add(annotation.getValue().getA() + "_" + annotation.getValue().getB());
        }

        Map<Integer, Integer> literalLengthHistogram = this.model.getLiteralLengthHistogram();
        int histogramTotal = 0;
        for (Integer count : literalLengthHistogram.values()) {
            histogramTotal += count.intValue();
        }

        List<Annotation<TextPointer>> merged = new ArrayList<>();
        for (Map<Integer, List<Annotation<TextPointer>>> byProperty : byLiteral.values()) {
            for (Map.Entry<Integer, List<Annotation<TextPointer>>> entry : byProperty.entrySet()) {
                List<Annotation<TextPointer>> group = entry.getValue();
                List<TextPointer> pointers = new ArrayList<>();
                int index = 0;
                for (Annotation<TextPointer> member : group) {
                    pointers.add(member.getValue());
                    // The index of the last group member wins.
                    index = -member.getValue().getData();
                }
                Annotation<TextPointer> annotation = this.record.getDatasheet().createAnnotation(index, pointers, group.get(0).getTokens());
                annotation.add(Annotation.RDF_PROPERTY, entry.getKey());
                double tf = rateWithSymbolFrequency(annotation, byLiteral, distinctSpans.size());
                double histLen = rateWithLiteralLengthHistogram(annotation, literalLengthHistogram, histogramTotal);
                double idf = rateWithInverseDocumentFrequency(annotation);
                // NOTE(review): a match starting at offset 0 yields an infinite position rating
                // here; confirm how standardize() is expected to treat it.
                double position = 1.0d / annotation.getValue().getA();
                double length = annotation.getValue().getB() - annotation.getValue().getA();
                annotation.add(Annotation.STRING, annotation.getValue().toString());
                annotation.add(Annotation.POSITION, Double.valueOf(position));
                annotation.add(Annotation.LENGTH, Double.valueOf(length));
                annotation.add(Annotation.TF, Double.valueOf(tf));
                annotation.add(Annotation.IDF, Double.valueOf(idf));
                annotation.add(Annotation.HIST_LEN, Double.valueOf(histLen));
                annotation.add(Annotation.IPF, Double.valueOf(1.0d / byProperty.keySet().size()));
                merged.add(annotation);
            }
        }
        return merged;
    }

    /**
     * Rates an annotation by how rare its length is in the literal length histogram.
     *
     * @param annotation the annotation to rate
     * @param map        histogram mapping a literal length to its count
     * @param i          sum of all histogram counts
     * @return {@code 1 - count(length) / i}; {@code 1.0} when the length is not in the histogram or
     *         the histogram is empty
     */
    private double rateWithLiteralLengthHistogram(Annotation<TextPointer> annotation, Map<Integer, Integer> map, int i) {
        if (i <= 0) {
            return 1.0d;
        }
        Integer count = map.get(Integer.valueOf(annotation.getValue().getB() - annotation.getValue().getA()));
        if (count == null) {
            // Length never observed in the histogram: treat it as a zero count.
            return 1.0d;
        }
        // Floating-point division; integer division would truncate the ratio to 0 or 1.
        return 1.0d - (count.doubleValue() / i);
    }

    /**
     * Averages the normalized IDF values of the annotation's tokens. Tokens unknown to the IDF map
     * contribute 0.
     *
     * @param annotation the annotation to rate
     * @return the mean IDF of the tokens; {@code 1.0} when no IDF map was loaded and {@code 0.0}
     *         when the annotation has no tokens
     */
    private double rateWithInverseDocumentFrequency(Annotation<TextPointer> annotation) {
        if (this.idfMap == null) {
            return 1.0d;
        }
        Token[] tokens = annotation.getTokens();
        if (tokens.length == 0) {
            // Avoid 0/0 = NaN leaking into the data sheet.
            return 0.0d;
        }
        double sum = 0.0d;
        for (Token token : tokens) {
            Double idf = this.idfMap.get(token.toString());
            if (idf != null) {
                sum += idf.doubleValue();
            }
        }
        return sum / tokens.length;
    }

    /**
     * Rates an annotation by the share of distinct text spans that carry its literal.
     *
     * @param annotation the annotation to rate
     * @param map        all annotations grouped by lower-cased literal and datatype property
     * @param i          number of distinct spans over all annotations
     * @return distinct spans of this literal divided by {@code i}; {@code 0.0} when {@code i} is not
     *         positive
     */
    private double rateWithSymbolFrequency(Annotation<TextPointer> annotation, Map<String, Map<Integer, List<Annotation<TextPointer>>>> map, int i) {
        if (i <= 0) {
            return 0.0d;
        }
        String literal = annotation.getValue().toString().toLowerCase();
        Set<String> spans = new HashSet<>();
        for (List<Annotation<TextPointer>> group : map.get(literal).values()) {
            for (Annotation<TextPointer> member : group) {
                spans.add(member.getValue().getA() + "_" + member.getValue().getB());
            }
        }
        // Floating-point division; integer division would truncate the frequency to 0 or 1.
        return (double) spans.size() / i;
    }

    /**
     * Walks the pointers in list order and drops every pointer that is strictly shorter than the
     * most recently kept pointer and lies completely inside it.
     *
     * @param ontologySession unused
     * @param list            the pointers, in the order they should be examined
     * @return the kept pointers in their original order
     */
    private List<TextPointer> filterLongestMatches(OntologySession ontologySession, List<TextPointer> list) {
        List<TextPointer> kept = new ArrayList<>();
        TextPointer last = null;
        for (TextPointer candidate : list) {
            boolean keep = last == null
                    || last.length() <= candidate.length()
                    || candidate.getA() < last.getA()
                    || candidate.getB() > last.getB();
            if (keep) {
                last = candidate;
                kept.add(candidate);
            }
        }
        return kept;
    }

    /**
     * Opens the Lucene index at the given location once and builds the IDF map from its terms. The
     * map holds {@code log(numDocs / docFreq) + 1} per term text, divided by the largest such value.
     * Subsequent calls are no-ops once an index has been loaded successfully.
     *
     * @param str  location passed to the {@link IndexSearcher} constructor
     * @param str2 unused
     * @throws Exception if the index cannot be opened or read
     */
    @Override // de.dfki.km.perspecting.obie.workflow.tasks.ContentSymbolRecognition
    public void setExtractionSession(String str, String str2) throws Exception {
        if (this.searcher != null) {
            return;
        }
        IndexSearcher indexSearcher = new IndexSearcher(str);
        int numDocs = indexSearcher.getIndexReader().numDocs();
        HashMap<String, Double> idf = new HashMap<>();
        double maxIdf = 0.0d;
        TermEnum terms = indexSearcher.getIndexReader().terms();
        try {
            while (terms.next()) {
                Term term = terms.term();
                int docFreq = indexSearcher.docFreq(term);
                if (docFreq <= 0) {
                    // No document frequency to divide by; skip the term.
                    continue;
                }
                // Floating-point division; integer division would truncate the ratio before log().
                double value = Math.log((double) numDocs / docFreq) + 1.0d;
                maxIdf = Math.max(maxIdf, value);
                idf.put(term.text(), Double.valueOf(value));
            }
        } finally {
            terms.close();
        }
        if (maxIdf > 0.0d) {
            for (Map.Entry<String, Double> entry : idf.entrySet()) {
                entry.setValue(Double.valueOf(entry.getValue().doubleValue() / maxIdf));
            }
        }
        // Publish only after the whole index was read, so a failed attempt can be retried.
        this.idfMap = idf;
        this.searcher = indexSearcher;
    }
}
