package de.dfki.km.perspecting.obie.transducer;

import de.dfki.km.perspecting.obie.connection.KnowledgeBase;
import de.dfki.km.perspecting.obie.model.Document;
import de.dfki.km.perspecting.obie.model.RegexSymbolPattern;
import de.dfki.km.perspecting.obie.model.SemanticEntity;
import de.dfki.km.perspecting.obie.model.TextCorpus;
import de.dfki.km.perspecting.obie.model.TextPointer;
import de.dfki.km.perspecting.obie.model.Token;
import de.dfki.km.perspecting.obie.model.TokenSequence;
import de.dfki.km.perspecting.obie.workflow.Transducer;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Matcher;

/* loaded from: input_file:de/dfki/km/perspecting/obie/transducer/RegularStructuredEntityRecognition.class */
/**
 * Transducer that runs the {@link RegexSymbolPattern}s configured for a document's language over
 * the document's plain text and marks the tokens covered by each regex match with a
 * {@code "B"} (first token of a match) or {@code "I"} (subsequent token of a match) property.
 */
public class RegularStructuredEntityRecognition extends Transducer {
    /** Shared pool on which one matching task per pattern is executed. */
    private static final ExecutorService pool = Executors.newCachedThreadPool();

    /**
     * Applies every pattern registered for the document's language to the document, one pool task
     * per pattern, and blocks until all tasks have finished.
     *
     * <p>Nothing is done when the document has no tokens.
     *
     * @param document      document whose tokens are tagged in place
     * @param knowledgeBase passed through to {@link #matchPatterns}
     * @param textCorpus    not used by this implementation
     * @throws Exception if a matching task fails (surfaced through {@link Future#get()}) or the
     *                   waiting thread is interrupted
     */
    @Override
    public void transduce(final Document document, final KnowledgeBase knowledgeBase, TextCorpus textCorpus) throws Exception {
        if (document.getTokens().isEmpty()) {
            return;
        }

        // The model is stored untyped; this transducer expects a collection of regex patterns.
        @SuppressWarnings({"unchecked", "rawtypes"})
        Collection<RegexSymbolPattern> patterns = (Collection) this.languageMap.get(document.getSource().getLanguage()).getModel();
        final String plainTextContent = document.getSource().getPlainTextContent();

        // NOTE(review): all tasks tag the same Token instances concurrently; this presumes
        // Token.addProperty is safe to call from several threads - confirm.
        List<Future<List<TokenSequence<TextPointer>>>> pending = new ArrayList<Future<List<TokenSequence<TextPointer>>>>();
        for (final RegexSymbolPattern pattern : patterns) {
            pending.add(pool.submit(new Callable<List<TokenSequence<TextPointer>>>() {
                @Override
                public List<TokenSequence<TextPointer>> call() throws Exception {
                    return matchPatterns(document, pattern, plainTextContent, knowledgeBase);
                }
            }));
        }

        // The tasks work purely through side effects on the tokens; get() is called only to wait
        // for completion and to propagate any exception thrown inside a task.
        for (Future<List<TokenSequence<TextPointer>>> task : pending) {
            task.get();
        }
    }

    /**
     * Finds all matches of one pattern in {@code str} and tags the tokens lying inside each match.
     *
     * <p>A token belongs to a match when it starts at or after the match start and ends at or
     * before the match end. The first such token receives the property {@code "B"}, the following
     * ones {@code "I"}; both carry the pattern's best rated entry as value. Tokens and matches are
     * walked in parallel in a single forward pass, so both are expected to be ordered by position.
     *
     * @param document           document providing the tokens to tag
     * @param regexSymbolPattern pattern supplying the regex and the value to attach
     * @param str                text the regex is matched against; token offsets refer to it
     * @param knowledgeBase      not used by this implementation
     * @return always an empty list; results are recorded on the tokens themselves
     * @throws Exception declared for interface compatibility
     */
    public List<TokenSequence<TextPointer>> matchPatterns(Document document, RegexSymbolPattern regexSymbolPattern, String str, KnowledgeBase knowledgeBase) throws Exception {
        Matcher matcher = regexSymbolPattern.getRegex().matcher(str);
        Iterator<Token> tokenIterator = document.getTokens().iterator();

        // 'current' is the look-ahead token. It is kept across matches so that a token which was
        // inspected but not consumed by one match is still available to the next match.
        Token current = nextOrNull(tokenIterator);

        while (current != null && matcher.find()) {
            int matchStart = matcher.start();
            int matchEnd = matcher.end();

            // Skip every token that begins before the match.
            while (current != null && current.getStart() < matchStart) {
                current = nextOrNull(tokenIterator);
            }

            Integer bestRatedEntry = regexSymbolPattern.getBestRatedEntry();
            String tag = "B";
            // Tag every token that ends inside the match; the first gets "B", the rest "I".
            while (current != null && current.getEnd() <= matchEnd) {
                current.addProperty(tag, -1, bestRatedEntry.intValue());
                tag = "I";
                current = nextOrNull(tokenIterator);
            }
        }
        return new ArrayList<TokenSequence<TextPointer>>();
    }

    /** Returns the iterator's next token, or {@code null} when it is exhausted. */
    private static Token nextOrNull(Iterator<Token> tokens) {
        return tokens.hasNext() ? tokens.next() : null;
    }

    /**
     * Compares the lines supplied by {@code reader} with the property values retrieved for the
     * document.
     *
     * <p>Each retrieved value is rendered as {@code <property URI>\t<token sequence>} and both
     * sides are treated as sets of strings. The result is one tab-separated line:
     * number of distinct reader lines, number of distinct retrieved values,
     * {@code overlap / reader lines} and {@code overlap / retrieved values}. A ratio is
     * {@code NaN} when its denominator is zero.
     *
     * @param document      document whose retrieved property values are evaluated
     * @param knowledgeBase used to resolve property indexes to URIs
     * @param reader        source of the reference lines, one entry per line; not closed here
     * @return the formatted comparison line, terminated by a newline
     * @throws Exception if reading from {@code reader} or resolving a URI fails
     */
    @Override
    public String compare(Document document, KnowledgeBase knowledgeBase, Reader reader) throws Exception {
        Collection<String> referenceLines = new HashSet<String>();
        BufferedReader lineReader = new BufferedReader(reader);
        for (String line = lineReader.readLine(); line != null; line = lineReader.readLine()) {
            referenceLines.add(line);
        }

        Collection<String> retrieved = new HashSet<String>();
        for (TokenSequence<SemanticEntity> tokenSequence : document.getRetrievedPropertyValues()) {
            retrieved.add(knowledgeBase.getURI(tokenSequence.getValue().getPropertyIndex()) + "\t" + tokenSequence.toString());
        }

        Collection<String> overlap = new HashSet<String>(referenceLines);
        overlap.retainAll(retrieved);

        // Divide as doubles: integer division would truncate every ratio to 0 or 1.
        double overlapPerReference = overlap.size() / (double) referenceLines.size();
        double overlapPerRetrieved = overlap.size() / (double) retrieved.size();

        // Java has no %i conversion; integers use %d and floating point values use %f.
        return String.format("%d\t%d\t%f\t%f\n", Integer.valueOf(referenceLines.size()), Integer.valueOf(retrieved.size()), Double.valueOf(overlapPerReference), Double.valueOf(overlapPerRetrieved));
    }
}
