package dfki.km.tweekreco.ner;

import de.dfki.delight.common.Documentation;
import de.dfki.inquisition.collections.CollectionUtilz;
import de.dfki.inquisition.collections.MultiValueHashMap;
import de.dfki.inquisition.collections.TwoValuesBox;
import de.dfki.inquisition.file.FileUtils;
import de.dfki.inquisition.text.StringUtils;
import dfki.km.tweekreco.ner.util.LuceneUtilz;
import dfki.km.tweekreco.ner.util.StandardTokenizingAnalyzer;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.PostConstruct;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.Version;

/* loaded from: input_file:dfki/km/tweekreco/ner/NamedEntityRecognizer.class */
public class NamedEntityRecognizer {
    /** Set to true on the first call of init(); later calls of init() return immediately. */
    protected boolean m_bInitialized = false;
    /** Set to true by loadFromIndex() and back to false whenever addEntity2Index adds a document. */
    protected boolean m_bNewestIndexLoaded = false;
    /** Index field name -> analyzer for that field; filled in init(). */
    protected HashMap<String, Analyzer> m_hsField2Analyzer;
    /** Per-field wrapper around m_hsField2Analyzer with a KeywordAnalyzer as default; built in init(). */
    protected PerFieldAnalyzerWrapper m_indexAnalyzer;
    /** Reader on the "index" directory below m_strNerDBPath; (re)opened by loadFromIndex(). */
    protected IndexReader m_indexReader;
    /** Searcher on top of m_indexReader; created by loadFromIndex(). */
    protected IndexSearcher m_indexSearcher;
    /** Must be non-null for addEntity2Index; not assigned in the visible part of this class. */
    protected IndexWriter m_indexWriter;
    /** MoreLikeThis instance on m_indexReader; used by disambiguate() to pick context terms. */
    protected MoreLikeThis m_moreLikeThis;
    /** POS tagger loaded in init() from the model file at m_strPosModelPath. */
    protected POSTaggerME m_posTagger;
    /** Query parser with entityDescriptionTerms as default field; created in init(). */
    protected QueryParser m_queryParser;
    /** Lines of "entityTypes4HighFrqTerms.txt"; entity types that high-frequency single terms may still resolve to. */
    protected HashSet<String> m_sEntityTypes4HighFrqTerms;
    /** Lines of "highFrqTerms4Disambiguation.txt"; compared against lower-cased single-term triggers. */
    protected HashSet<String> m_sHighFrqTerms4Disambiguation;
    /** Spell checker created in loadFromIndex() with a maximum of one edit. */
    protected DirectSpellChecker m_spellChecker;
    /** Lines of "posTags4Entities.txt"; POS tags that mark a trigger as "POS fine". */
    protected HashSet<String> m_sPosTags4Entities;
    /** Base directory of the NER database, always ending with "/"; set by the constructor. */
    protected String m_strNerDBPath;
    /** Path of the OpenNLP POS model: m_strNerDBPath + "openNLPPosModel.bin". */
    protected String m_strPosModelPath;

    /* loaded from: input_file:dfki/km/tweekreco/ner/NamedEntityRecognizer$AnalyzingStringSynonymMapBuilder.class */
    /**
     * Synonym map builder that accepts plain strings: both strings are lower-cased, run through
     * the inherited {@code analyze} method and then registered as a synonym pair.
     * Parsing from a reader is not supported.
     */
    public static class AnalyzingStringSynonymMapBuilder extends SynonymMap.Parser {
        /**
         * @param z        forwarded unchanged to the {@code SynonymMap.Parser} constructor
         * @param analyzer forwarded unchanged to the {@code SynonymMap.Parser} constructor
         */
        public AnalyzingStringSynonymMapBuilder(boolean z, Analyzer analyzer) {
            super(z, analyzer);
        }

        /**
         * Adds one synonym pair. Failures are deliberately not propagated (best effort), but they
         * are logged together with their cause so that a broken entry can be diagnosed.
         *
         * @param str  the input text
         * @param str2 the synonym text
         * @param z    forwarded unchanged to the inherited three-argument {@code add}
         */
        public void add(String str, String str2, boolean z) throws IOException {
            String lowerCase = str.toLowerCase();
            String lowerCase2 = str2.toLowerCase();
            try {
                super.add(analyze(lowerCase, new CharsRef()), analyze(lowerCase2, new CharsRef()), z);
            } catch (Exception e) {
                // Keep the best-effort semantics, but do not lose the exception itself.
                Logger.getLogger(AnalyzingStringSynonymMapBuilder.class.getName()).log(Level.WARNING, "Error during adding the synonyms '" + lowerCase + "' <> '" + lowerCase2 + "'", (Throwable) e);
            }
        }

        /** Always throws {@link UnsupportedOperationException}; use {@link #add(String, String, boolean)} instead. */
        public void parse(Reader reader) throws IOException, ParseException {
            throw new UnsupportedOperationException();
        }
    }

    /* loaded from: input_file:dfki/km/tweekreco/ner/NamedEntityRecognizer$CollectionStringsInputIterator.class */
    /**
     * {@link InputIterator} over a plain sequence of strings. Every term has weight 0 and no
     * payload; no comparator is provided.
     */
    public static class CollectionStringsInputIterator implements InputIterator {
        private Iterator<String> m_termsIterator;

        /** Iterates over the given collection in its own iteration order. */
        public CollectionStringsInputIterator(Collection<String> collection) {
            m_termsIterator = collection.iterator();
        }

        /** Iterates over the given strings in argument order. */
        public CollectionStringsInputIterator(String... strArr) {
            m_termsIterator = Arrays.asList(strArr).iterator();
        }

        /** @return always {@code null} */
        public Comparator<BytesRef> getComparator() {
            return null;
        }

        /** @return always {@code false} */
        public boolean hasPayloads() {
            return false;
        }

        /** @return the next term as bytes, or {@code null} once the underlying iterator is exhausted */
        public BytesRef next() throws IOException {
            if (!m_termsIterator.hasNext()) {
                return null;
            }
            String term = m_termsIterator.next();
            return new BytesRef(term);
        }

        /** @return always {@code null} */
        public BytesRef payload() {
            return null;
        }

        /** @return always {@code 0} */
        public long weight() {
            return 0L;
        }
    }

    /* loaded from: input_file:dfki/km/tweekreco/ner/NamedEntityRecognizer$IndexAtts.class */
    /** Names of the index fields used by this recognizer. */
    public static class IndexAtts {
        /** Registered with a KeywordAnalyzer in init(). */
        public static final String dbPediaId = "dbPediaId";
        /** Description text of an entity; analyzed with a StandardAnalyzer, default field of the query parser and the field MoreLikeThis works on. */
        public static final String entityDescriptionTerms = "entityDescriptionTerms";
        /** Registered with a KeywordAnalyzer in init(). */
        public static final String freebaseId = "freebaseId";
        /** Entity identifier; used as the lookup term in getEntity, getEntityLabel and getEntityDescriptionTerms. */
        public static final String id = "id";
        /** Label and synonyms, analyzed with a NamedEntityAnalyzer that gets no stopword set. */
        public static final String labelAnalyzed = "labelAnalyzed";
        /** Label and synonyms, analyzed with a NamedEntityAnalyzer that gets the entries of stopwords4Indexing.txt. */
        public static final String labelAnalyzedRemovedStopwords = "labelAnalyzedRemovedStopwords";
        /** Stored, un-analyzed label and synonyms; the first stored value is read back as the label, the rest as synonyms. */
        public static final String labelAsKeyword = "labelAsKeyword";
        /** Entity type; may occur several times per document. */
        public static final String type = "type";
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:dfki/km/tweekreco/ner/NamedEntityRecognizer$NerEntity4TextTriggerEquality.class */
    /**
     * View of a {@link NerEntity} whose identity is defined by its {@code textTrigger} only.
     * Used to keep at most one entity per text trigger in a hash-based set.
     */
    public class NerEntity4TextTriggerEquality extends NerEntity {
        private static final long serialVersionUID = 553831324022715627L;

        /** Creates a copy of the given entity via the {@link NerEntity} copy constructor. */
        public NerEntity4TextTriggerEquality(NerEntity nerEntity) {
            super(nerEntity);
        }

        /**
         * Two entities are equal when their text triggers are equal. Returns {@code false} for
         * {@code null} and for objects that are not a {@link NerEntity} instead of throwing.
         */
        @Override // dfki.km.tweekreco.ner.Entity
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof NerEntity)) {
                return false;
            }
            String otherTrigger = ((NerEntity) obj).textTrigger;
            if (this.textTrigger == null) {
                return otherTrigger == null;
            }
            return this.textTrigger.equals(otherTrigger);
        }

        /** Hash of the text trigger; 0 when the trigger is {@code null}, consistent with {@link #equals(Object)}. */
        @Override // dfki.km.tweekreco.ner.Entity
        public int hashCode() {
            return this.textTrigger == null ? 0 : this.textTrigger.hashCode();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:dfki/km/tweekreco/ner/NamedEntityRecognizer$NerEntityWithPosCheck.class */
    /**
     * Copy of a {@link NerEntity} that additionally remembers the entity it was created from and
     * a flag telling whether the part-of-speech check succeeded for its text trigger.
     */
    public class NerEntityWithPosCheck extends NerEntity {
        private static final long serialVersionUID = 8106919127527433207L;
        protected boolean m_bPosFineFlag;
        protected NerEntity m_templateEntity;

        /**
         * @param nerEntity the entity to copy; also kept as the template returned by {@link #getTemplate()}
         * @param z         the result of the part-of-speech check
         */
        public NerEntityWithPosCheck(NerEntity nerEntity, boolean z) {
            super(nerEntity);
            m_templateEntity = nerEntity;
            m_bPosFineFlag = z;
        }

        /** @return the entity instance this wrapper was created from */
        public NerEntity getTemplate() {
            return m_templateEntity;
        }

        /** @return the part-of-speech flag handed to the constructor */
        public boolean posFine() {
            return m_bPosFineFlag;
        }
    }

    /**
     * Small command line demo: loads the index below the "de" database directory, runs the
     * recognizer on a fixed German sample sentence and prints labels, entities and their
     * description terms to standard out.
     */
    @Documentation(hide = true)
    public static void main(String[] strArr) throws Exception {
        NamedEntityRecognizer recognizer = new NamedEntityRecognizer("de").loadFromIndex();
        final String sampleText = "Gestern einen doch netten Film mit der Frau geschaut. Irgendwas mit 'fabelhafte Welt' und 'Amelie'";

        System.out.println("searching for entities inside:\n" + sampleText);
        System.out.println("\n\nEntity labels: " + recognizer.recognizeNamedEntityLabels(sampleText, false, true));

        Set<NerEntity> entities = recognizer.recognizeNamedEntities(sampleText, false, true, null, 13);
        System.out.println("\nEntities: ");
        for (NerEntity entity : entities) {
            System.out.println("   " + entity.score + ": " + entity);
        }

        System.out.println("\nEntities with description terms");
        for (NerEntity entity : entities) {
            String descriptionTerms = recognizer.getEntityDescriptionTerms(entity.id);
            System.out.println("   " + entity + ": " + descriptionTerms);
        }

        System.out.println(recognizer.getEntityDescriptionTerms("http://rdf.freebase.com/ns/m.059wmcm"));
    }

    /**
     * Creates a recognizer for the NER database directory {@code str}. A relative path is
     * resolved against {@code GlobalConstants.strAppBasePath}; the stored path always ends
     * with "/". Calls {@link #init()} afterwards.
     *
     * @param str absolute or relative path of the NER database directory
     */
    public NamedEntityRecognizer(String str) {
        String dbDirectory = new File(str).isAbsolute() ? str : GlobalConstants.strAppBasePath + "/" + str;
        this.m_strNerDBPath = dbDirectory + "/";
        init();
    }

    /**
     * Adds the given entity to the index. Its id and all of its types become additional
     * attributes; label, synonyms and text trigger are handed on unchanged to the
     * four-argument overload.
     *
     * @param nerEntity the entity to index
     * @throws IOException declared for the delegate call
     */
    @Documentation(hide = true)
    public void addEntity2Index(NerEntity nerEntity) throws IOException {
        MultiValueHashMap<String, String> attributes = new MultiValueHashMap<>();
        attributes.add(IndexAtts.id, nerEntity.id);
        for (String entityType : nerEntity.types) {
            attributes.add(IndexAtts.type, entityType);
        }
        addEntity2Index(nerEntity.label, nerEntity.synonyms, nerEntity.textTrigger, attributes);
    }

    /**
     * Builds one index document for an entity and adds it through the index writer.
     * <p>
     * The label and every synonym are written to the keyword label field (stored) and to both
     * analyzed label fields (not stored). A non-blank {@code str2} is written to the entity
     * description field. Each additional attribute becomes a stored field: as a plain string
     * field when no analyzer or a {@link KeywordAnalyzer} is registered for its name, otherwise
     * as an analyzed field.
     * <p>
     * A failure of the writer is logged together with its cause and not propagated; the
     * "newest index loaded" flag is only cleared when the document was actually added.
     *
     * @param str               the entity label
     * @param collection        the synonyms of the entity; {@code null} is treated as "no synonyms"
     * @param str2              text for the entity description field, skipped when null or whitespace
     * @param multiValueHashMap additional field name/value pairs
     * @throws IllegalStateException when no index writer is available
     * @throws IOException           declared for callers; writer failures are logged, not rethrown
     */
    @Documentation(hide = true)
    public void addEntity2Index(String str, Collection<String> collection, String str2, MultiValueHashMap<String, String> multiValueHashMap) throws IOException {
        if (this.m_indexWriter == null) {
            throw new IllegalStateException("IndexWriter not found. Did you invoke startWriting?");
        }
        Document document = new Document();
        document.add(new StringField(IndexAtts.labelAsKeyword, str, Field.Store.YES));
        document.add(new LuceneUtilz.TextWithTermVectorOffsetsField(IndexAtts.labelAnalyzed, str, Field.Store.NO));
        document.add(new LuceneUtilz.TextWithTermVectorOffsetsField(IndexAtts.labelAnalyzedRemovedStopwords, str, Field.Store.NO));
        if (collection != null) {
            for (String synonym : collection) {
                document.add(new StringField(IndexAtts.labelAsKeyword, synonym, Field.Store.YES));
                document.add(new LuceneUtilz.TextWithTermVectorOffsetsField(IndexAtts.labelAnalyzed, synonym, Field.Store.NO));
                document.add(new LuceneUtilz.TextWithTermVectorOffsetsField(IndexAtts.labelAnalyzedRemovedStopwords, synonym, Field.Store.NO));
            }
        }
        if (!StringUtils.nullOrWhitespace(str2)) {
            document.add(new LuceneUtilz.TextWithTermVectorOffsetsField(IndexAtts.entityDescriptionTerms, str2, Field.Store.YES));
        }
        for (Map.Entry entry : multiValueHashMap.entryList()) {
            // Fields without a registered analyzer fall back to the keyword default of the wrapper.
            Analyzer fieldAnalyzer = this.m_hsField2Analyzer.get(entry.getKey());
            boolean keywordField = fieldAnalyzer == null || fieldAnalyzer instanceof KeywordAnalyzer;
            if (keywordField) {
                document.add(new StringField((String) entry.getKey(), (String) entry.getValue(), Field.Store.YES));
            } else {
                document.add(new LuceneUtilz.TextWithTermVectorOffsetsField((String) entry.getKey(), (String) entry.getValue(), Field.Store.YES));
            }
        }
        try {
            this.m_indexWriter.addDocument(document);
            this.m_bNewestIndexLoaded = false;
        } catch (Exception e) {
            // Best effort as before, but keep the cause so the failure can be diagnosed.
            Logger.getLogger(NamedEntityRecognizer.class.getName()).log(Level.WARNING, "Error while adding a document to the index. " + document, (Throwable) e);
        }
    }

    /**
     * Adds {@code entityLabel} to {@code linkedList} unless an already present label wins.
     * <p>
     * A present label counts as overlapping when the start or the end offset of the new label
     * lies inside its offset range. Among overlapping labels the one with the larger offset span
     * wins: a longer present label makes this method return without adding, a shorter present
     * label is removed. Labels of equal span are both kept. Shorter labels that were visited
     * before a longer one is found stay removed.
     *
     * @param entityLabel the label to add
     * @param linkedList  the labels collected so far; modified in place
     */
    private void addEntityLabelResultIfNotOverlapping(EntityLabel entityLabel, LinkedList<EntityLabel> linkedList) {
        for (Iterator<EntityLabel> presentLabels = linkedList.iterator(); presentLabels.hasNext(); ) {
            EntityLabel present = presentLabels.next();
            boolean startInside = entityLabel.startOffset >= present.startOffset && entityLabel.startOffset <= present.endOffset;
            boolean endInside = entityLabel.endOffset >= present.startOffset && entityLabel.endOffset <= present.endOffset;
            if (!startInside && !endInside) {
                continue;
            }
            int presentSpan = present.endOffset - present.startOffset;
            int newSpan = entityLabel.endOffset - entityLabel.startOffset;
            if (presentSpan > newSpan) {
                return;
            }
            if (presentSpan < newSpan) {
                presentLabels.remove();
            }
        }

        // NOTE(review): the insert position is one BEFORE the first label that starts later, and a
        // hit at position 0 yields -1, i.e. "append". This looks like an off-by-one for a sorted
        // insert, but it is kept as is because the list order is not relied upon by the caller.
        int insertPosition = -1;
        int position = 0;
        for (EntityLabel present : linkedList) {
            if (present.startOffset > entityLabel.startOffset) {
                insertPosition = position - 1;
                break;
            }
            position++;
        }
        if (insertPosition == -1) {
            linkedList.add(entityLabel);
        } else {
            linkedList.add(insertPosition, entityLabel);
        }
    }

    /**
     * Closes the index reader (if one is open) and stops writing.
     * <p>
     * The reader reference is dropped and the "newest index loaded" flag is cleared even when
     * closing the reader fails, so that a later query re-opens the index instead of working on a
     * closed reader. {@code stopWriting()} is always invoked.
     *
     * @throws IOException when closing the reader or stopping the writer fails
     */
    @Documentation(hide = true)
    public void close() throws IOException {
        try {
            if (this.m_indexReader != null) {
                this.m_indexReader.close();
            }
        } finally {
            this.m_indexReader = null;
            // The searcher still points to the closed reader; force a reload on next use.
            this.m_bNewestIndexLoaded = false;
            stopWriting();
        }
    }

    /**
     * Finds the entities carrying the label {@code str} that fit best to the context text
     * {@code str2}.
     * <p>
     * Up to 25 "interesting" terms of the context text are picked with MoreLikeThis on the entity
     * description field (first with a minimum document frequency of 2, then topped up with a
     * minimum of 1). The query requires an exact keyword match of the label, adds the context
     * terms as optional clauses and, when {@code set} is non-empty, requires one of the given types.
     *
     * @param str  the entity label to resolve
     * @param str2 the context text
     * @param set  entity types to restrict the result to; {@code null} or empty means no restriction
     * @param i    maximum number of entities to return
     * @return the matching entities in ranking order, possibly empty
     */
    @Documentation("   |Example: disambiguate?entityLabel=Am%C3%A9lie%20Poulain&text2Check=franz%C3%B6sischer%20Spielfilm%20von%20Jean-Pierre%20Jeunet&entityTypes=[]&maxResults=2\n   |          =>   [ ... ,{\"score\":1.2789862,\"id\":\"http://rdf.freebase.com/ns/m.059wmcm\",\"label\":\"Amélie Poulain\",\n   |               \"types\":[\"http://rdf.freebase.com/ns/film.film_character\"],\"synonyms\":[\"Amelie\"]}]\n   | entityTypes is with json string array syntax, don't forget to URLEncode. An empty array means all entity types will be returned.\n   |")
    public Set<NerEntity> disambiguate(String str, String str2, Set<String> set, int i) throws IOException, org.apache.lucene.queryparser.classic.ParseException {
        if (!this.m_bNewestIndexLoaded) {
            loadFromIndex();
        }
        Set<NerEntity> rankedEntities = new LinkedHashSet<>();

        // Collect the context terms of the text.
        this.m_moreLikeThis.setFieldNames((String[]) CollectionUtilz.createArray(new String[]{IndexAtts.entityDescriptionTerms}));
        this.m_moreLikeThis.setMinDocFreq(2);
        this.m_moreLikeThis.setMinTermFreq(1);
        this.m_moreLikeThis.setMinWordLen(2);
        this.m_moreLikeThis.setMaxQueryTerms(25);
        Set<String> contextTerms = new HashSet<>();
        Collections.addAll(contextTerms, this.m_moreLikeThis.retrieveInterestingTerms(new StringReader(str2), IndexAtts.entityDescriptionTerms));
        if (contextTerms.size() < 25) {
            // Not enough terms yet: relax the document frequency and fill up to the limit.
            this.m_moreLikeThis.setMinDocFreq(1);
            String[] relaxedTerms = this.m_moreLikeThis.retrieveInterestingTerms(new StringReader(str2), IndexAtts.entityDescriptionTerms);
            for (String relaxedTerm : relaxedTerms) {
                if (contextTerms.size() >= 25) {
                    break;
                }
                contextTerms.add(relaxedTerm);
            }
        }
        StringBuilder contextClause = new StringBuilder();
        for (String contextTerm : contextTerms) {
            contextClause.append(" ").append(contextTerm);
        }
        String escapedContext = QueryParser.escape(contextClause.toString());

        // Optional mandatory clause restricting the entity types.
        StringBuilder typeClause = new StringBuilder();
        if (set != null && !set.isEmpty()) {
            typeClause.append(" +(");
            for (String entityType : set) {
                typeClause.append(' ').append(IndexAtts.type).append(":\"").append(QueryParser.escape(entityType)).append('\"');
            }
            typeClause.append(")");
        }

        String queryString = "+labelAsKeyword:\"" + QueryParser.escape(str) + "\"  " + escapedContext + typeClause.toString();
        TopDocs search = this.m_indexSearcher.search(this.m_queryParser.parse(queryString), i);
        if (search.totalHits < 1) {
            return rankedEntities;
        }

        int hitCount = Math.min(search.totalHits, i);
        for (int hit = 0; hit < hitCount; hit++) {
            Document doc = this.m_indexSearcher.doc(search.scoreDocs[hit].doc, CollectionUtilz.createHashSet(new String[]{IndexAtts.id, IndexAtts.labelAsKeyword, IndexAtts.type}));
            NerEntity nerEntity = new NerEntity();
            nerEntity.id = doc.get(IndexAtts.id);
            nerEntity.score = search.scoreDocs[hit].score;
            nerEntity.types = Arrays.asList(doc.getValues(IndexAtts.type));
            // The first stored keyword label is the label, all further ones are synonyms.
            String[] storedLabels = doc.getValues(IndexAtts.labelAsKeyword);
            for (int k = 0; k < storedLabels.length; k++) {
                if (k == 0) {
                    nerEntity.label = storedLabels[k];
                } else {
                    nerEntity.synonyms.add(storedLabels[k]);
                }
            }
            rankedEntities.add(nerEntity);
        }
        return rankedEntities;
    }

    /**
     * Returns the stored description text of the entity with the given id.
     *
     * @param str the entity id
     * @return the description text, or the empty string when the lookup yields no document or the
     *         document has no description field
     */
    @Documentation("   |Example: getEntityDescriptionTerms?entityID=http://rdf.freebase.com/ns/m.0g01s\n   |          =>   Hobbits oder Halblinge sind fiktive, 60 bis 120 cm große menschenähnliche Wesen in der von J. R. R. Tolkien....\n   |")
    public String getEntityDescriptionTerms(String str) throws IOException {
        if (!this.m_bNewestIndexLoaded) {
            loadFromIndex();
        }
        Term idTerm = new Term(IndexAtts.id, str);
        Document entityDoc = LuceneUtilz.getUniqueDocWithTerm(idTerm, this.m_indexSearcher, Collections.singleton(IndexAtts.entityDescriptionTerms));
        if (entityDoc == null) {
            return "";
        }
        String descriptionTerms = entityDoc.get(IndexAtts.entityDescriptionTerms);
        if (descriptionTerms == null) {
            return "";
        }
        return descriptionTerms;
    }

    /**
     * Returns the label of the entity with the given id, i.e. the first stored value of the
     * keyword label field.
     *
     * @param str the entity id
     * @return the label, or the empty string when the lookup yields no document
     */
    @Documentation("   |Example: getEntityLabel?entityID=http://rdf.freebase.com/ns/m.0g01s\n   |          =>   Hobbit\n   |")
    public String getEntityLabel(String str) throws IOException {
        if (!this.m_bNewestIndexLoaded) {
            loadFromIndex();
        }
        Term idTerm = new Term(IndexAtts.id, str);
        Document entityDoc = LuceneUtilz.getUniqueDocWithTerm(idTerm, this.m_indexSearcher, Collections.singleton(IndexAtts.labelAsKeyword));
        if (entityDoc != null) {
            return entityDoc.get(IndexAtts.labelAsKeyword);
        }
        return "";
    }

    /**
     * One-time initialization; repeated calls are no-ops.
     * <p>
     * Reads the word lists below the NER database directory (one entry per line), registers the
     * analyzer for every index field, creates the query parser and loads the OpenNLP POS model.
     * Any failure is logged with level SEVERE and not propagated; the instance then stays
     * partially initialized.
     */
    @PostConstruct
    public void init() {
        if (this.m_bInitialized) {
            return;
        }
        this.m_bInitialized = true;
        try {
            CharArraySet charArraySet = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FileUtils.file2String(this.m_strNerDBPath + "stopwords4Indexing.txt").split("\\n")), true);
            this.m_sHighFrqTerms4Disambiguation = new HashSet<>(Arrays.asList(FileUtils.file2String(this.m_strNerDBPath + "highFrqTerms4Disambiguation.txt").split("\\n")));
            this.m_strPosModelPath = this.m_strNerDBPath + "openNLPPosModel.bin";
            this.m_sPosTags4Entities = new HashSet<>(Arrays.asList(FileUtils.file2String(this.m_strNerDBPath + "posTags4Entities.txt").split("\\n")));
            this.m_sEntityTypes4HighFrqTerms = new HashSet<>(Arrays.asList(FileUtils.file2String(this.m_strNerDBPath + "entityTypes4HighFrqTerms.txt").split("\\n")));
            this.m_hsField2Analyzer = new HashMap<>();
            this.m_hsField2Analyzer.put(IndexAtts.entityDescriptionTerms, new StandardAnalyzer(Version.LUCENE_CURRENT));
            this.m_hsField2Analyzer.put(IndexAtts.labelAsKeyword, new KeywordAnalyzer());
            this.m_hsField2Analyzer.put(IndexAtts.id, new KeywordAnalyzer());
            this.m_hsField2Analyzer.put(IndexAtts.dbPediaId, new KeywordAnalyzer());
            this.m_hsField2Analyzer.put(IndexAtts.freebaseId, new KeywordAnalyzer());
            this.m_hsField2Analyzer.put(IndexAtts.labelAnalyzed, new NamedEntityAnalyzer(Version.LUCENE_CURRENT, null));
            this.m_hsField2Analyzer.put(IndexAtts.labelAnalyzedRemovedStopwords, new NamedEntityAnalyzer(Version.LUCENE_CURRENT, charArraySet));
            this.m_indexAnalyzer = new PerFieldAnalyzerWrapper(new KeywordAnalyzer(), this.m_hsField2Analyzer);
            this.m_queryParser = new QueryParser(Version.LUCENE_CURRENT, IndexAtts.entityDescriptionTerms, this.m_indexAnalyzer);
            // try-with-resources: the model stream is released even when loading the model fails.
            try (FileInputStream posModelStream = new FileInputStream(this.m_strPosModelPath)) {
                this.m_posTagger = new POSTaggerME(new POSModel(posModelStream));
            }
            Logger.getLogger(NamedEntityRecognizer.class.getName()).info("Initialized named entity recognition service");
        } catch (Exception e) {
            Logger.getLogger(NamedEntityRecognizer.class.getName()).log(Level.SEVERE, "Error", (Throwable) e);
        }
    }

    /**
     * (Re)opens the index in the "index" directory below the NER database path and rebuilds the
     * searcher, the MoreLikeThis helper and the spell checker on top of it.
     * <p>
     * The new reader is opened before the previous one is closed. When opening fails, the
     * previously loaded reader and searcher therefore stay usable instead of pointing to an
     * already closed reader.
     *
     * @return this instance, to allow call chaining
     * @throws IOException when the index cannot be opened or the previous reader cannot be closed
     */
    @Documentation(hide = true)
    public NamedEntityRecognizer loadFromIndex() throws IOException {
        IndexReader previousReader = this.m_indexReader;
        IndexReader freshReader = DirectoryReader.open(new SimpleFSDirectory(new File(this.m_strNerDBPath + "index")));
        this.m_indexReader = freshReader;
        this.m_indexSearcher = new IndexSearcher(freshReader);
        this.m_moreLikeThis = new MoreLikeThis(freshReader);
        this.m_moreLikeThis.setAnalyzer(new StandardTokenizingAnalyzer(true, Version.LUCENE_CURRENT));
        this.m_spellChecker = new DirectSpellChecker();
        this.m_spellChecker.setMaxEdits(1);
        this.m_bNewestIndexLoaded = true;
        // Release the old reader only after the new state is completely in place.
        if (previousReader != null) {
            previousReader.close();
        }
        return this;
    }

    /**
     * Recognizes entity labels inside {@code str} and resolves each of them to its best matching
     * entity, using the whole text as disambiguation context.
     * <p>
     * For every recognized label the trigger text is reduced to the tokens that survive the
     * stopword-removing label analyzer. A trigger counts as "POS fine" when one of its POS tags is
     * contained in the configured tag list. A single-term trigger that is blank after the
     * reduction or that is a configured high-frequency term is only accepted for entities of one
     * of the configured entity types. When fewer than 30% of the resolved entities are POS fine,
     * the POS check is ignored; otherwise only POS-fine entities are returned. Per text trigger at
     * most one entity is returned.
     *
     * @param str the text to analyze
     * @param z   handed on to {@code recognizeNamedEntityLabels} (presumably the "fuzzy" switch of the web API)
     * @param z2  handed on to {@code recognizeNamedEntityLabels} (presumably the "stopwordRemoval" switch)
     * @param set entity types to restrict the result to; {@code null} or empty means no restriction
     * @param i   maximum number of entities to return
     * @return the recognized entities in priority-queue order; empty when nothing was recognized
     */
    @Documentation("   | Example: recognizeNamedEntities?text2Check=hobbit&fuzzy=false&stopwordRemoval=false&entityTypes=\n   |               [\"http%3a%2f%2frdf%2efreebase%2ecom%2fns%2fbook%2ebook%5fsubject\"]&maxResults=5\n   |          =>   [{\"score\":10.770279,\"textTrigger\":\"hobbit\",\"id\":\"http://rdf.freebase.com/ns/m.0g01s\",\"label\":\"Hobbit\",\n   |               \"types\":[\"http://rdf.freebase.com/ns/book.book_subject\",\"http://rdf.freebase.com/ns/film.film_subject\",\n   |               \"http://rdf.freebase.com/ns/film.film_character\"],\"synonyms\":[\"Halbling\",\"Halblinge\"]}]\n   | entityTypes is with json string array syntax, don't forget to URLEncode. An empty array means all entity types will be returned.\n   |")
    public Set<NerEntity> recognizeNamedEntities(String str, boolean z, boolean z2, Set<String> set, int i) throws Exception {
        if (!this.m_bNewestIndexLoaded) {
            loadFromIndex();
        }
        Set<EntityLabel> entityLabels = recognizeNamedEntityLabels(str, z, z2);
        PriorityQueue<NerEntityWithPosCheck> candidates = new PriorityQueue<>();
        int posFineCount = 0;
        StandardTokenizingAnalyzer tokenizer = new StandardTokenizingAnalyzer(Version.LUCENE_CURRENT);
        try {
            for (EntityLabel entityLabel : entityLabels) {
                // Reduce trigger text and POS tags to the tokens that are not stopwords.
                StringBuilder keptTerms = new StringBuilder();
                StringBuilder keptPosTags = new StringBuilder();
                List<String> triggerTokens = LuceneUtilz.analyzeText("", entityLabel.textTrigger, tokenizer, 1000);
                String[] triggerPosTags = entityLabel.textTriggerTermPOS.split("\\s");
                int tokenIndex = 0;
                for (String token : triggerTokens) {
                    List<String> surviving = LuceneUtilz.analyzeText("", token, this.m_hsField2Analyzer.get(IndexAtts.labelAnalyzedRemovedStopwords), 1);
                    if (surviving != null && !surviving.isEmpty()) {
                        keptTerms.append(token).append(" ");
                        keptPosTags.append(triggerPosTags[tokenIndex]).append(" ");
                    }
                    tokenIndex++;
                }
                String reducedTrigger = keptTerms.toString().trim().toLowerCase();
                String reducedPosTags = keptPosTags.toString().trim();

                boolean posFine = false;
                // Only single-term triggers can be flagged as high-frequency terms.
                boolean highFrqTerm = false;
                if (reducedPosTags.contains(" ")) {
                    for (String posTag : entityLabel.textTriggerTermPOS.split(" ")) {
                        if (this.m_sPosTags4Entities.contains(posTag)) {
                            posFine = true;
                            posFineCount++;
                            break;
                        }
                    }
                } else {
                    highFrqTerm = StringUtils.nullOrWhitespace(reducedTrigger) || this.m_sHighFrqTerms4Disambiguation.contains(reducedTrigger);
                    if (this.m_sPosTags4Entities.contains(reducedPosTags) || this.m_sPosTags4Entities.contains(entityLabel.textTriggerTermPOS)) {
                        posFine = true;
                        posFineCount++;
                    }
                }

                Set<NerEntity> resolved = disambiguate(entityLabel.label, str, set, 1);
                if (highFrqTerm) {
                    // High-frequency terms are only accepted for the configured entity types.
                    Iterator<NerEntity> resolvedIt = resolved.iterator();
                    while (resolvedIt.hasNext()) {
                        boolean allowedTypeFound = false;
                        for (String entityType : resolvedIt.next().types) {
                            if (this.m_sEntityTypes4HighFrqTerms.contains(entityType)) {
                                allowedTypeFound = true;
                                break;
                            }
                        }
                        if (!allowedTypeFound) {
                            resolvedIt.remove();
                        }
                    }
                }
                if (!resolved.isEmpty()) {
                    NerEntity best = resolved.iterator().next();
                    best.textTrigger = entityLabel.textTrigger;
                    best.textTriggerTermPOS = entityLabel.textTriggerTermPOS;
                    candidates.add(new NerEntityWithPosCheck(best, posFine));
                }
            }
        } finally {
            tokenizer.close();
        }

        Set<NerEntity> result = new LinkedHashSet<>();
        Set<NerEntity4TextTriggerEquality> seenTriggers = new HashSet<>();
        // Floating point ratio (integer division would truncate to 0 or 1); an empty queue must
        // not cause a division by zero.
        boolean ignorePosCheck = candidates.isEmpty() || ((double) posFineCount) / candidates.size() < 0.3d;
        while (!candidates.isEmpty()) {
            NerEntityWithPosCheck candidate = candidates.poll();
            if (ignorePosCheck || candidate.posFine()) {
                if (seenTriggers.add(new NerEntity4TextTriggerEquality(candidate))) {
                    result.add(candidate.getTemplate());
                }
                if (result.size() == i) {
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Loads the entity with the given id from the index. Id, types, label and synonyms are
     * filled; the first stored keyword label becomes the label, all further ones synonyms.
     *
     * @param str the entity id
     * @return the entity, or {@code null} when the lookup yields no document
     */
    public NerEntity getEntity(String str) throws IOException {
        if (!this.m_bNewestIndexLoaded) {
            loadFromIndex();
        }
        Term idTerm = new Term(IndexAtts.id, str);
        Document entityDoc = LuceneUtilz.getUniqueDocWithTerm(idTerm, this.m_indexSearcher, CollectionUtilz.createHashSet(new String[]{IndexAtts.id, IndexAtts.labelAsKeyword, IndexAtts.type}));
        if (entityDoc == null) {
            return null;
        }
        NerEntity entity = new NerEntity();
        entity.id = entityDoc.get(IndexAtts.id);
        entity.types = Arrays.asList(entityDoc.getValues(IndexAtts.type));
        String[] storedLabels = entityDoc.getValues(IndexAtts.labelAsKeyword);
        for (int k = 0; k < storedLabels.length; k++) {
            if (k == 0) {
                entity.label = storedLabels[k];
            } else {
                entity.synonyms.add(storedLabels[k]);
            }
        }
        return entity;
    }

    /**
     * Finds entity labels inside the given text.
     * <p>
     * The text is tokenized and POS tagged. A window of at most 13 consecutive tokens is moved
     * over the token sequence; token sequences starting at the window start are joined with single
     * blanks and looked up with {@code singleTermLookup}. Every label returned by a lookup becomes
     * an {@link EntityLabel} whose start and end offsets are token indices (not character
     * positions) and whose POS string holds the tags of the covered tokens. Overlapping labels are
     * resolved by {@code addEntityLabelResultIfNotOverlapping}.
     * <p>
     * When {@code z2} is set, leading and trailing tokens that do not survive the
     * stopword-removing label analyzer are cut off from each trigger afterwards; triggers that
     * consist of such tokens only are dropped.
     *
     * @param str the text to analyze
     * @param z   handed on to {@code singleTermLookup} (presumably the "fuzzy" switch of the web API)
     * @param z2  handed on to {@code singleTermLookup}; additionally enables the stopword trimming
     * @return the recognized labels
     */
    @Documentation("   | Example: recognizeNamedEntityLabels?text2Check=hobbit&fuzzy=false&stopwordRemoval=false\n   |          =>   [{\"label\":\"Hobbit\",\"textTrigger\":\"hobbit\",\"startOffset\":0,\"endOffset\":0}]\n   |")
    public Set<EntityLabel> recognizeNamedEntityLabels(String str, boolean z, boolean z2) throws Exception {
        if (!this.m_bNewestIndexLoaded) {
            loadFromIndex();
        }
        // linkedList: labels that may still be replaced by an overlapping longer one;
        // hashSet: labels that are final.
        LinkedList<EntityLabel> linkedList = new LinkedList<>();
        HashSet hashSet = new HashSet();
        StandardTokenizingAnalyzer standardTokenizingAnalyzer = new StandardTokenizingAnalyzer(Version.LUCENE_CURRENT);
        List<String> analyzeText = LuceneUtilz.analyzeText(IndexAtts.entityDescriptionTerms, str, standardTokenizingAnalyzer, 10000);
        // One POS tag per token, index-aligned with analyzeText.
        String[] tag = this.m_posTagger.tag((String[]) analyzeText.toArray(new String[0]));
        // linkedList2 is the token window, i the token index of its first element and i2 the
        // smallest window position whose prefix still has to be looked up.
        LinkedList linkedList2 = new LinkedList();
        int i = 0;
        int i2 = 0;
        Iterator<String> it = analyzeText.iterator();
        while (it.hasNext()) {
            linkedList2.add(it.next());
            if (linkedList2.size() > 13) {
                // Window is full: drop the oldest token. The window start changed, so all prefixes
                // have to be looked up again.
                linkedList2.removeFirst();
                i++;
                i2 = 0;
            }
            // Build the prefixes of the window token by token; sb holds the prefix, i3 is the
            // window position of its last token.
            StringBuilder sb = new StringBuilder();
            int i3 = 0;
            Iterator it2 = linkedList2.iterator();
            while (it2.hasNext()) {
                sb.append((String) it2.next());
                if (i3 >= i2) {
                    String sb2 = sb.toString();
                    // The second component of the lookup result is used as the set of labels.
                    Iterator it3 = ((Set) singleTermLookup(sb2, z, z2).getSecond()).iterator();
                    while (it3.hasNext()) {
                        EntityLabel entityLabel = new EntityLabel((String) it3.next(), sb2);
                        entityLabel.startOffset = i;
                        entityLabel.endOffset = i + i3;
                        // Collect the POS tags of the covered tokens, blank separated.
                        StringBuilder sb3 = new StringBuilder();
                        for (int i4 = entityLabel.startOffset; i4 <= entityLabel.endOffset; i4++) {
                            sb3.append(tag[i4]).append(' ');
                        }
                        entityLabel.textTriggerTermPOS = sb3.toString().trim();
                        addEntityLabelResultIfNotOverlapping(entityLabel, linkedList);
                    }
                }
                sb.append(' ');
                i3++;
            }
            i2++;
            // Labels that start more than 13 tokens before the window start are moved to the
            // final result set.
            Iterator<EntityLabel> it4 = linkedList.iterator();
            while (it4.hasNext()) {
                EntityLabel next = it4.next();
                if (next.startOffset < i - 13) {
                    hashSet.add(next);
                    it4.remove();
                }
            }
        }
        standardTokenizingAnalyzer.close();
        // All tokens consumed: shrink the window from the front and look up every prefix of
        // the remaining tokens.
        while (!linkedList2.isEmpty()) {
            linkedList2.removeFirst();
            i++;
            StringBuilder sb4 = new StringBuilder();
            int i5 = 0;
            Iterator it5 = linkedList2.iterator();
            while (it5.hasNext()) {
                sb4.append((String) it5.next());
                String sb5 = sb4.toString();
                Iterator it6 = ((Set) singleTermLookup(sb5, z, z2).getSecond()).iterator();
                while (it6.hasNext()) {
                    EntityLabel entityLabel2 = new EntityLabel((String) it6.next(), sb5);
                    entityLabel2.startOffset = i;
                    entityLabel2.endOffset = i + i5;
                    StringBuilder sb6 = new StringBuilder();
                    for (int i6 = entityLabel2.startOffset; i6 <= entityLabel2.endOffset; i6++) {
                        sb6.append(tag[i6]).append(' ');
                    }
                    entityLabel2.textTriggerTermPOS = sb6.toString().trim();
                    addEntityLabelResultIfNotOverlapping(entityLabel2, linkedList);
                }
                sb4.append(' ');
                i5++;
            }
        }
        // Everything that is still pending is final now.
        Iterator<EntityLabel> it7 = linkedList.iterator();
        while (it7.hasNext()) {
            hashSet.add(it7.next());
        }
        // NOTE(review): unlike the first analyzer, this one is never closed - confirm and fix.
        StandardTokenizingAnalyzer standardTokenizingAnalyzer2 = new StandardTokenizingAnalyzer(Version.LUCENE_CURRENT);
        Iterator it8 = hashSet.iterator();
        while (it8.hasNext()) {
            EntityLabel entityLabel3 = (EntityLabel) it8.next();
            if (z2) {
                // i7 / i8: index of the first / last trigger token that is not removed by the
                // stopword-removing label analyzer; i7 stays -1 when no token survives.
                int i7 = -1;
                int i8 = 0;
                List<String> analyzeText2 = LuceneUtilz.analyzeText("", entityLabel3.textTrigger, standardTokenizingAnalyzer2, 1000);
                String[] split = entityLabel3.textTriggerTermPOS.split("\\s");
                int i9 = 0;
                Iterator<String> it9 = analyzeText2.iterator();
                while (it9.hasNext()) {
                    List<String> analyzeText3 = LuceneUtilz.analyzeText("", it9.next(), this.m_hsField2Analyzer.get(IndexAtts.labelAnalyzedRemovedStopwords), 1);
                    if (analyzeText3 != null && !analyzeText3.isEmpty()) {
                        if (i7 == -1) {
                            i7 = i9;
                        }
                        i8 = i9;
                    }
                    i9++;
                }
                if (i7 == -1) {
                    // The trigger consists of removed tokens only.
                    it8.remove();
                } else if (i7 > 0 || i8 + 1 < analyzeText2.size()) {
                    // Cut off leading/trailing removed tokens and adjust text, POS tags and offsets.
                    // NOTE(review): the label is modified while it is an element of hashSet; this is
                    // only safe if EntityLabel.hashCode/equals do not depend on these fields - verify.
                    StringBuilder sb7 = new StringBuilder();
                    StringBuilder sb8 = new StringBuilder();
                    for (int i10 = i7; i10 <= i8; i10++) {
                        sb7.append(analyzeText2.get(i10)).append(' ');
                        sb8.append(split[i10]).append(' ');
                    }
                    entityLabel3.textTrigger = sb7.toString().trim();
                    entityLabel3.textTriggerTermPOS = sb8.toString().trim();
                    entityLabel3.startOffset += i7;
                    entityLabel3.endOffset -= (analyzeText2.size() - i8) - 1;
                }
            }
        }
        return hashSet;
    }

    /**
     * Looks up the keyword labels of index documents matching a single query string.
     *
     * <p>The string is analyzed with {@code m_indexAnalyzer} and only the first resulting token is
     * used as the lookup term. If that term occurs in the index it is the only candidate; otherwise,
     * when {@code z} is set, the spell checker is asked for up to 13 similar terms and those become
     * the candidates. For every document found for a candidate, the first
     * {@code IndexAtts.labelAsKeyword} value whose own first analyzed token equals the candidate is
     * collected; if no value matches, the document's first keyword label is used instead.
     *
     * @param str the text to look up
     * @param z   whether spell-checker suggestions are used when the analyzed term is not in the index
     * @param z2  if {@code true} the lookup runs against {@code IndexAtts.labelAnalyzedRemovedStopwords},
     *            otherwise against {@code IndexAtts.labelAnalyzed}
     * @return a box whose first value is {@code true} only if the analyzed term itself occurs in the
     *         index, and whose second value is the set of collected labels; if analysis yields no
     *         token, {@code false} and a singleton set holding {@code str} unchanged
     * @throws Exception if analysis, the spell checker or the index access fails
     */
    protected TwoValuesBox<Boolean, Set<String>> singleTermLookup(String str, boolean z, boolean z2) throws Exception {
        String fieldName = z2 ? IndexAtts.labelAnalyzedRemovedStopwords : IndexAtts.labelAnalyzed;
        List<String> queryTokens = LuceneUtilz.analyzeText(fieldName, str, this.m_indexAnalyzer, 1);
        if (queryTokens.isEmpty()) {
            // Nothing survived analysis: hand the raw input back, flagged as "not found in index".
            return new TwoValuesBox<>(false, Collections.singleton(str));
        }
        Term queryTerm = new Term(fieldName, queryTokens.get(0));
        List<String> candidateTerms = new LinkedList<>();
        Set<String> labels = new HashSet<>();
        boolean termInIndex = false;
        if (this.m_indexReader.docFreq(queryTerm) > 0) {
            candidateTerms.add(queryTerm.text());
            termInIndex = true;
        } else if (z) {
            SuggestWord[] suggestions = this.m_spellChecker.suggestSimilar(queryTerm, 13, this.m_indexReader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            if (suggestions != null) {
                for (SuggestWord suggestion : suggestions) {
                    candidateTerms.add(suggestion.string);
                }
            }
        }
        for (String candidate : candidateTerms) {
            for (Document document : LuceneUtilz.getDocsWithTerm(new Term(fieldName, candidate), 13, this.m_indexSearcher, Collections.singleton(IndexAtts.labelAsKeyword))) {
                String selectedLabel = null;
                for (String label : document.getValues(IndexAtts.labelAsKeyword)) {
                    List<String> labelTokens = LuceneUtilz.analyzeText(fieldName, label, this.m_indexAnalyzer, 1);
                    if (!labelTokens.isEmpty() && candidate.equals(labelTokens.get(0))) {
                        selectedLabel = label;
                        break;
                    }
                }
                if (selectedLabel == null) {
                    // No label analyzes to the candidate term: fall back to the first stored label.
                    selectedLabel = document.get(IndexAtts.labelAsKeyword);
                }
                // Document.get returns null when the field is absent; never put null into the result.
                if (selectedLabel != null) {
                    labels.add(selectedLabel);
                }
            }
        }
        return new TwoValuesBox<>(Boolean.valueOf(termInIndex), labels);
    }

    /**
     * Opens an {@link IndexWriter} on the directory {@code m_strNerDBPath + "index"} in
     * {@code CREATE} mode and stores it in {@code m_indexWriter}.
     *
     * @param z if {@code false}, the call is refused when an index already exists at that location;
     *          if {@code true}, no existence check is made
     * @throws IllegalStateException if a writer is already open, or if {@code z} is {@code false}
     *                               and an index already exists
     * @throws IOException           if the index directory cannot be accessed or the writer cannot
     *                               be created
     */
    @Documentation(hide = true)
    public void startWriting(boolean z) throws IOException {
        if (this.m_indexWriter != null) {
            throw new IllegalStateException("An open IndexWriter exists yet.");
        }
        File indexDir = new File(this.m_strNerDBPath + "index");
        if (!z) {
            boolean indexExists;
            // The directory opened for the existence probe is only needed here, so close it again.
            try (SimpleFSDirectory probeDirectory = new SimpleFSDirectory(indexDir)) {
                indexExists = DirectoryReader.indexExists(probeDirectory);
            }
            if (indexExists) {
                throw new IllegalStateException("Found existing index under " + this.m_strNerDBPath + "index");
            }
        }
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, this.m_indexAnalyzer);
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        // This directory is handed to the writer and therefore not closed here.
        this.m_indexWriter = new IndexWriter(new SimpleFSDirectory(indexDir), indexWriterConfig);
    }

    /**
     * Commits and closes the currently open index writer, if there is one.
     *
     * <p>Afterwards {@code m_indexWriter} is cleared and {@code m_bNewestIndexLoaded} is reset to
     * {@code false}. Calling this method while no writer is open does nothing.
     *
     * @throws IOException if committing or closing the writer fails; in that case the writer
     *                     reference and the flag are left untouched
     */
    @Documentation(hide = true)
    public void stopWriting() throws IOException {
        if (this.m_indexWriter == null) {
            return;
        }

        this.m_indexWriter.commit();
        this.m_indexWriter.close(true);

        this.m_indexWriter = null;
        this.m_bNewestIndexLoaded = false;
    }

    // Runs once when this class is initialized: sets the FileUtils base directory to
    // whatever FileUtils.getAppDirectory() returns.
    static {
        FileUtils.setBaseDirectory(FileUtils.getAppDirectory());
    }
}
