package de.dfki.km.exact.lucene;

import de.dfki.km.exact.lucene.voc.DEFAULT;
import de.dfki.km.exact.lucene.voc.FIELD;
import de.dfki.km.exact.misc.EULogger;
import de.dfki.km.exact.misc.EUString;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/* loaded from: input_file:WEB-INF/lib/lucene-util-17-20121203.113547-1.jar:de/dfki/km/exact/lucene/LUSearcher.class */
/**
 * Convenience wrapper around a Lucene {@link IndexSearcher} offering simple
 * searches, term enumeration, term statistics, context windows around term
 * occurrences and co-occurrence counting.
 *
 * <p>Most methods that enumerate the index are best-effort: I/O problems are
 * logged through {@link EULogger} and whatever was collected so far is
 * returned. Methods declared with {@code throws Exception} propagate failures
 * to the caller instead.</p>
 */
public class LUSearcher implements DEFAULT {
    /** Hit limit used by {@link #search(Query)} until {@link #setHitNumber(int)} is called. */
    private static final int INITIAL_HIT_NUMBER = 50;

    private int mHitNumber;
    protected final IndexSearcher mIndexSearcher;

    /**
     * Opens the index stored in the given file system directory.
     *
     * @param str path of the index directory
     * @throws Exception if the directory or the index cannot be opened
     */
    public LUSearcher(String str) throws Exception {
        this(FSDirectory.open(new File(str)));
    }

    /**
     * Opens the index stored in the given Lucene directory.
     *
     * <p>Note that this also raises the JVM-wide {@link BooleanQuery} clause
     * limit to {@link Integer#MAX_VALUE}.</p>
     *
     * @param directory the Lucene directory holding the index
     * @throws Exception if the index cannot be opened
     */
    public LUSearcher(Directory directory) throws Exception {
        this.mHitNumber = INITIAL_HIT_NUMBER;
        setMaxClauseCount(Integer.MAX_VALUE);
        this.mIndexSearcher = new IndexSearcher(directory);
    }

    /**
     * Reads a stored field value of a document.
     *
     * @param i   document number
     * @param str field name
     * @return the value returned by the stored document for that field
     * @throws Exception if the document cannot be loaded
     */
    public final String getValue(int i, String str) throws Exception {
        return this.mIndexSearcher.doc(i).get(str);
    }

    /**
     * Runs a query and returns at most {@code mHitNumber} top scoring hits.
     *
     * @param query the query to run
     * @return the top hits, best first
     * @throws Exception if the search fails
     */
    public final ScoreDoc[] search(Query query) throws Exception {
        return search(this.mHitNumber, query);
    }

    /**
     * Runs a query and returns at most {@code i} top scoring hits.
     *
     * @param i     maximum number of hits to collect
     * @param query the query to run
     * @return the top hits, best first
     * @throws Exception if the search fails
     */
    public final ScoreDoc[] search(int i, Query query) throws Exception {
        TopScoreDocCollector collector = TopScoreDocCollector.create(i, true);
        this.mIndexSearcher.search(query, collector);
        return collector.topDocs().scoreDocs;
    }

    /**
     * Tells whether a query matches at least one document.
     *
     * @param query the query to run
     * @return {@code true} if there is at least one hit
     * @throws Exception if the search fails
     */
    public final boolean hasResult(Query query) throws Exception {
        return search(1, query).length > 0;
    }

    /**
     * Finds the best scoring document containing the given term.
     *
     * @param str  field name
     * @param str2 term text
     * @return the document number of the top hit, or {@code -1} if nothing matches
     * @throws Exception if the search fails
     */
    public final int getFirstDoc(String str, String str2) throws Exception {
        // Only the top hit is needed, so do not collect mHitNumber hits.
        ScoreDoc[] hits = search(1, new TermQuery(new Term(str, str2)));
        return hits.length > 0 ? hits[0].doc : -1;
    }

    /**
     * Creates a {@link LUWindowIterator} on this searcher's index. The
     * arguments are forwarded unchanged to the iterator's constructor.
     *
     * @param str  first string argument of the iterator
     * @param str2 second string argument of the iterator
     * @param i    integer argument of the iterator
     * @return the new iterator
     */
    public final Iterator<LUWindow> getWindowIterator(String str, String str2, int i) {
        return new LUWindowIterator(this.mIndexSearcher, str, str2, i);
    }

    /**
     * Builds one window for every occurrence of a single term.
     *
     * @param str  term text
     * @param str2 field name
     * @param i    number of positions to include before and after the occurrence
     * @return the windows found; empty (or partial) if an error occurred, which is logged
     */
    public final List<LUWindow> getSingleWordWindows(String str, String str2, int i) {
        return collectWindows(str2, str, i, null, false);
    }

    /**
     * Builds windows for a term that may consist of several whitespace separated
     * words (as split by {@link EUString#split}).
     *
     * @param str  term text, one or more words
     * @param str2 field name
     * @param i    number of positions to include before and after the occurrence
     * @return the windows found
     */
    public final List<LUWindow> getWindows(String str, String str2, int i) {
        String[] words = EUString.split(str);
        if (words.length > 1) {
            return getMultiWordWindows(words, str2, i);
        }
        return getSingleWordWindows(str, str2, i);
    }

    /**
     * Builds one window for every occurrence of the first word that is directly
     * followed by the remaining words.
     *
     * @param strArr words of the multi-word term; must contain at least one element
     * @param str    field name
     * @param i      number of positions to include before and after the first word
     * @return the windows found; empty (or partial) if an error occurred, which is logged
     */
    public final List<LUWindow> getMultiWordWindows(String[] strArr, String str, int i) {
        String firstWord = strArr[0];
        String[] followingWords = EUString.tail(strArr);
        return collectWindows(str, firstWord, i, followingWords, true);
    }

    /**
     * Shared implementation of the window builders: walks all spans of
     * {@code anchorTerm} in {@code field} and fills a window per span through a
     * {@link LUWindowMapper} bounded by the span start/end widened by {@code radius}.
     *
     * @param field            field to search and to read the term vector from
     * @param anchorTerm       term whose occurrences anchor the windows
     * @param radius           number of positions added on each side of the span
     * @param followingTerms   terms that must follow the anchor (only used when
     *                         {@code requireFollowing} is set)
     * @param requireFollowing whether windows are kept only if
     *                         {@code window.contains(anchorStart + 1, followingTerms)}
     */
    private List<LUWindow> collectWindows(String field, String anchorTerm, int radius,
            String[] followingTerms, boolean requireFollowing) {
        List<LUWindow> windows = new LinkedList<LUWindow>();
        IndexReader reader = this.mIndexSearcher.getIndexReader();
        try {
            Spans spans = new SpanTermQuery(new Term(field, anchorTerm)).getSpans(reader);
            LUWindowMapper mapper = new LUWindowMapper();
            while (spans.next()) {
                mapper.clear();
                int anchorStart = spans.start();
                // Clamp at 0 so that occurrences near the document start do not yield negative positions.
                int windowStart = anchorStart > radius ? anchorStart - radius : 0;
                mapper.setStart(windowStart);
                mapper.setEnd(spans.end() + radius);
                reader.getTermFreqVector(spans.doc(), field, mapper);

                TreeSet<LUWindowEntry> entries = mapper.getEntries();
                LUWindow window = new LUWindow(spans.doc(), entries.size(), windowStart);
                for (LUWindowEntry entry : entries) {
                    window.setTerm(entry.getTerm(), entry.getPosition());
                }
                if (!requireFollowing || window.contains(anchorStart + 1, followingTerms)) {
                    windows.add(window);
                }
            }
        } catch (Exception e) {
            // Log the exception itself (not just its possibly-null message) to keep the stack trace.
            EULogger.warn(LUSearcher.class, e);
        }
        return windows;
    }

    /**
     * Sets the hit limit used by {@link #search(Query)}.
     *
     * @param i new maximum number of hits
     */
    public final void setHitNumber(int i) {
        this.mHitNumber = i;
    }

    /**
     * @return the hit limit used by {@link #search(Query)}
     */
    public int getHitNumber() {
        return this.mHitNumber;
    }

    /**
     * Collects the text of every term in the index, regardless of its field.
     *
     * @return the distinct term texts; partial if an I/O error occurred, which is logged
     */
    public final Set<String> getSingleWordTerms() {
        Set<String> texts = new HashSet<String>();
        TermEnum terms = null;
        try {
            terms = this.mIndexSearcher.getIndexReader().terms();
            while (terms.next()) {
                texts.add(terms.term().text());
            }
        } catch (IOException e) {
            EULogger.warn(getClass(), e);
        } finally {
            closeQuietly(terms);
        }
        return texts;
    }

    /**
     * Collects the text of every term of one field.
     *
     * @param str field name
     * @return the distinct term texts of that field; partial if an I/O error
     *         occurred, which is logged
     */
    public final Set<String> getSingleWordTerms(String str) {
        Set<String> texts = new HashSet<String>();
        TermEnum terms = null;
        try {
            terms = this.mIndexSearcher.getIndexReader().terms();
            while (terms.next()) {
                Term term = terms.term();
                if (term.field().equals(str)) {
                    texts.add(term.text());
                }
            }
        } catch (IOException e) {
            EULogger.warn(getClass(), e);
        } finally {
            closeQuietly(terms);
        }
        return texts;
    }

    /**
     * Opens a term enumeration via {@code IndexReader.terms(new Term(str))}.
     * The caller is responsible for closing the returned enumeration.
     *
     * @param str field name
     * @return the enumeration, or {@code null} if it could not be opened (the
     *         error is logged)
     */
    public final TermEnum getTermEnum(String str) {
        try {
            return getIndexSearcher().getIndexReader().terms(new Term(str));
        } catch (IOException e) {
            EULogger.warn(e);
            return null;
        }
    }

    /**
     * Counts the distinct terms of a field.
     *
     * <p>The enumeration is opened once and closed afterwards; counting stops
     * as soon as the enumeration leaves the requested field.</p>
     *
     * @param str field name; {@code FIELD.CONTENT} is used when {@code null}
     * @return the number of terms in the field, {@code 0} if the enumeration
     *         could not be opened; partial if an I/O error occurred, which is logged
     */
    public int countTerms(String str) {
        String field = str != null ? str : FIELD.CONTENT;
        TermEnum terms = getTermEnum(field);
        if (terms == null) {
            // getTermEnum has already logged the failure.
            return 0;
        }
        int count = 0;
        try {
            // An enumeration obtained from terms(Term) is already positioned on its
            // first term, hence do/while instead of while (see Lucene 3.x IndexReader docs).
            do {
                Term current = terms.term();
                if (current == null || !field.equals(current.field())) {
                    break;
                }
                count++;
            } while (terms.next());
        } catch (IOException e) {
            EULogger.warn(e);
        } finally {
            closeQuietly(terms);
        }
        return count;
    }

    /**
     * Collects the word n-grams (2 up to {@code i} words) of one field over all
     * document numbers below {@code maxDoc}.
     *
     * @param i   maximum number of words per n-gram
     * @param str field name
     * @return the distinct n-grams; collection stops at the first failing
     *         document, which is logged
     */
    public final Set<String> getMultiWordTerms(int i, String str) {
        Set<String> phrases = new HashSet<String>();
        try {
            int maxDoc = getIndexSearcher().getIndexReader().maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                phrases.addAll(getMultiWordTerms(i, docId, str));
            }
        } catch (Exception e) {
            EULogger.warn(getClass(), e);
        }
        return phrases;
    }

    /**
     * Collects the word n-grams (2 up to {@code i} words) of one field of one
     * document. Single words are not included.
     *
     * @param i   maximum number of words per n-gram
     * @param i2  document number
     * @param str field name
     * @return the distinct n-grams, words joined by a single space
     * @throws Exception if the document's tokens cannot be read
     */
    public final Set<String> getMultiWordTerms(int i, int i2, String str) throws Exception {
        Set<String> phrases = new HashSet<String>();
        String[] tokens = tokensOf(i2, str);
        StringBuilder phrase = new StringBuilder();
        for (int first = 0; first < tokens.length; first++) {
            phrase.append(tokens[first]);
            for (int offset = 1; offset < i && first + offset < tokens.length; offset++) {
                phrase.append(" ");
                phrase.append(tokens[first + offset]);
                phrases.add(phrase.toString());
            }
            phrase.setLength(0);
        }
        return phrases;
    }

    /**
     * Collects the distinct words of one field of one document.
     *
     * @param i   document number
     * @param str field name
     * @return the distinct words
     * @throws Exception if the document's tokens cannot be read
     */
    public final Set<String> getSingleWordTerms(int i, String str) throws Exception {
        Set<String> words = new HashSet<String>();
        for (String token : tokensOf(i, str)) {
            words.add(token);
        }
        return words;
    }

    /**
     * Returns the token sequence of a document field: rebuilt from the term
     * position vector by {@link LURecycler#recycle} when such a vector exists,
     * otherwise obtained by splitting the stored field value.
     */
    private String[] tokensOf(int docId, String field) throws Exception {
        TermPositionVector vector = getTermPositionVector(docId, field);
        if (vector != null) {
            return LURecycler.recycle(vector);
        }
        return EUString.split(getValue(docId, field));
    }

    /**
     * Loads the term vector of a document field if it carries positions.
     *
     * @param i   document number
     * @param str field name
     * @return the vector, or {@code null} if none is stored or it is not a
     *         {@link TermPositionVector}
     * @throws Exception if the vector cannot be read
     */
    protected final TermPositionVector getTermPositionVector(int i, String str) throws Exception {
        TermFreqVector vector = this.mIndexSearcher.getIndexReader().getTermFreqVector(i, str);
        return vector instanceof TermPositionVector ? (TermPositionVector) vector : null;
    }

    /**
     * Counts how often the given words occur at consecutive positions in one
     * field of one document.
     *
     * @param i      document number
     * @param str    field name
     * @param strArr the words of the phrase, in order
     * @return the number of occurrences; {@code 0} if the phrase is empty, the
     *         field has no term position vector, positions are not stored, or
     *         any of the words does not occur in the field
     * @throws Exception if the term vector cannot be read
     */
    public final int getMultiWordTermFreq(int i, String str, String[] strArr) throws Exception {
        if (strArr == null || strArr.length == 0) {
            return 0;
        }
        TermPositionVector vector = getTermPositionVector(i, str);
        if (vector == null) {
            return 0;
        }

        // Resolve the positions of every word once instead of once per candidate start.
        int[][] positions = new int[strArr.length][];
        for (int word = 0; word < strArr.length; word++) {
            int termIndex = vector.indexOf(strArr[word]);
            if (termIndex < 0) {
                // A word that is absent from the field means the phrase cannot occur.
                return 0;
            }
            positions[word] = vector.getTermPositions(termIndex);
            if (positions[word] == null) {
                // Positions were not stored for this vector.
                return 0;
            }
        }

        int frequency = 0;
        for (int start : positions[0]) {
            boolean matches = true;
            for (int word = 1; word < strArr.length && matches; word++) {
                matches = contains(start + word, positions[word]);
            }
            if (matches) {
                frequency++;
            }
        }
        return frequency;
    }

    /**
     * Collects term statistics for every term of the given fields and keeps
     * only the first {@code i} according to the natural ordering of
     * {@link LUTermInfo}.
     *
     * @param i      maximum number of entries to keep
     * @param strArr field names
     * @return the retained term infos; partial if an error occurred, which is logged
     */
    public final SortedSet<LUTermInfo> getMaxSingleWordTerms(int i, String[] strArr) {
        TreeSet<LUTermInfo> best = new TreeSet<LUTermInfo>();
        Set<String> fields = toSet(strArr);
        TermEnum terms = null;
        try {
            terms = this.mIndexSearcher.getIndexReader().terms();
            while (terms.next()) {
                Term term = terms.term();
                if (fields.contains(term.field())) {
                    best.add(getSingleWordTermInfo(term.text(), strArr));
                    // Keep the set bounded by dropping the last element once it overflows.
                    if (best.size() > i) {
                        best.remove(best.last());
                    }
                }
            }
        } catch (Exception e) {
            EULogger.warn(getClass(), e);
        } finally {
            closeQuietly(terms);
        }
        return best;
    }

    /**
     * Finds the term of the given fields with the highest total frequency.
     *
     * @param strArr field names
     * @return the term info with the highest frequency seen; a term info built
     *         with a {@code null} term if no term exceeded its frequency
     */
    public final LUTermInfo getMaxSingleWordTerm(String[] strArr) {
        LUTermInfo best = new LUTermInfo(null);
        Set<String> fields = toSet(strArr);
        TermEnum terms = null;
        try {
            terms = this.mIndexSearcher.getIndexReader().terms();
            while (terms.next()) {
                Term term = terms.term();
                if (fields.contains(term.field())) {
                    LUTermInfo candidate = getSingleWordTermInfo(term.text(), strArr);
                    if (candidate.getFrequency() > best.getFrequency()) {
                        best = candidate;
                    }
                }
            }
        } catch (Exception e) {
            EULogger.warn(getClass(), e);
        } finally {
            closeQuietly(terms);
        }
        return best;
    }

    /**
     * Computes document frequency and total frequency of a multi-word term,
     * based on the hits of a phrase query (limited by the current hit number).
     *
     * @param str  the multi-word term
     * @param str2 field name
     * @return the statistics, or {@code null} if the phrase query has no hits
     * @throws Exception if searching or reading term vectors fails
     */
    public final LUTermInfo getMultiWordTermInfo(String str, String str2) throws Exception {
        String[] words = EUString.split(str);
        ScoreDoc[] hits = search(LUQueryFactory.getPhraseQuery(str2, words));
        if (hits == null || hits.length == 0) {
            return null;
        }
        LUTermInfo info = new LUTermInfo(str);
        info.increaseDocFrequency(hits.length);
        for (ScoreDoc hit : hits) {
            info.increaseFrequency(getMultiWordTermFreq(hit.doc, str2, words));
        }
        return info;
    }

    /**
     * Computes document frequency and total frequency of a term across several
     * fields. A document is counted once for the document frequency even if the
     * term occurs in more than one of the fields.
     *
     * @param str    term text
     * @param strArr field names
     * @return the statistics; partial if an I/O error occurred, which is logged
     */
    public final LUTermInfo getSingleWordTermInfo(String str, String[] strArr) {
        LUTermInfo info = new LUTermInfo(str);
        try {
            Set<Integer> countedDocs = new HashSet<Integer>();
            IndexReader reader = this.mIndexSearcher.getIndexReader();
            for (String field : strArr) {
                TermDocs termDocs = reader.termDocs(new Term(field, str));
                try {
                    while (termDocs.next()) {
                        // add() returns true only the first time a document is seen.
                        if (countedDocs.add(Integer.valueOf(termDocs.doc()))) {
                            info.increaseDocFrequency(1);
                        }
                        info.increaseFrequency(termDocs.freq());
                    }
                } finally {
                    termDocs.close();
                }
            }
        } catch (IOException e) {
            EULogger.warn(getClass(), e);
        }
        return info;
    }

    /**
     * Counts the single words co-occurring with a term inside its windows.
     *
     * @param i    window radius in positions
     * @param str  field name
     * @param str2 the term (one or more words)
     * @return the co-occurring words, the term itself excluded
     */
    public SortedSet<LUCooccurrence> getCoocurences(int i, String str, String str2) {
        List<LUWindow> windows = getWindows(str2, str, i);
        EULogger.info(windows.size() + " windows for coocurency search...");
        return getCoocurences(str2, windows);
    }

    /**
     * Counts the n-grams (1 up to {@code i} words) co-occurring with a term
     * inside its windows.
     *
     * @param i    maximum number of words per n-gram
     * @param i2   window radius in positions
     * @param str  field name
     * @param str2 the term (one or more words)
     * @return the co-occurring n-grams, the term itself excluded
     */
    public SortedSet<LUCooccurrence> getCoocurences(int i, int i2, String str, String str2) {
        List<LUWindow> windows = getWindows(str2, str, i2);
        EULogger.info(windows.size() + " windows for coocurency search...");
        return getCoocurences(i, str2, windows);
    }

    /**
     * Counts all n-grams (1 up to {@code i} words) found in the given windows.
     * Every window is cleared after its terms have been read.
     *
     * @param i    maximum number of words per n-gram
     * @param str  entry to exclude from the result (the searched term)
     * @param list the windows to analyse
     * @return the counted n-grams without {@code str}
     */
    public SortedSet<LUCooccurrence> getCoocurences(int i, String str, List<LUWindow> list) {
        HashMap<String, LUCooccurrence> counts = countPhrases(i, null, list);
        counts.remove(str);
        return new TreeSet<LUCooccurrence>(counts.values());
    }

    /**
     * Counts the n-grams (1 up to {@code i} words) found in the given windows,
     * restricted to a set of accepted n-grams. Every window is cleared after
     * its terms have been read.
     *
     * @param i    maximum number of words per n-gram
     * @param str  entry to exclude from the result (the searched term)
     * @param set  accepted n-grams; {@code null} accepts every n-gram
     * @param list the windows to analyse
     * @return the counted n-grams without {@code str}
     */
    public SortedSet<LUCooccurrence> getCoocurences(int i, String str, Set<String> set, List<LUWindow> list) {
        HashMap<String, LUCooccurrence> counts = countPhrases(i, set, list);
        counts.remove(str);
        return new TreeSet<LUCooccurrence>(counts.values());
    }

    /**
     * Counts the single words found in the given windows. Unlike the n-gram
     * variants this does not clear the windows.
     *
     * @param str  entry to exclude from the result (the searched term)
     * @param list the windows to analyse
     * @return the counted words without {@code str}
     */
    public SortedSet<LUCooccurrence> getCoocurences(String str, List<LUWindow> list) {
        HashMap<String, LUCooccurrence> counts = new HashMap<String, LUCooccurrence>();
        for (LUWindow window : list) {
            for (String term : window.getTerms()) {
                increment(counts, term);
            }
        }
        counts.remove(str);
        return new TreeSet<LUCooccurrence>(counts.values());
    }

    /**
     * Counts every n-gram of 1 to {@code maxLength} consecutive window terms,
     * optionally restricted to {@code accepted}. Clears each window after
     * reading its terms.
     */
    private static HashMap<String, LUCooccurrence> countPhrases(int maxLength, Set<String> accepted,
            List<LUWindow> windows) {
        HashMap<String, LUCooccurrence> counts = new HashMap<String, LUCooccurrence>();
        StringBuilder phrase = new StringBuilder();
        for (LUWindow window : windows) {
            String[] terms = window.getTerms();
            window.clear();
            for (int first = 0; first < terms.length; first++) {
                for (int offset = 0; offset < maxLength && first + offset < terms.length; offset++) {
                    if (offset != 0) {
                        phrase.append(" ");
                    }
                    phrase.append(terms[first + offset]);
                    String key = phrase.toString();
                    if (accepted == null || accepted.contains(key)) {
                        increment(counts, key);
                    }
                }
                phrase.setLength(0);
            }
        }
        return counts;
    }

    /** Increases the counter stored under {@code key}, creating it on first use. */
    private static void increment(HashMap<String, LUCooccurrence> counts, String key) {
        LUCooccurrence counter = counts.get(key);
        if (counter == null) {
            counter = new LUCooccurrence(key);
            counts.put(key, counter);
        }
        counter.increase();
    }

    /** Copies the array elements into a new hash set. */
    private static Set<String> toSet(String[] values) {
        Set<String> result = new HashSet<String>();
        for (String value : values) {
            result.add(value);
        }
        return result;
    }

    /** Closes a term enumeration if it was opened; a failure to close is only logged. */
    private static void closeQuietly(TermEnum terms) {
        if (terms == null) {
            return;
        }
        try {
            terms.close();
        } catch (IOException e) {
            EULogger.warn(LUSearcher.class, e);
        }
    }

    /**
     * @return the wrapped index searcher
     */
    public IndexSearcher getIndexSearcher() {
        return this.mIndexSearcher;
    }

    /**
     * Sets the JVM-wide maximum number of clauses of a {@link BooleanQuery}.
     *
     * @param i the new limit
     */
    public static final void setMaxClauseCount(int i) {
        BooleanQuery.setMaxClauseCount(i);
    }

    /**
     * Creates a fuzzy query for a term.
     *
     * @param str  field name
     * @param str2 term text
     * @param f    second argument of the {@link FuzzyQuery} constructor (minimum similarity)
     * @return the query
     */
    public static final FuzzyQuery getFuzzyQuery(String str, String str2, float f) {
        return new FuzzyQuery(new Term(str, str2), f);
    }

    /** Linear search for {@code i} in {@code iArr}. */
    private static final boolean contains(int i, int[] iArr) {
        for (int candidate : iArr) {
            if (candidate == i) {
                return true;
            }
        }
        return false;
    }

    /**
     * Logs the string form of every document below {@code maxDoc} at info
     * level. A document that cannot be loaded is logged as a warning and
     * skipped.
     */
    public final void logAll() {
        int maxDoc = getIndexSearcher().maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            try {
                EULogger.info(getIndexSearcher().doc(docId).toString());
            } catch (Exception e) {
                EULogger.warn(getClass(), e);
            }
        }
    }
}
