package org.dynaq.search.pull.sections;

import de.dfki.inquisition.lucene.IndexAccessor;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.util.PriorityQueue;
import org.dynaq.config.AttributeConfig;
import org.dynaq.core.DynaQDocument;
import org.dynaq.core.DynaQResultList;
import org.dynaq.core.ScoredDynaQDocument;
import org.dynaq.index.LuceneServicePlugin;
import org.dynaq.util.clustering.DoublingIncrementalClusterer;
import org.dynaq.util.clustering.FeatureVector;

/* loaded from: input_file:org/dynaq/search/pull/sections/ClusterTerms.class */
/**
 * Clusters the top documents of a result list by the tf-idf weights of their
 * body terms and extracts, per cluster, the most characteristic terms using
 * Lucene's {@link MoreLikeThis}.
 *
 * <p>The instance keeps the feature vectors of the most recent
 * {@link #getClusterTerms} call in {@link #m_mapDocFv}; no synchronization is
 * performed on that state.
 */
public class ClusterTerms {
    /** Terms consisting of exactly six lowercase hex digits are excluded from the output. */
    private static final String HEX_TOKEN_REGEX = "[0-9a-f]{6}";

    /** Feature vector of each clustered document, keyed by the document's URI attribute. */
    Map<String, FeatureVector> m_mapDocFv = new HashMap<String, FeatureVector>();

    /**
     * Clusters the leading documents of the result list and returns the top terms of each cluster.
     *
     * @param dynaQResultList the result list whose documents are clustered
     * @param i value handed to the {@link DoublingIncrementalClusterer} constructor
     *        (NOTE(review): presumably the maximum number of clusters - confirm)
     * @param i2 maximum number of terms returned per cluster
     * @param i3 maximum number of documents (having a body term vector) that are clustered
     * @param luceneServicePlugin supplies the index paths and the {@link MoreLikeThis} instance
     * @return one row per cluster with up to {@code i2} term entries as delivered by
     *         {@code MoreLikeThis.retrieveTerms} (unused trailing slots are {@code null}),
     *         or {@code null} if no cluster text could be assembled
     * @throws Exception if index access, classification or term retrieval fails
     */
    public Object[][] getClusterTerms(DynaQResultList dynaQResultList, int i, int i2, int i3, LuceneServicePlugin luceneServicePlugin) throws Exception {
        // Drop vectors of a previous call: they were built against another vocabulary
        // and must not be classified by the clusterer created below.
        this.m_mapDocFv.clear();
        DoublingIncrementalClusterer clusterer = generateClusteringObject(dynaQResultList, i, i3, luceneServicePlugin);
        return extractClusterTerms(clusterer, i2, luceneServicePlugin);
    }

    /**
     * Builds a tf-idf feature vector for each of the first {@code maxDocs} result documents
     * that have a body term vector, registers it in {@link #m_mapDocFv} and adds it to a
     * new clusterer.
     *
     * @param dynaQResultList the result list to read documents from
     * @param clustererArg value handed to the {@link DoublingIncrementalClusterer} constructor
     * @param maxDocs maximum number of documents to add to the clusterer
     * @param luceneServicePlugin supplies the index paths
     * @return the clusterer holding the generated feature vectors
     * @throws Exception if index access fails or the thread is interrupted
     */
    private DoublingIncrementalClusterer generateClusteringObject(DynaQResultList dynaQResultList, int clustererArg, int maxDocs, LuceneServicePlugin luceneServicePlugin) throws Exception {
        IndexReader reader = IndexAccessor.getMultiIndexReader(luceneServicePlugin.getIndexPaths(), true);
        try {
            DoublingIncrementalClusterer clusterer = new DoublingIncrementalClusterer(clustererArg);
            // Loop invariants: one similarity instance and one document count suffice.
            DefaultSimilarity similarity = new DefaultSimilarity();
            int numDocs = reader.numDocs();
            // The shared vocabulary (vector dimensions) is identical for every document,
            // so it is computed once - lazily, on the first document that needs it.
            String[] vocabulary = null;
            int ordinal = 1;
            Iterator<ScoredDynaQDocument> docs = dynaQResultList.getResultDocs().iterator();
            while (ordinal <= maxDocs && docs.hasNext()) {
                ScoredDynaQDocument doc = docs.next();
                int docNumber = doc.getCurrentLuceneIndexNumber(reader);
                String uri = doc.getAttributeValue(AttributeConfig.IndexAttributes.URI);
                TermFreqVector termVector = reader.getTermFreqVector(docNumber, AttributeConfig.IndexAttributes.BODY);
                if (termVector == null) {
                    // Documents without a stored term vector are skipped and do not count.
                    continue;
                }

                // Weights are collected per document; sharing one map across documents
                // would leak the weights of earlier documents into later vectors.
                Map<String, Float> weights = new HashMap<String, Float>();
                String[] terms = termVector.getTerms();
                int[] frequencies = termVector.getTermFrequencies();
                for (int t = 0; t < terms.length; t++) {
                    Term term = new Term(AttributeConfig.IndexAttributes.BODY, terms[t]);
                    float idf = similarity.idf(reader.docFreq(term), numDocs);
                    weights.put(term.text(), Float.valueOf(frequencies[t] * idf));
                }

                if (vocabulary == null) {
                    vocabulary = getTermsFromResultList(reader, dynaQResultList, maxDocs);
                }
                double[] values = new double[vocabulary.length];
                for (int d = 0; d < vocabulary.length; d++) {
                    Float weight = weights.get(vocabulary[d]);
                    values[d] = weight != null ? weight.floatValue() : 0.0d;
                }

                FeatureVector featureVector = new FeatureVector(Integer.toString(ordinal), values);
                this.m_mapDocFv.put(uri, featureVector);
                clusterer.add(featureVector);
                ordinal++;
                // Thread.sleep throws InterruptedException when the thread has been interrupted.
                // NOTE(review): presumably kept as a cancellation point - confirm.
                Thread.sleep(0L);
            }
            return clusterer;
        } finally {
            IndexAccessor.releaseIndexReader(reader);
        }
    }

    /**
     * Collects the distinct body terms of the first {@code i} result documents that have a
     * body term vector.
     *
     * @param indexReader reader used to resolve document numbers and term vectors
     * @param dynaQResultList the result list to read documents from
     * @param i maximum number of documents (having a term vector) to consider
     * @return the distinct terms, in the iteration order of the underlying hash set
     * @throws Exception if index access fails
     */
    private String[] getTermsFromResultList(IndexReader indexReader, DynaQResultList dynaQResultList, int i) throws Exception {
        HashSet<String> distinctTerms = new HashSet<String>();
        Iterator<ScoredDynaQDocument> docs = dynaQResultList.getResultDocs().iterator();
        int ordinal = 1;
        while (ordinal <= i && docs.hasNext()) {
            int docNumber = docs.next().getCurrentLuceneIndexNumber(indexReader);
            TermFreqVector termVector = indexReader.getTermFreqVector(docNumber, AttributeConfig.IndexAttributes.BODY);
            if (termVector == null) {
                continue;
            }
            for (String term : termVector.getTerms()) {
                distinctTerms.add(term);
            }
            ordinal++;
        }
        return distinctTerms.toArray(new String[distinctTerms.size()]);
    }

    /**
     * Retrieves up to {@code maxTerms} terms per cluster by running {@link MoreLikeThis}
     * over the concatenated body texts of the cluster's documents.
     *
     * @param clusterer the clusterer used to classify the vectors in {@link #m_mapDocFv}
     * @param maxTerms maximum number of terms per cluster (row length of the result)
     * @param luceneServicePlugin supplies the index paths and the {@link MoreLikeThis} instance
     * @return one row per cluster, filled from index 0 with the retrieved term entries and
     *         padded with {@code null}; {@code null} if the cluster texts do not fit the
     *         number of clusters reported by the clusterer (including zero clusters)
     * @throws Exception if index access, classification or term retrieval fails
     */
    private Object[][] extractClusterTerms(DoublingIncrementalClusterer clusterer, int maxTerms, LuceneServicePlugin luceneServicePlugin) throws Exception {
        MultiReader reader = IndexAccessor.getMultiIndexReader(luceneServicePlugin.getIndexPaths(), true);
        try {
            String[] clusterTexts = collectClusterTexts(clusterer);
            if (clusterTexts == null) {
                // The finally block releases the reader; releasing here as well would
                // release it twice.
                return null;
            }

            MoreLikeThis moreLikeThis = luceneServicePlugin.createMoreLikeThis(reader);
            moreLikeThis.setMaxQueryTerms(maxTerms);
            moreLikeThis.setMinTermFreq(2);
            moreLikeThis.setMinWordLen(2);
            moreLikeThis.setBoost(true);

            Object[][] result = new Object[clusterTexts.length][maxTerms];
            for (int c = 0; c < clusterTexts.length; c++) {
                if (clusterTexts[c] == null) {
                    // No document was classified into this slot; leave the row empty.
                    continue;
                }
                PriorityQueue queue = moreLikeThis.retrieveTerms(new StringReader(clusterTexts[c]));
                int slot = 0;
                // pop() shrinks the queue, so the queue size must not be compared against
                // a growing loop index; drain until the row is full or the queue is empty.
                while (slot < maxTerms && queue.size() > 0) {
                    Object[] termInfo = (Object[]) queue.pop();
                    if (!((String) termInfo[0]).matches(HEX_TOKEN_REGEX)) {
                        result[c][slot++] = termInfo;
                    }
                }
            }
            return result;
        } finally {
            IndexAccessor.releaseIndexReader(reader);
        }
    }

    /**
     * Classifies every vector in {@link #m_mapDocFv} and concatenates, per cluster id, the
     * body texts of the documents assigned to it. Slots are filled in the order produced
     * by {@code CompareByClusterID}.
     *
     * @param clusterer the clusterer used for classification
     * @return an array of length {@code clusterer.numberOfClusters()}; slots beyond the
     *         number of populated clusters stay {@code null}. Returns {@code null} if more
     *         distinct cluster ids occur than slots exist (including zero slots).
     * @throws Exception if classification or document access fails
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private String[] collectClusterTexts(DoublingIncrementalClusterer clusterer) throws Exception {
        Map<String, Integer> clusterByUri = new HashMap<String, Integer>();
        for (Map.Entry<String, FeatureVector> entry : this.m_mapDocFv.entrySet()) {
            clusterByUri.put(entry.getKey(), Integer.valueOf(clusterer.classifyInternal(entry.getValue())));
        }

        // Kept raw: CompareByClusterID is declared elsewhere and its type parameter is
        // not visible from this file.
        ArrayList sortedEntries = new ArrayList(clusterByUri.entrySet());
        Collections.sort(sortedEntries, new CompareByClusterID());

        String[] texts = new String[clusterer.numberOfClusters()];
        StringBuilder current = new StringBuilder();
        Integer previousId = null;
        int slot = 0;
        for (Object element : sortedEntries) {
            Map.Entry entry = (Map.Entry) element;
            Integer clusterId = (Integer) entry.getValue();
            if (previousId != null && !previousId.equals(clusterId)) {
                // A new cluster id starts: store the text gathered for the previous one.
                if (slot >= texts.length) {
                    return null;
                }
                texts[slot++] = current.toString();
                current.setLength(0);
            }
            String body = new DynaQDocument((String) entry.getKey()).getAttributeValue(AttributeConfig.IndexAttributes.BODY);
            if (body != null) {
                // Separate documents so the last token of one text does not fuse with
                // the first token of the next.
                if (current.length() > 0) {
                    current.append(' ');
                }
                current.append(body);
            }
            previousId = clusterId;
        }
        if (slot >= texts.length) {
            return null;
        }
        texts[slot] = current.toString();
        return texts;
    }
}
