package org.dynaq.search.pull.sections;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.dynaq.config.AttributeConfig;
import org.dynaq.core.DynaQDocument;
import org.dynaq.core.DynaQResultList;
import org.dynaq.core.ScoredDynaQDocument;
import org.dynaq.index.LuceneIndexSet;
import org.dynaq.util.clustering.DoublingIncrementalClusterer;
import org.dynaq.util.clustering.FeatureVector;
import org.dynaq.util.lucene.basic.Buzzwords;
import org.dynaq.util.lucene.basic.IndexAccessor;
import org.dynaq.util.lucene.basic.RemoteIndexReader;
import org.dynaq.util.lucene.basic.Term2FrequencyEntry;

/* loaded from: input_file:org/dynaq/search/pull/sections/ClusterTerms.class */
/**
 * Clusters the leading documents of a result list by the TF-IDF weights of their most
 * frequent body terms and derives a list of descriptive terms for every cluster.
 *
 * <p>The feature vectors of the most recent {@link #getClusterTerms} call are kept in
 * {@link #m_mapDocFv}. That map is a plain {@link HashMap} that every call rewrites, so
 * an instance should not be shared between threads without external synchronisation.
 */
public class ClusterTerms {

    /** Buzzwords consisting of exactly six lower-case hex digits are not reported as cluster terms. */
    private static final Pattern HEX_TOKEN = Pattern.compile("[0-9a-f]{6}");

    /** Document id -> feature vector built for that document during the current call. */
    Map<String, FeatureVector> m_mapDocFv = new HashMap<String, FeatureVector>();

    /**
     * Clusters the first documents of {@code dynaQResultList} and returns the terms
     * describing each cluster.
     *
     * @param dynaQResultList the result list whose documents are clustered
     * @param i               value handed to the {@link DoublingIncrementalClusterer} constructor
     *                        (presumably the cluster limit - confirm against that class)
     * @param i2              number of term slots per cluster in the returned array
     * @param i3              maximum number of documents (those that have frequent terms) to cluster
     * @param luceneIndexSet  supplies the index paths that are read
     * @return one row per cluster with up to {@code i2} terms (unused slots are {@code null}),
     *         or {@code null} when no cluster row could be produced
     * @throws Exception propagated from the index reader, the clusterer or the buzzword extraction
     */
    public String[][] getClusterTerms(DynaQResultList dynaQResultList, int i, int i2, int i3, LuceneIndexSet luceneIndexSet) throws Exception {
        // Vectors of an earlier call were built on a different vocabulary; classifying them
        // against the new clusterer would mix unrelated documents into this result.
        this.m_mapDocFv.clear();
        DoublingIncrementalClusterer clusterer = generateClusteringObject(dynaQResultList, i, i3, luceneIndexSet);
        return extractClusterTerms(clusterer, i2, luceneIndexSet);
    }

    /**
     * Builds one TF-IDF feature vector per document, records it in {@link #m_mapDocFv} and
     * adds it to a new clusterer. Documents without frequent terms are skipped and do not
     * count towards {@code maxDocs}.
     */
    private DoublingIncrementalClusterer generateClusteringObject(DynaQResultList dynaQResultList, int clustererArg, int maxDocs, LuceneIndexSet luceneIndexSet) throws Exception {
        // NOTE(review): the reader is never released here - confirm whether IndexAccessor manages its lifetime.
        RemoteIndexReader reader = IndexAccessor.getMultiIndexReader(luceneIndexSet.getIndexPaths(), true);
        DoublingIncrementalClusterer clusterer = new DoublingIncrementalClusterer(clustererArg);
        DefaultSimilarity similarity = new DefaultSimilarity();
        Iterator<ScoredDynaQDocument> docs = dynaQResultList.getResultDocs().iterator();

        // The vector dimensions are the same for every document, so they are computed once
        // (on first use) instead of once per document.
        String[] vocabulary = null;
        int vectorNo = 1;
        while (vectorNo <= maxDocs && docs.hasNext()) {
            String docId = docs.next().getDocId();
            List<Term2FrequencyEntry> topTerms = reader.getTopFrequentTerms(docId, AttributeConfig.IndexAttributes.BODY, 4, 3, 1000);
            if (topTerms == null || topTerms.isEmpty()) {
                continue;
            }

            // A fresh map per document: weights of previously processed documents must not
            // leak into this document's vector.
            Map<String, Float> weights = new HashMap<String, Float>();
            int numDocs = reader.numDocs().intValue();
            for (Term2FrequencyEntry entry : topTerms) {
                String term = entry.getTerm();
                int docFreq = reader.documentFrequency(AttributeConfig.IndexAttributes.BODY, term).intValue();
                float idf = similarity.idf(docFreq, numDocs);
                weights.put(term, Float.valueOf(entry.getFrequency().intValue() * idf));
            }

            if (vocabulary == null) {
                vocabulary = getTermsFromResultList(reader, dynaQResultList, maxDocs);
            }
            double[] values = new double[vocabulary.length];
            for (int k = 0; k < vocabulary.length; k++) {
                Float weight = weights.get(vocabulary[k]);
                values[k] = (weight != null) ? weight.floatValue() : 0.0d;
            }

            FeatureVector featureVector = new FeatureVector(Integer.toString(vectorNo), values);
            this.m_mapDocFv.put(docId, featureVector);
            clusterer.add(featureVector);
            vectorNo++;
            // Kept from the original: sleep(0) throws InterruptedException when the thread
            // has been interrupted, so a long clustering run can be cancelled.
            Thread.sleep(0L);
        }
        return clusterer;
    }

    /**
     * Collects the distinct frequent body terms of the first {@code maxDocs} documents that
     * have any; the returned array defines the dimensions of the feature vectors.
     */
    private String[] getTermsFromResultList(RemoteIndexReader remoteIndexReader, DynaQResultList dynaQResultList, int maxDocs) throws Exception {
        Set<String> vocabulary = new HashSet<String>();
        Iterator<ScoredDynaQDocument> docs = dynaQResultList.getResultDocs().iterator();
        int counted = 1;
        while (counted <= maxDocs && docs.hasNext()) {
            // NOTE(review): this call passes 2 where the per-document vectors pass 4; the
            // meaning of these arguments is not visible here - confirm the difference is intended.
            List<Term2FrequencyEntry> topTerms = remoteIndexReader.getTopFrequentTerms(docs.next().getDocId(), AttributeConfig.IndexAttributes.BODY, 2, 3, 1000);
            if (topTerms == null || topTerms.isEmpty()) {
                continue;
            }
            for (Term2FrequencyEntry entry : topTerms) {
                vocabulary.add(entry.getTerm());
            }
            counted++;
        }
        return vocabulary.toArray(new String[0]);
    }

    /**
     * Assigns every recorded document to a cluster, concatenates the body text of the
     * documents of each cluster and asks {@link Buzzwords} for the terms of that text.
     *
     * @return one row per cluster holding at most {@code termsPerCluster} terms, filled from
     *         index 0 without gaps; {@code null} when there are no clusters or more document
     *         groups than clusters
     */
    private String[][] extractClusterTerms(DoublingIncrementalClusterer doublingIncrementalClusterer, int termsPerCluster, LuceneIndexSet luceneIndexSet) throws Exception {
        Map<String, Integer> clusterOfDoc = new HashMap<String, Integer>();
        for (Map.Entry<String, FeatureVector> recorded : this.m_mapDocFv.entrySet()) {
            clusterOfDoc.put(recorded.getKey(), Integer.valueOf(doublingIncrementalClusterer.classifyInternal(recorded.getValue())));
        }

        // Sorting by cluster id makes the documents of one cluster adjacent.
        List<Map.Entry<String, Integer>> assignments = new ArrayList<Map.Entry<String, Integer>>(clusterOfDoc.entrySet());
        Collections.sort(assignments, new CompareByClusterID());

        String[] clusterTexts = new String[doublingIncrementalClusterer.numberOfClusters()];
        StringBuilder text = new StringBuilder();
        int slot = 0;
        int previousId = 0;
        boolean first = true;
        for (Map.Entry<String, Integer> assignment : assignments) {
            int clusterId = assignment.getValue().intValue();
            if (!first && clusterId != previousId) {
                // More groups than clusters: report "no result" the same way the final
                // check below does, instead of running past the end of the array.
                if (slot >= clusterTexts.length) {
                    return null;
                }
                clusterTexts[slot++] = text.toString();
                text.setLength(0);
            }
            text.append(new DynaQDocument(assignment.getKey()).getAttributeValue(AttributeConfig.IndexAttributes.BODY));
            previousId = clusterId;
            first = false;
        }
        if (slot >= clusterTexts.length) {
            return null;
        }
        clusterTexts[slot] = text.toString();

        // NOTE(review): when fewer groups than clusters were found, the remaining texts are
        // null and are handed to Buzzwords unchanged - confirm it tolerates null input.
        String[][] clusterTerms = new String[clusterTexts.length][termsPerCluster];
        for (int c = 0; c < clusterTexts.length; c++) {
            Map<String, Float> buzzwords = Buzzwords.getBuzzwordsWithTfIdf(clusterTexts[c], AttributeConfig.IndexAttributes.BODY, termsPerCluster, false, luceneIndexSet.getIndexPaths());
            int filled = 0;
            for (String term : buzzwords.keySet()) {
                if (filled >= termsPerCluster) {
                    break;
                }
                // Skipped tokens do not consume a slot, so a row never has null gaps between terms.
                if (!HEX_TOKEN.matcher(term).matches()) {
                    clusterTerms[c][filled++] = term;
                }
            }
        }
        return clusterTerms;
    }
}
