package dm.data.text;

import dm.data.texttype.TextDoc;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

/* loaded from: input_file:dm/data/text/WordSet.class */
/**
 * Word statistics collected over several classes of {@link TextDoc} documents.
 *
 * <p>The constructor walks one {@code Vector} of documents per class and records:
 * <ul>
 *   <li>the number of documents ("pages") overall and per class,</li>
 *   <li>for every word, the number of documents that contain it, and</li>
 *   <li>for every (word, class) pair, a {@link WordEntry} whose {@code pageCount} and
 *       {@code wordCount} fields are read by the scoring methods below.</li>
 * </ul>
 *
 * <p>Instances are not modified after construction by any method of this class.
 */
public class WordSet {
    /** Threshold a word's score has to exceed in {@link #minMaxSelector} and {@link #selector}. */
    public static final double BORDER = 3.0d;

    /** Exclusive upper bound for the {@code i} argument of {@link #selector}. */
    public static final double MAXNUMBER = 50000.0d;

    /** Exclusive upper bound for the {@code i} argument of {@link #minMaxSelector}. */
    private static final int MIN_MAX_LIMIT = 1000;

    /** Total number of documents over all classes. */
    private int allPages;

    /** Number of documents per class, indexed by class number. */
    private final int[] pageperclass;

    /** (word, class) statistics, keyed by the string produced by {@code WordEntry.generateKey()}. */
    private final HashMap<String, WordEntry> wordsPerClass = new HashMap<String, WordEntry>();

    /** For every word, the number of documents (over all classes) that contain it. */
    private final HashMap<String, Integer> allWords = new HashMap<String, Integer>();

    /**
     * Builds the statistics from one vector of documents per class.
     *
     * @param vectorArr {@code vectorArr[c]} holds the {@link TextDoc} instances of class
     *                  {@code c}; each document's {@code getWords()} map is expected to map
     *                  {@code String} words to {@code Double} values
     */
    public WordSet(Vector[] vectorArr) {
        this.pageperclass = new int[vectorArr.length];
        for (int classIndex = 0; classIndex < vectorArr.length; classIndex++) {
            for (Object item : vectorArr[classIndex]) {
                TextDoc doc = (TextDoc) item;
                for (Object rawEntry : doc.getWords().entrySet()) {
                    Map.Entry entry = (Map.Entry) rawEntry;
                    String word = (String) entry.getKey();
                    double weight = ((Double) entry.getValue()).doubleValue();

                    // NOTE(review): the initial count is truncated to int BEFORE scaling by
                    // 10000, while later occurrences add the unscaled value. This looks like
                    // it was meant to be (int) (weight * 10000); kept as-is until the intent
                    // is confirmed against WordEntry and its callers.
                    WordEntry fresh = new WordEntry(word, classIndex, ((int) weight) * 10000);
                    String key = fresh.generateKey();
                    WordEntry existing = this.wordsPerClass.get(key);
                    if (existing != null) {
                        existing.pageCount++;
                        existing.wordCount = (int) (existing.wordCount + weight);
                    } else {
                        this.wordsPerClass.put(key, fresh);
                    }

                    Integer seen = this.allWords.get(word);
                    this.allWords.put(word, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
                }
                this.allPages++;
                this.pageperclass[classIndex]++;
            }
        }
    }

    /** @return the total number of documents over all classes */
    public int getAllPages() {
        return this.allPages;
    }

    /**
     * @param i class number
     * @return the number of documents in class {@code i}
     */
    public int getPageInClass(int i) {
        return this.pageperclass[i];
    }

    /**
     * Looks up the statistics of a word within one class.
     *
     * @param i   class number
     * @param str the word
     * @return the stored entry, or {@code null} if nothing is stored under
     *         {@code generateKey(str, i)}
     */
    public WordEntry getClassOccurence(int i, String str) {
        return this.wordsPerClass.get(generateKey(str, i));
    }

    /**
     * @param str the word
     * @return the number of documents containing {@code str}; {@code 0} for a word that was
     *         never seen (instead of failing with a {@code NullPointerException})
     */
    public int getTotalOccurence(String str) {
        Integer count = this.allWords.get(str);
        return count == null ? 0 : count.intValue();
    }

    /**
     * Builds the lookup key for a (word, class) pair: {@code word + "#" + classNumber}.
     *
     * @param str the word ({@code null} is rendered as {@code "null"})
     * @param i   class number
     * @return the key string
     */
    public static String generateKey(String str, int i) {
        return String.valueOf(str) + "#" + i;
    }

    /** @return the number of classes */
    public int classNumber() {
        return this.pageperclass.length;
    }

    /** @return the number of distinct words over all classes */
    public int numberOfAllWords() {
        return this.allWords.size();
    }

    /**
     * Computes the relative size of every class.
     *
     * @return {@code pages in class / all pages} per class; all zeros when no document was
     *         added at all
     */
    public double[] getClassProbs() {
        double[] probs = new double[classNumber()];
        if (this.allPages == 0) {
            return probs;
        }
        for (int c = 0; c < probs.length; c++) {
            // Cast first: int / int would truncate every prior to 0 (or 1).
            probs[c] = (double) this.pageperclass[c] / this.allPages;
        }
        return probs;
    }

    /**
     * Selection rule based on how many documents contain the word.
     *
     * @param str the word
     * @param i   an integer that must stay below 1000 for the word to be accepted
     *            (presumably a rank or running count of selected words; confirm with callers)
     * @return {@code true} if the summed {@code pageCount} over all classes exceeds
     *         {@link #BORDER} and {@code i < 1000}
     */
    public boolean minMaxSelector(String str, int i) {
        double pages = 0.0d;
        for (int c = 0; c < classNumber(); c++) {
            WordEntry entry = getClassOccurence(c, str);
            if (entry != null) {
                pages += entry.pageCount;
            }
        }
        return pages > BORDER && i < MIN_MAX_LIMIT;
    }

    /**
     * Selection rule based on the per-class scores of {@link #calcX2forWord}.
     *
     * @param str the word
     * @param i   an integer that must stay below {@link #MAXNUMBER} for the word to be accepted
     *            (presumably a rank or running count of selected words; confirm with callers)
     * @return {@code true} if the largest per-class score exceeds {@link #BORDER} and
     *         {@code i < MAXNUMBER}
     */
    public boolean selector(String str, int i) {
        double best = -1.0d;
        for (double score : calcX2forWord(str)) {
            best = Math.max(score, best);
        }
        return best > BORDER && i < MAXNUMBER;
    }

    /** @return an iterator over all distinct words */
    public Iterator GetAllWordIterator() {
        return this.allWords.keySet().iterator();
    }

    /** @return the entry set mapping each word to the number of documents containing it */
    public Set GetAllEntries() {
        return this.allWords.entrySet();
    }

    /**
     * Computes, per class, the chi-square statistic of the 2x2 table
     * "document is in the class" x "document contains the word":
     * {@code N * (a*d - b*c)^2 / ((a+b) * (c+d) * (a+c) * (b+d))}.
     *
     * @param str the word
     * @return one score per class; {@code 0.0} where the denominator is zero (an empty row or
     *         column of the table), because the statistic is undefined there and a NaN would
     *         poison the maximum taken in {@link #selector}
     */
    public double[] calcX2forWord(String str) {
        double[] scores = new double[classNumber()];
        double docsWithWord = getTotalOccurence(str);
        for (int c = 0; c < scores.length; c++) {
            double docsInClass = getPageInClass(c);
            WordEntry entry = getClassOccurence(c, str);
            double inClassWith = entry != null ? entry.pageCount : 0.0d;      // a
            double inClassWithout = docsInClass - inClassWith;                 // b
            double outClassWith = docsWithWord - inClassWith;                  // c
            double outClassWithout =
                    ((getAllPages() - docsInClass) - docsWithWord) + inClassWith; // d

            double diff = (inClassWith * outClassWithout) - (inClassWithout * outClassWith);
            double numerator = (getAllPages() * diff) * diff;
            double denominator = (((inClassWith + inClassWithout) * (outClassWith + outClassWithout))
                    * (inClassWith + outClassWith)) * (inClassWithout + outClassWithout);
            scores[c] = denominator == 0.0d ? 0.0d : numerator / denominator;
        }
        return scores;
    }

    /**
     * Computes, per class, {@code (wordCount / sum of wordCount over all classes) * pageCount}.
     *
     * @param str the word
     * @return one value per class; {@code 0.0} for classes without an entry for the word and
     *         for all classes when the summed {@code wordCount} is zero
     */
    public double[] calcOneforWord(String str) {
        int classes = classNumber();
        double[] result = new double[classes];
        WordEntry[] entries = new WordEntry[classes];
        double totalWordCount = 0.0d;
        for (int c = 0; c < classes; c++) {
            entries[c] = getClassOccurence(c, str);
            if (entries[c] != null) {
                totalWordCount += entries[c].wordCount;
            }
        }
        if (totalWordCount == 0.0d) {
            // Nothing to distribute; avoids 0/0 = NaN.
            return result;
        }
        for (int c = 0; c < classes; c++) {
            if (entries[c] != null) {
                result[c] = (entries[c].wordCount / totalWordCount) * entries[c].pageCount;
            }
        }
        return result;
    }

    /**
     * Computes, per class, {@code wordCount / getTotalOccurence(str)}.
     *
     * @param str the word
     * @return one value per class; {@code 1.0E-6} for classes without an entry for the word
     */
    public double[] calcProbs(String str) {
        double[] probs = new double[classNumber()];
        double docsWithWord = getTotalOccurence(str);
        for (int c = 0; c < probs.length; c++) {
            WordEntry entry = getClassOccurence(c, str);
            probs[c] = entry != null ? entry.wordCount / docsWithWord : 1.0E-6d;
        }
        return probs;
    }

    /**
     * Computes the information gain of the word with respect to the class label:
     * {@code H(class) - P(word) * H(class | word) - P(no word) * H(class | no word)},
     * using natural logarithms and document counts.
     *
     * <p>Terms with probability zero contribute {@code 0} (the limit of {@code p * ln p}),
     * and a side of the split that contains no documents contributes no conditional entropy.
     *
     * @param str the word
     * @return the information gain; {@code 0.0} when no document was added at all
     */
    public double calcInfoGain(String str) {
        double total = getAllPages();
        if (total == 0.0d) {
            return 0.0d;
        }

        double[] classProbs = getClassProbs();
        double classEntropy = 0.0d;
        for (int c = 0; c < classProbs.length; c++) {
            classEntropy += entropyTerm(classProbs[c]);
        }

        double docsWith = getTotalOccurence(str);
        double docsWithout = total - docsWith;
        double entropyWith = 0.0d;
        double entropyWithout = 0.0d;
        for (int c = 0; c < classProbs.length; c++) {
            WordEntry entry = getClassOccurence(c, str);
            double inClassWith = entry != null ? entry.pageCount : 0.0d;
            if (docsWith > 0.0d) {
                entropyWith += entropyTerm(inClassWith / docsWith);
            }
            if (docsWithout > 0.0d) {
                entropyWithout += entropyTerm((getPageInClass(c) - inClassWith) / docsWithout);
            }
        }

        double gain = classEntropy - ((docsWith / total) * entropyWith);
        return gain - ((docsWithout / total) * entropyWithout);
    }

    /** Returns {@code -p * ln(p)}, or {@code 0} for {@code p <= 0} so that 0 * ln(0) is not NaN. */
    private static double entropyTerm(double p) {
        return p > 0.0d ? -(p * Math.log(p)) : 0.0d;
    }
}
