package de.dfki.km.exact.lucene.app;

import com.ibm.icu.util.StringTokenizer;
import de.dfki.km.exact.file.EUFileWriter;
import de.dfki.km.exact.lucene.LUSearcher;
import de.dfki.km.exact.lucene.LUTermFrequency;
import de.dfki.km.exact.lucene.LUWriter;
import de.dfki.km.exact.lucene.analyzer.LUAnalyzer;
import de.dfki.km.exact.lucene.file.LUDcoumentFactory;
import de.dfki.km.exact.lucene.meta.LUMetaSearcher;
import de.dfki.km.exact.lucene.util.LULocal;
import de.dfki.km.exact.lucene.voc.FIELD;
import de.dfki.km.exact.math.Average;
import de.dfki.km.exact.math.EUMath;
import de.dfki.km.exact.math.EUProbability;
import de.dfki.km.exact.nlp.FrequencyClass;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
import org.apache.pdfbox.util.PDFTextStripper;

/* loaded from: input_file:de/dfki/km/exact/lucene/app/ReadabilityChecker.class */
/**
 * Command-line tool that walks the bookmark (outline) tree of a PDF file and prints, for every
 * bookmark, a weighted average of the frequency classes of the terms found in the text that
 * belongs to the bookmark, followed by the variance in parentheses.
 *
 * <p>Frequency classes are looked up in the {@link #meta} searcher. Terms for which the lookup
 * yields {@code -1} are collected in {@link #aaaa} and written to
 * {@code resource/example/unknown.txt} when the walk has finished.
 *
 * <p>All state is kept in static fields, so the class is not safe for concurrent use.
 */
public class ReadabilityChecker {
    /** Output file that receives the terms collected in {@link #aaaa}, one per line. */
    private static final String UNKNOWN_TERMS_FILE = "resource/example/unknown.txt";

    /** Character set handed to {@code LUAnalyzer.setCharacters} before indexing a section. */
    private static final String ANALYZER_CHARACTERS =
            " …•‚”“„‘«»<>’,|0123456789`.;:?!-_'/()[]{}@§$%&=^°*+~#´\"\\→";

    /** Terms whose frequency class is below this value do not contribute to the score. */
    private static final double MIN_FREQUENCY_CLASS = 9.0d;

    /**
     * Frequency class for which a term is recorded in {@link #aaaa}.
     * NOTE(review): presumably "term not found in the reference corpus" (the output file is
     * called unknown.txt) - confirm against LUMetaSearcher.getFrequencyClass.
     */
    private static final double UNKNOWN_FREQUENCY_CLASS = -1.0d;

    /** Document under analysis; assigned by {@link #main} and read by {@link #check}. */
    static PDDocument pdfDocument;

    /** Outline of {@link #pdfDocument}; assigned by {@link #main}. */
    static PDDocumentOutline pdfDocumentOutline;

    /** Terms whose frequency class was {@code -1}; filled by {@link #check}. */
    static HashSet<String> aaaa = new HashSet<>();

    /** Path of the PDF file that {@link #main} loads. */
    static String mFile = "resource/example/1995_01.pdf";

    /** Source of the frequency classes and of the maximum frequency. */
    static LUMetaSearcher meta = LULocal.getMetaSearcherSpiegel();

    /**
     * Loads {@link #mFile}, scores every bookmark of its outline and writes the collected
     * unknown terms to {@code resource/example/unknown.txt}.
     *
     * @param strArr command-line arguments; ignored
     * @throws Exception if loading, text extraction, indexing or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        pdfDocument = PDDocument.load(mFile);
        try {
            pdfDocumentOutline = pdfDocument.getDocumentCatalog().getDocumentOutline();
            checkNode(pdfDocumentOutline, "");
        } finally {
            // The document was never closed before. Plain try/finally is used because
            // PDDocument is only Closeable in later PDFBox 1.x releases.
            pdfDocument.close();
        }
        EUFileWriter.writeAsLines(UNKNOWN_TERMS_FILE, aaaa);
    }

    /**
     * Scores the text between the given bookmark and its next sibling (or without an end
     * bookmark when there is no next sibling) and prints one result line to standard output:
     * {@code str + label + ": " + weightedMean + " (" + variance + ")"}.
     *
     * <p>Only two kinds of terms contribute: terms whose frequency class is at least 9 (with
     * that class) and terms whose frequency class is -1 (with the maximum frequency class; these
     * are also added to {@link #aaaa}). Each contribution is weighted by the term's frequency
     * in the section.
     *
     * @param pDOutlineItem bookmark that marks the start of the section
     * @param str prefix printed in front of the result line (used for indentation)
     * @throws Exception if text extraction, indexing or searching fails
     */
    public static void check(PDOutlineItem pDOutlineItem, String str) throws Exception {
        RAMDirectory directory = new RAMDirectory();
        indexSection(directory, pDOutlineItem);

        LUSearcher searcher = new LUSearcher((Directory) directory);
        double maxFrequencyClass = FrequencyClass.getMax(meta.getMaxFrequency());

        // Parallel lists: frequencyClasses.get(k) belongs to termCounts.get(k).
        ArrayList<Double> frequencyClasses = new ArrayList<>();
        ArrayList<Integer> termCounts = new ArrayList<>();
        for (LUTermFrequency termFrequency : searcher.getSingleWordTermInfos(0, FIELD.CONTENT)) {
            String term = switchCharacter(termFrequency.getTerm());
            Double frequencyClass = Double.valueOf(meta.getFrequencyClass(term));
            if (frequencyClass.doubleValue() >= MIN_FREQUENCY_CLASS) {
                frequencyClasses.add(frequencyClass);
                termCounts.add(Integer.valueOf(termFrequency.getFrequency()));
            } else if (frequencyClass.doubleValue() == UNKNOWN_FREQUENCY_CLASS) {
                aaaa.add(term);
                frequencyClasses.add(Double.valueOf(maxFrequencyClass));
                termCounts.add(Integer.valueOf(termFrequency.getFrequency()));
            }
        }

        int totalCount = 0;
        for (int count : termCounts) {
            totalCount += count;
        }
        // Relative weight of every contributing term; the loop body never runs when
        // totalCount is 0 because termCounts is empty in that case.
        LinkedList<Double> weights = new LinkedList<>();
        for (int count : termCounts) {
            weights.add(Double.valueOf((double) count / (double) totalCount));
        }

        double mean = EUMath.roundNumber(Average.getWeightedDoubleArith(frequencyClasses, termCounts), 2);
        // The variance is computed around the already rounded mean, as before.
        double variance = EUMath.roundNumber(EUProbability.getVariance(frequencyClasses, weights, mean));
        System.out.println(str + searcher.getValue(0, "label") + ": " + mean + " (" + variance + ")");
    }

    /**
     * Returns the given term with the umlaut sequences for a, u and o replaced.
     *
     * <p>NOTE(review): in the decompiled source the search and replacement literals render
     * identically. They are spelled here as "base letter + combining diaeresis (U+0308)" to
     * "precomposed letter" on the assumption that this was the intent - confirm against the
     * original class file.
     *
     * @param str term to convert
     * @return the converted term
     */
    public static String switchCharacter(String str) {
        return str.replace("a\u0308", "\u00e4")
                .replace("u\u0308", "\u00fc")
                .replace("o\u0308", "\u00f6");
    }

    /**
     * Calls {@link #check} for every child of the given outline node and then recurses into
     * that child, prepending a tab to the prefix on each level.
     *
     * @param pDOutlineNode node whose children are scored; {@code null} (for example a document
     *     without an outline) is treated as "no children"
     * @param str prefix passed on to {@link #check}
     * @throws Exception if scoring a bookmark fails
     */
    public static void checkNode(PDOutlineNode pDOutlineNode, String str) throws Exception {
        if (pDOutlineNode == null) {
            return;
        }
        for (PDOutlineItem child = pDOutlineNode.getFirstChild(); child != null; child = child.getNextSibling()) {
            check(child, str);
            checkNode(child, "\t" + str);
        }
    }

    /** Extracts the text of the section that starts at the bookmark and indexes it as one document. */
    private static void indexSection(RAMDirectory directory, PDOutlineItem pDOutlineItem) throws Exception {
        LUWriter writer = new LUWriter((Directory) directory);
        LUAnalyzer analyzer = new LUAnalyzer();
        analyzer.setCharacters(ANALYZER_CHARACTERS);
        writer.setAnalyzer(analyzer);
        writer.create();

        PDOutlineItem nextSibling = pDOutlineItem.getNextSibling();
        PDFTextStripper stripper = new PDFTextStripper("utf8");
        dumpDocumentText(stripper);

        stripper.setStartBookmark(pDOutlineItem);
        if (nextSibling != null) {
            stripper.setEndBookmark(nextSibling);
        }
        writer.add(LUDcoumentFactory.getDocument("any", pDOutlineItem.getTitle(), stripper.getText(pdfDocument)));
        writer.close();
    }

    /**
     * Prints the text of the whole document line by line, using the stripper before any
     * bookmark has been set on it.
     *
     * <p>NOTE(review): this runs once per bookmark and looks like leftover debugging output; it
     * is kept because it is observable behavior. Consider removing it.
     */
    private static void dumpDocumentText(PDFTextStripper stripper) throws Exception {
        StringTokenizer lines = new StringTokenizer(stripper.getText(pdfDocument), "\n");
        while (lines.hasMoreTokens()) {
            System.out.println(lines.nextToken());
        }
    }
}
