package de.dfki.catwiesel.clustering;

import de.dfki.catwiesel.document.Category;
import de.dfki.catwiesel.document.Document;
import de.dfki.catwiesel.document.DocumentFactory;
import de.dfki.catwiesel.similarity.SimilarityException;
import de.dfki.catwiesel.similarity.SimilarityMeasure;
import de.dfki.catwiesel.vocabulary.AttributeURIs;
import de.dfki.inquisition.collections.ConfigurationException;
import de.dfki.inquisition.collections.MultiValueConfiguration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import java.util.logging.Logger;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.model.node.impl.URIImpl;

/* loaded from: input_file:de/dfki/catwiesel/clustering/DoublingClusterer.class */
/**
 * Incremental clusterer that keeps at most {@code maxK} clusters below a root category.
 *
 * <p>Every cluster is a sub-category of the configured root category and stores the URI of its
 * center document in the {@link AttributeURIs#CLUSTER_CENTER} attribute. Distances are computed
 * as {@code 1 - similarity} using the {@link SimilarityMeasure} handed to
 * {@link #createClustering}.
 *
 * <p>Documents are first collected as singleton clusters. As soon as more than {@code maxK}
 * clusters exist, the threshold {@code d} is initialised with the smallest center-to-center
 * distance and clusters are merged. Afterwards a document joins its nearest cluster if it is
 * within {@code alpha * d} of that cluster's center; otherwise it opens a new cluster. Whenever
 * the cluster count exceeds {@code maxK}, {@code d} is multiplied by {@code beta} and clusters
 * whose centers are within {@code d} of each other are merged.
 *
 * <p>NOTE(review): class name and parameter constraints suggest the "doubling algorithm" for
 * incremental clustering - confirm against the project documentation.
 */
public class DoublingClusterer implements Clusterer {
    public static final String MAX_K_KEY = "maxK";
    public static final String ALPHA_KEY = "alpha";
    public static final String BETA_KEY = "beta";
    public static final String ROOT_KEY = "root";

    private static final Logger logger = Logger.getLogger(DoublingClusterer.class.getName());

    private final DocumentFactory m_documentFactory;
    /** Source of randomness for picking the surviving center during a merge pass. */
    private final Random m_random = new Random();
    /** True while singleton clusters are still being collected and no threshold exists yet. */
    private boolean m_initState = true;
    private int m_maxK;
    private Category m_root;
    /** Current distance threshold; multiplied by beta on every merge pass. */
    private double m_d;
    private double m_alpha;
    private double m_beta;
    private SimilarityMeasure m_measure;

    /**
     * @param documentFactory factory used to look up the root category named in the configuration
     */
    public DoublingClusterer(DocumentFactory documentFactory) {
        this.m_documentFactory = documentFactory;
    }

    /**
     * Reads {@code maxK}, {@code alpha}, {@code beta} and {@code root} from the configuration,
     * resets the clusterer to its initial state and adds every document of {@code set}.
     *
     * @param set documents to cluster
     * @param multiValueConfiguration configuration holding the keys listed by {@link #getRequiredConfigs()}
     * @param similarityMeasure measure used to derive distances as {@code 1 - similarity}
     * @throws NumberFormatException if a numeric configuration value cannot be parsed
     * @throws IllegalStateException if merging cannot bring the cluster count down to {@code maxK}
     */
    @Override // de.dfki.catwiesel.clustering.Clusterer
    public void createClustering(Set<Document> set, MultiValueConfiguration multiValueConfiguration, SimilarityMeasure similarityMeasure) throws ConfigurationException, SimilarityException {
        this.m_measure = similarityMeasure;
        this.m_maxK = Integer.parseInt(multiValueConfiguration.getFirstAsString(MAX_K_KEY));
        this.m_alpha = Double.parseDouble(multiValueConfiguration.getFirstAsString(ALPHA_KEY));
        this.m_beta = Double.parseDouble(multiValueConfiguration.getFirstAsString(BETA_KEY));
        this.m_root = this.m_documentFactory.getCategory(new URIImpl(multiValueConfiguration.getFirstAsString(ROOT_KEY)));
        this.m_initState = true;
        logger.info("starting clustering");
        for (Document document : set) {
            add(document);
        }
    }

    /**
     * Adds a single document to the clustering.
     *
     * <p>In the initial state the document becomes its own cluster; once more than {@code maxK}
     * clusters exist the threshold is initialised and a merge is triggered. Afterwards the
     * document is put into the nearest cluster if its distance to that cluster's center is at
     * most {@code alpha * d}; otherwise a new cluster is opened, followed by a merge if the
     * cluster count exceeds {@code maxK}.
     *
     * @param document the document to add
     * @throws SimilarityException if the similarity measure fails
     * @throws IllegalStateException if merging cannot bring the cluster count down to {@code maxK}
     */
    public void add(Document document) throws SimilarityException {
        logger.fine("adding document " + document + " to clustering");
        if (this.m_initState) {
            createCluster(document.getURI());
            if (this.m_root.getSubCategoryCount() > this.m_maxK) {
                setInitialD();
                this.m_initState = false;
                merge();
            }
            return;
        }
        double nearestDistance = Double.POSITIVE_INFINITY;
        Category nearest = null;
        for (Category candidate : this.m_root.getSubCategories()) {
            double distance = distance(document.getURI(), centerOf(candidate));
            if (distance < nearestDistance) {
                nearestDistance = distance;
                nearest = candidate;
            }
        }
        // 'nearest' stays null when no candidate compared below +infinity (e.g. NaN distances);
        // fall through to opening a new cluster instead of dereferencing null.
        if (nearest != null && nearestDistance <= this.m_alpha * this.m_d) {
            nearest.add(document.getURI());
            return;
        }
        logger.fine("new cluster");
        createCluster(document.getURI());
        if (this.m_root.getSubCategoryCount() > this.m_maxK) {
            merge();
        }
    }

    /**
     * Creates a sub-category of the root named {@code "cluster_" + center}, puts {@code center}
     * into it and records it as the cluster center.
     */
    private Category createCluster(URI center) {
        Category cluster = this.m_root.addNewCategory("cluster_" + center.toString());
        cluster.add(center);
        cluster.addAttribute(AttributeURIs.CLUSTER_CENTER, center.toString());
        return cluster;
    }

    /** Returns the center URI stored in the cluster's CLUSTER_CENTER attribute. */
    private URI centerOf(Category cluster) {
        return new URIImpl((String) cluster.getValue(AttributeURIs.CLUSTER_CENTER));
    }

    /** Distance between two documents, defined as {@code 1 - similarity}. */
    private double distance(URI first, URI second) throws SimilarityException {
        return 1.0d - this.m_measure.getSimilarity(first, second);
    }

    /**
     * Initialises the threshold with the smallest distance between the centers of two different
     * clusters. The threshold stays at positive infinity if there is no such pair.
     */
    private void setInitialD() throws SimilarityException {
        this.m_d = Double.POSITIVE_INFINITY;
        for (Category first : this.m_root.getSubCategories()) {
            for (Category second : this.m_root.getSubCategories()) {
                if (!first.equals(second)) {
                    double distance = distance(centerOf(first), centerOf(second));
                    if (distance < this.m_d) {
                        this.m_d = distance;
                    }
                }
            }
        }
    }

    /**
     * Repeatedly multiplies the threshold by {@code beta} and merges clusters until at most
     * {@code maxK} clusters remain.
     *
     * @throws IllegalStateException if a pass merged nothing and the threshold cannot grow any
     *         further, because then no later pass can merge anything either (previously this
     *         situation ended in unbounded recursion)
     */
    private void merge() throws SimilarityException {
        while (true) {
            logger.fine("merging");
            this.m_d = this.m_beta * this.m_d;
            boolean reduced = mergePass();
            if (this.m_root.getSubCategoryCount() <= this.m_maxK) {
                return;
            }
            // After a pass all surviving centers are further apart than the threshold, so only a
            // strictly larger threshold can make progress. The negated comparison also catches NaN.
            if (!reduced && !(this.m_beta * this.m_d > this.m_d)) {
                throw new IllegalStateException("Cannot reduce the clustering to at most " + this.m_maxK
                        + " clusters: merge threshold " + this.m_d + " does not grow with beta=" + this.m_beta);
            }
        }
    }

    /**
     * Performs one merge pass with the current threshold: repeatedly picks a random remaining
     * cluster and merges it with all remaining clusters whose centers are within the threshold.
     *
     * @return {@code true} if at least two clusters were merged into one during this pass
     */
    private boolean mergePass() throws SimilarityException {
        HashMap<Category, Set<Category>> neighbours = new HashMap<Category, Set<Category>>();
        for (Category cluster : this.m_root.getSubCategories()) {
            neighbours.put(cluster, new HashSet<Category>());
        }
        for (Category first : this.m_root.getSubCategories()) {
            for (Category second : this.m_root.getSubCategories()) {
                if (!first.equals(second) && distance(centerOf(first), centerOf(second)) <= this.m_d) {
                    neighbours.get(first).add(second);
                    neighbours.get(second).add(first);
                }
            }
        }
        ArrayList<Category> remaining = new ArrayList<Category>(this.m_root.getSubCategories());
        boolean reduced = false;
        while (!remaining.isEmpty()) {
            Category chosen = remaining.get(this.m_random.nextInt(remaining.size()));
            Set<Category> group = neighbours.get(chosen);
            group.add(chosen);
            Iterator<Category> members = group.iterator();
            while (members.hasNext()) {
                // Neighbours that are no longer in 'remaining' were already merged elsewhere.
                if (!remaining.remove(members.next())) {
                    members.remove();
                }
            }
            if (group.size() > 1) {
                reduced = true;
            }
            merge(group, centerOf(chosen));
        }
        return reduced;
    }

    /**
     * Replaces the given clusters by a single new cluster centered at {@code uri} that contains
     * the center and all documents of the given clusters.
     *
     * <p>NOTE(review): the new category gets the same name ("cluster_" + uri) as the existing
     * cluster of that center, which is deleted only afterwards - confirm that
     * {@code addNewCategory} copes with duplicate names.
     */
    private void merge(Set<Category> set, URI uri) {
        Category merged = createCluster(uri);
        for (Category category : set) {
            for (Document document : category.getDocuments()) {
                merged.add(document.getURI());
            }
            category.delete();
        }
    }

    /** @return the alpha parameter read by the last call to {@link #createClustering} */
    public double getAlpha() {
        return this.m_alpha;
    }

    /** @return the beta parameter read by the last call to {@link #createClustering} */
    public double getBeta() {
        return this.m_beta;
    }

    /** @return the maximum number of clusters read by the last call to {@link #createClustering} */
    public int getMaxK() {
        return this.m_maxK;
    }

    /**
     * Lists the configuration keys this clusterer reads, each with a short description.
     */
    @Override // de.dfki.catwiesel.clustering.Clusterer
    public MultiValueConfiguration getRequiredConfigs() {
        MultiValueConfiguration multiValueConfiguration = new MultiValueConfiguration();
        multiValueConfiguration.add(MAX_K_KEY, "The maximum number of clusters.");
        multiValueConfiguration.add(ALPHA_KEY, "The alpha parameter. Note that 'alpha/(alpha-1) <= beta' must be satisfied.");
        multiValueConfiguration.add(BETA_KEY, "The beta parameter. Note that 'alpha/(alpha-1) <= beta' must be satisfied.");
        multiValueConfiguration.add(ROOT_KEY, "The URI of an empty category, where the clustering should be created.");
        return multiValueConfiguration;
    }
}
