/*
 * Decompiled with CFR 0.152.
 */
package weka.clusterers;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.rules.DecisionTableHashKey;
import weka.clusterers.NumberOfClustersRequestable;
import weka.clusterers.RandomizableClusterer;
import weka.clusterers.UpdateableClusterer;
import weka.core.AbstractInstance;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.NormalizableDistance;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SparseInstance;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

public class Canopy
extends RandomizableClusterer
implements UpdateableClusterer,
NumberOfClustersRequestable,
OptionHandler,
TechnicalInformationHandler {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 2067574593448223334L;
    /** The canopies (one instance per canopy); candidates during training, final centers after updateFinished(). */
    protected Instances m_canopies;
    /** Per canopy: single-element array counting the instances that fell within T2 of that canopy. */
    protected List<double[]> m_canopyT2Density;
    /**
     * Per canopy, per attribute running sums used to compute the final center: numeric attributes use a
     * one-element sum, nominal attributes use one count per value plus a trailing slot counting missing values.
     */
    protected List<double[][]> m_canopyCenters;
    /** Per canopy, per attribute: number of missing values seen for numeric attributes. */
    protected List<double[]> m_canopyNumMissingForNumerics;
    /** Per canopy: bit set (packed into longs, 64 canopies per long) produced by assignCanopies(). */
    protected List<long[]> m_clusterCanopies;
    /** Default user T2 setting; a negative value requests the std. dev. based heuristic. */
    public static final double DEFAULT_T2 = -1.0;
    /** Default user T1 setting; a negative value is used as a positive multiplier of T2. */
    public static final double DEFAULT_T1 = -1.25;
    /** T2 distance as specified by the user (see setT2()). */
    protected double m_userT2 = -1.0;
    /** T1 distance as specified by the user (see setT1()); initialized by the chained assignment below. */
    protected double m_userT1;
    /** T1 distance actually in effect; this initializer also sets m_userT1 to -1.25. */
    protected double m_t1 = this.m_userT1 = -1.25;
    /** T2 distance actually in effect. */
    protected double m_t2 = this.m_userT2;
    /** Prune low density candidate canopies every this many instances seen by updateClusterer(). */
    protected int m_periodicPruningRate = 10000;
    /** Candidate canopies with a T2 density below this value are removed when pruning. */
    protected double m_minClusterDensity = 2.0;
    /** Upper bound on the number of candidate canopies held at any one time. */
    protected int m_maxCanopyCandidates = 100;
    /** Whether the most recent call to pruneCandidateCanopies() actually removed anything. */
    protected boolean m_didPruneLastTime = true;
    /** Number of instances passed to updateClusterer() so far. */
    protected int m_instanceCount;
    /** Number of clusters requested by the user; a negative value means "whatever T2 produces". */
    protected int m_numClustersRequested = -1;
    /** Optional filter applied to every incoming instance to replace missing values. */
    protected Filter m_missingValuesReplacer;
    /** If true, buildClusterer() does not install a ReplaceMissingValues filter. */
    protected boolean m_dontReplaceMissing = false;
    /** Distance function used for all T1/T2 comparisons. */
    protected NormalizableDistance m_distanceFunction = new EuclideanDistance();
    /**
     * Copy of the batch training data kept during buildClusterer(); adjustCanopies() samples from it when
     * more clusters are requested than canopies were found, then replaces it with a header-only copy.
     */
    protected Instances m_trainingData;

    /**
     * Returns a description of this clusterer suitable for display in a GUI or help output.
     * The text ends with the technical reference returned by {@link #getTechnicalInformation()}.
     *
     * @return the description text
     */
    public String globalInfo() {
        // Fixed user-visible typos: "capopy" -> "canopy", "eitherbatch" -> "either batch".
        return "Cluster data using the canopy clustering algorithm, which requires just one pass over the data. "
            + "Can run in either batch or incremental mode. "
            + "Results are generally not as good when running incrementally as the min/max for each numeric attribute is not known in advance. "
            + "Has a heuristic (based on attribute std. deviations), that can be used in batch mode, for setting the T2 distance. "
            + "The T2 distance determines how many canopies (clusters) are formed. "
            + "When the user specifies a specific number (N) of clusters to generate, the algorithm will return the top N canopies "
            + "(as determined by T2 density) when N < number of canopies (this applies to both batch and incremental learning); "
            + "when N > number of canopies, the difference is made up by selecting training instances randomly "
            + "(this can only be done when batch training). For more information see:\n\n"
            + getTechnicalInformation().toString();
    }

    /**
     * Returns the bibliographic reference for the canopy clustering algorithm
     * (McCallum, Nigam and Ungar, KDD 2000).
     *
     * @return the technical information describing the publication
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        result.setValue(TechnicalInformation.Field.AUTHOR, "A. McCallum and K. Nigam and L.H. Ungar");
        result.setValue(TechnicalInformation.Field.TITLE, "Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching");
        // Fixed: "internation" typo and a second, unrelated proceedings title that had been appended.
        result.setValue(TechnicalInformation.Field.BOOKTITLE, "Proceedings of the sixth ACM SIGKDD international conference on knowledge discovery and data mining");
        result.setValue(TechnicalInformation.Field.YEAR, "2000");
        result.setValue(TechnicalInformation.Field.PAGES, "169-178");
        return result;
    }

    /**
     * Describes what kind of data this clusterer accepts: no class attribute,
     * nominal and numeric attributes, and missing values.
     *
     * @return the capabilities of this clusterer
     */
    @Override
    public Capabilities getCapabilities() {
        final Capabilities caps = super.getCapabilities();
        // Start from nothing, then switch on exactly what is supported.
        caps.disableAll();
        final Capabilities.Capability[] supported = {
            Capabilities.Capability.NO_CLASS,
            Capabilities.Capability.NOMINAL_ATTRIBUTES,
            Capabilities.Capability.NUMERIC_ATTRIBUTES,
            Capabilities.Capability.MISSING_VALUES
        };
        for (Capabilities.Capability capability : supported) {
            caps.enable(capability);
        }
        return caps;
    }

    /**
     * Lists the command line options understood by this clusterer, followed by
     * the options of the superclass.
     *
     * @return an enumeration of all available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();
        // Documented default now matches the initial value of m_numClustersRequested (-1).
        result.addElement(new Option("\tNumber of clusters.\n\t(default = -1, i.e. determined by the T2 distance).", "N", 1, "-N <num>"));
        // Option name no longer carries a leading dash, consistent with all other options here.
        result.addElement(new Option("\tMaximum number of candidate canopies to retain in memory\n\tat any one time. T2 distance, plus data characteristics,\n\twill determine how many candidate canopies are formed before\n\tperiodic and final pruning are performed, which might result\n\tin excess memory consumption. This setting avoids large numbers\n\tof candidate canopies consuming memory. (default = 100)", "max-candidates", 1, "-max-candidates <num>"));
        result.addElement(new Option("\tHow often to prune low density canopies. \n\t(default = every 10,000 training instances)", "periodic-pruning", 1, "-periodic-pruning <num>"));
        result.addElement(new Option("\tMinimum canopy density, below which a canopy will be pruned\n\tduring periodic pruning. (default = 2 instances)", "min-density", 1, "-min-density <num>"));
        result.addElement(new Option("\tThe T2 distance to use. Values < 0 indicate that\n\ta heuristic based on attribute std. deviation should be used to set this.\n\tNote that this heuristic can only be used when batch training\n\t(default = -1.0)", "t2", 1, "-t2 <num>"));
        // Documented default now matches DEFAULT_T1 (-1.25).
        result.addElement(new Option("\tThe T1 distance to use. A value < 0 is taken as a\n\tpositive multiplier for T2. (default = -1.25)", "t1", 1, "-t1 <num>"));
        result.addElement(new Option("\tDon't replace missing values with mean/mode when running in batch mode.\n", "M", 0, "-M"));
        result.addAll(Collections.list(super.listOptions()));
        return result.elements();
    }

    /**
     * Parses the given option array and applies every option that is present.
     * Options that are absent leave the corresponding setting untouched, except
     * for the -M flag, which is always set from the array. Remaining options are
     * handed to the superclass.
     *
     * @param options the options to parse
     * @throws Exception if an option value cannot be parsed or the superclass rejects the options
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        final String numClusters = Utils.getOption('N', options);
        if (!numClusters.isEmpty()) {
            setNumClusters(Integer.parseInt(numClusters));
        }

        final String maxCandidates = Utils.getOption("max-candidates", options);
        if (!maxCandidates.isEmpty()) {
            setMaxNumCandidateCanopiesToHoldInMemory(Integer.parseInt(maxCandidates));
        }

        final String pruningRate = Utils.getOption("periodic-pruning", options);
        if (!pruningRate.isEmpty()) {
            setPeriodicPruningRate(Integer.parseInt(pruningRate));
        }

        final String minDensity = Utils.getOption("min-density", options);
        if (!minDensity.isEmpty()) {
            setMinimumCanopyDensity(Double.parseDouble(minDensity));
        }

        final String t2 = Utils.getOption("t2", options);
        if (!t2.isEmpty()) {
            setT2(Double.parseDouble(t2));
        }

        final String t1 = Utils.getOption("t1", options);
        if (!t1.isEmpty()) {
            setT1(Double.parseDouble(t1));
        }

        setDontReplaceMissingValues(Utils.getFlag('M', options));
        super.setOptions(options);
    }

    /**
     * Returns the current settings as an option array, followed by the options
     * of the superclass.
     *
     * @return the current option settings
     */
    @Override
    public String[] getOptions() {
        final List<String> opts = new ArrayList<String>();

        opts.add("-N");
        opts.add(String.valueOf(getNumClusters()));

        opts.add("-max-candidates");
        opts.add(String.valueOf(getMaxNumCandidateCanopiesToHoldInMemory()));

        opts.add("-periodic-pruning");
        opts.add(String.valueOf(getPeriodicPruningRate()));

        opts.add("-min-density");
        opts.add(String.valueOf(getMinimumCanopyDensity()));

        opts.add("-t2");
        opts.add(String.valueOf(getT2()));

        opts.add("-t1");
        opts.add(String.valueOf(getT1()));

        if (getDontReplaceMissingValues()) {
            opts.add("-M");
        }

        Collections.addAll(opts, super.getOptions());
        return opts.toArray(new String[0]);
    }

    /**
     * Tests whether two canopy bit sets (packed into longs) have at least one
     * canopy in common.
     *
     * @param first the first packed canopy assignment
     * @param second the second packed canopy assignment
     * @return true if any bit is set in both arrays; false otherwise, including for empty arrays
     * @throws Exception if the two arrays differ in length
     */
    public static boolean nonEmptyCanopySetIntersection(long[] first, long[] second) throws Exception {
        final int blocks = first.length;
        if (blocks != second.length) {
            throw new Exception("Canopy lists need to be the same length");
        }
        // Lengths are equal here, so an empty input simply skips the loop.
        boolean shared = false;
        for (int b = 0; b < blocks && !shared; b++) {
            shared = (first[b] & second[b]) != 0L;
        }
        return shared;
    }

    /**
     * Marks canopy number {@code toAssign} as assigned by setting the matching
     * bit in the packed array (64 canopies per long).
     *
     * @param assigned the packed canopy assignments to update in place
     * @param toAssign the index of the canopy to mark
     */
    private static void updateCanopyAssignment(long[] assigned, int toAssign) {
        assigned[toAssign / 64] |= 1L << (toAssign % 64);
    }

    /**
     * Computes which canopies the given instance belongs to. Every canopy whose
     * center lies within T1 of the instance is marked; if none qualifies, the
     * single nearest canopy is marked instead.
     *
     * @param inst the instance to assign (passed through the missing values replacer, if one is set)
     * @return the assignments packed into longs, 64 canopies per long; all zero when there are no
     *         canopies or no distance could be compared
     * @throws Exception if the missing values filter or the distance computation fails
     */
    public long[] assignCanopies(Instance inst) throws Exception {
        if (m_missingValuesReplacer != null) {
            m_missingValuesReplacer.input(inst);
            inst = m_missingValuesReplacer.output();
        }

        long[] assigned = new long[m_canopies.size() / 64 + 1];
        double minDist = Double.MAX_VALUE;
        int nearest = -1;
        boolean anyWithinT1 = false;

        for (int i = 0; i < m_canopies.numInstances(); ++i) {
            double dist = m_distanceFunction.distance(inst, m_canopies.instance(i));
            if (dist < minDist) {
                minDist = dist;
                nearest = i;
            }
            if (dist < m_t1) {
                Canopy.updateCanopyAssignment(assigned, i);
                anyWithinT1 = true;
            }
        }

        // Fall back to the nearest canopy, but only if there is one: with no canopies (or only NaN
        // distances) nearest stays -1, and passing -1 on would wrongly set bit 63 of the first block.
        if (!anyWithinT1 && nearest >= 0) {
            Canopy.updateCanopyAssignment(assigned, nearest);
        }
        return assigned;
    }

    /**
     * Folds one instance into the running sums of a canopy center.
     * Numeric attributes accumulate their value in a one-element array (missing
     * values are tallied in {@code numMissingNumerics} instead); nominal
     * attributes keep a count per value with a trailing slot for missing values.
     * Attributes of any other type are ignored. Empty per-attribute arrays are
     * allocated on first use.
     *
     * @param newInstance the instance to add
     * @param center the per-attribute sums of the canopy, updated in place
     * @param numMissingNumerics the per-attribute missing counts for numeric attributes, updated in place
     */
    protected void updateCanopyCenter(Instance newInstance, double[][] center, double[] numMissingNumerics) {
        for (int att = 0; att < newInstance.numAttributes(); att++) {
            if (newInstance.attribute(att).isNumeric()) {
                if (center[att].length == 0) {
                    center[att] = new double[1];
                }
                if (newInstance.isMissing(att)) {
                    numMissingNumerics[att] += 1.0;
                } else {
                    center[att][0] += newInstance.value(att);
                }
            } else if (newInstance.attribute(att).isNominal()) {
                if (center[att].length == 0) {
                    center[att] = new double[newInstance.attribute(att).numValues() + 1];
                }
                // Last slot counts missing values; the others count occurrences of each label.
                final int slot = newInstance.isMissing(att)
                    ? center[att].length - 1
                    : (int) newInstance.value(att);
                center[att][slot] += 1.0;
            }
        }
    }

    /**
     * Processes one training instance. The instance either joins the first
     * candidate canopy whose center is within T2 (increasing that canopy's
     * density and center sums) or, if there is room, becomes a new candidate
     * canopy itself. Candidate canopies are pruned every
     * {@code m_periodicPruningRate} instances; a rate of zero or less disables
     * periodic pruning.
     *
     * @param newInstance the instance to process
     * @throws Exception if the missing values filter or the distance computation fails
     */
    @Override
    public void updateClusterer(Instance newInstance) throws Exception {
        // Guard against a non-positive rate: the modulo would otherwise throw "/ by zero".
        if (m_periodicPruningRate > 0 && m_instanceCount > 0 && m_instanceCount % m_periodicPruningRate == 0) {
            pruneCandidateCanopies();
        }
        ++m_instanceCount;

        if (m_missingValuesReplacer != null) {
            m_missingValuesReplacer.input(newInstance);
            newInstance = m_missingValuesReplacer.output();
        }
        m_distanceFunction.update(newInstance);

        boolean addPoint = true;
        for (int i = 0; i < m_canopies.numInstances(); ++i) {
            if (m_distanceFunction.distance(newInstance, m_canopies.instance(i)) < m_t2) {
                // Within T2 of an existing canopy: absorb the instance into that canopy only.
                m_canopyT2Density.get(i)[0] += 1.0;
                addPoint = false;
                updateCanopyCenter(newInstance, m_canopyCenters.get(i), m_canopyNumMissingForNumerics.get(i));
                break;
            }
        }

        // Not close to any canopy: start a new candidate unless the candidate limit is reached.
        if (addPoint && m_canopies.numInstances() < m_maxCanopyCandidates) {
            m_canopies.add(newInstance);
            m_canopyT2Density.add(new double[]{1.0});
            double[][] center = new double[newInstance.numAttributes()][0];
            double[] numMissingNumerics = new double[newInstance.numAttributes()];
            updateCanopyCenter(newInstance, center, numMissingNumerics);
            m_canopyCenters.add(center);
            m_canopyNumMissingForNumerics.add(numMissingNumerics);
        }
    }

    /**
     * Removes every candidate canopy whose T2 density is below the minimum
     * cluster density. Nothing is done when the previous call removed nothing and
     * the candidate set is exactly at its maximum size. Removal replaces the
     * pruned entry with the last entry, so canopy order is not preserved.
     */
    protected void pruneCandidateCanopies() {
        final boolean full = m_canopies.size() == m_maxCanopyCandidates;
        if (full && !m_didPruneLastTime) {
            return;
        }

        m_didPruneLastTime = false;
        int idx = m_canopies.numInstances();
        // Walk backwards so that the entry swapped into idx has already been examined.
        while (--idx >= 0) {
            final double density = m_canopyT2Density.get(idx)[0];
            if (!(density < m_minClusterDensity)) {
                continue;
            }
            if (getDebug()) {
                System.err.println("Pruning a candidate canopy with density: " + density);
            }
            m_didPruneLastTime = true;

            removeBySwappingWithLast(m_canopyT2Density, idx);
            removeBySwappingWithLast(m_canopyCenters, idx);
            removeBySwappingWithLast(m_canopyNumMissingForNumerics, idx);

            final int last = m_canopies.numInstances() - 1;
            if (idx != last) {
                m_canopies.swap(idx, last);
            }
            m_canopies.delete(last);
        }
    }

    /** Removes the element at {@code index} by moving the last element of the list into its place. */
    private static <T> void removeBySwappingWithLast(List<T> list, int index) {
        final T tail = list.remove(list.size() - 1);
        if (index < list.size()) {
            list.set(index, tail);
        }
    }

    /**
     * Computes a cluster membership distribution for an instance. Each canopy
     * receives the score 1 / (1 + distance to the canopy center), and the scores
     * are then normalized.
     *
     * @param instance the instance to cluster (passed through the missing values replacer, if one is set)
     * @return the normalized membership scores, one per canopy
     * @throws Exception if there are no canopies, or filtering/normalization fails
     */
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {
        final boolean noCanopies = m_canopies == null || m_canopies.size() == 0;
        if (noCanopies) {
            throw new Exception("No canopies available to cluster with!");
        }

        final double[] membership = new double[numberOfClusters()];
        Instance query = instance;
        if (m_missingValuesReplacer != null) {
            m_missingValuesReplacer.input(query);
            query = m_missingValuesReplacer.output();
        }

        int c = 0;
        while (c < m_canopies.numInstances()) {
            final double dist = m_distanceFunction.distance(query, m_canopies.instance(c));
            membership[c] = 1.0 / (1.0 + dist);
            c++;
        }
        Utils.normalize(membership);
        return membership;
    }

    /**
     * Rebuilds {@code m_clusterCanopies} by running {@link #assignCanopies(Instance)}
     * on every canopy center. A failure for one center is printed to stderr and
     * that center gets no entry in the list.
     */
    private void assignCanopiesToCanopyCenters() {
        m_clusterCanopies = new ArrayList<long[]>();
        int c = 0;
        while (c < m_canopies.size()) {
            final Instance centre = m_canopies.instance(c);
            try {
                m_clusterCanopies.add(assignCanopies(centre));
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            c++;
        }
    }

    /**
     * Brings the number of canopies in line with the number of clusters the user
     * asked for, then recomputes the canopy-to-canopy assignments and replaces
     * the retained training data with a header-only copy.
     * <ul>
     * <li>No specific number requested (negative): canopies are left as they are.</li>
     * <li>More canopies than requested: only the densest ones are kept.</li>
     * <li>Fewer canopies than requested and training data is available: distinct
     * training instances are drawn at random to make up the difference.</li>
     * </ul>
     *
     * @param densities the T2 density of each current canopy, in canopy order
     */
    protected void adjustCanopies(double[] densities) {
        if (m_numClustersRequested >= 0) {
            final int found = m_canopies.numInstances();
            if (found > m_numClustersRequested) {
                keepDensestCanopies(densities);
            } else if (found < m_numClustersRequested && m_trainingData != null && m_trainingData.numInstances() > 0) {
                padWithRandomTrainingInstances();
            }
        }
        assignCanopiesToCanopyCenters();
        m_trainingData = new Instances(m_canopies, 0);
    }

    /** Keeps only the requested number of canopies, taking those with the highest density first. */
    private void keepDensestCanopies(double[] densities) {
        final int[] ascending = Utils.stableSort(densities);
        final int top = ascending.length - 1;

        final Instances kept = new Instances(m_canopies, 0);
        for (int taken = 0; taken < m_numClustersRequested; taken++) {
            kept.add(m_canopies.instance(ascending[top - taken]));
        }
        m_canopies = kept;

        // Carry the bookkeeping of the surviving canopies over in the same order.
        final ArrayList<double[][]> keptCenters = new ArrayList<double[][]>();
        final ArrayList<double[]> keptDensities = new ArrayList<double[]>();
        final ArrayList<double[]> keptMissing = new ArrayList<double[]>();
        for (int taken = 0; taken < kept.numInstances(); taken++) {
            final int source = ascending[top - taken];
            keptCenters.add(m_canopyCenters.get(source));
            keptDensities.add(m_canopyT2Density.get(source));
            keptMissing.add(m_canopyNumMissingForNumerics.get(source));
        }
        m_canopyCenters = keptCenters;
        m_canopyT2Density = keptDensities;
        m_canopyNumMissingForNumerics = keptMissing;
    }

    /** Adds randomly drawn, not yet used training instances as canopies until the requested number is reached. */
    private void padWithRandomTrainingInstances() {
        final Random rng = new Random(getSeed());
        for (int warmUp = 0; warmUp < 10; warmUp++) {
            rng.nextInt();
        }

        // Keys of all instances that already are canopies, so duplicates are not added.
        final HashMap<DecisionTableHashKey, Object> used = new HashMap<DecisionTableHashKey, Object>();
        DecisionTableHashKey key = null;
        for (int c = 0; c < m_canopies.numInstances(); c++) {
            try {
                key = new DecisionTableHashKey(m_canopies.instance(c), m_canopies.numAttributes(), true);
                used.put(key, null);
            } catch (Exception e2) {
                e2.printStackTrace();
            }
        }

        // Draw without replacement: each pick is swapped to the end of the shrinking range.
        for (int upper = m_trainingData.numInstances() - 1; upper >= 0; upper--) {
            final int pick = rng.nextInt(upper + 1);
            try {
                key = new DecisionTableHashKey(m_trainingData.instance(pick), m_trainingData.numAttributes(), true);
            } catch (Exception e3) {
                e3.printStackTrace();
            }
            if (!used.containsKey(key)) {
                final Instance chosen = m_trainingData.instance(pick);
                m_canopies.add(chosen);
                m_canopyT2Density.add(new double[]{1.0});
                final double[][] center = new double[chosen.numAttributes()][0];
                final double[] missingNumerics = new double[chosen.numAttributes()];
                updateCanopyCenter(chosen, center, missingNumerics);
                m_canopyCenters.add(center);
                m_canopyNumMissingForNumerics.add(missingNumerics);
                used.put(key, null);
            }
            m_trainingData.swap(upper, pick);
            if (m_canopies.numInstances() == m_numClustersRequested) {
                break;
            }
        }
    }

    /**
     * Finalizes training. After a last pruning pass, every canopy is replaced by
     * its center: the mean of the non-missing values for numeric attributes and
     * the most frequent value for nominal attributes (missing if all numeric
     * values were missing or if "missing" is the most frequent nominal entry).
     * Each center is weighted by the canopy's T2 density, and the canopies are
     * then adjusted to the requested number of clusters. Does nothing when there
     * are no canopies.
     */
    @Override
    public void updateFinished() {
        if (m_canopies == null || m_canopies.numInstances() == 0) {
            return;
        }
        pruneCandidateCanopies();

        final double[] densities = new double[m_canopies.size()];
        for (int c = 0; c < m_canopies.numInstances(); c++) {
            final double count = m_canopyT2Density.get(c)[0];
            final double[][] sums = m_canopyCenters.get(c);
            final double[] missing = m_canopyNumMissingForNumerics.get(c);

            final double[] centroid = new double[m_canopies.numAttributes()];
            for (int a = 0; a < m_canopies.numAttributes(); a++) {
                if (m_canopies.attribute(a).isNumeric()) {
                    centroid[a] = missing[a] == count
                        ? Utils.missingValue()
                        : sums[a][0] / (count - missing[a]);
                } else if (m_canopies.attribute(a).isNominal()) {
                    final int mode = Utils.maxIndex(sums[a]);
                    // The last slot of a nominal count array holds the number of missing values.
                    final boolean mostlyMissing = mode == sums[a].length - 1;
                    centroid[a] = mostlyMissing ? Utils.missingValue() : (double) mode;
                }
            }

            // Keep the storage type (sparse vs. dense) of the canopy being replaced.
            if (m_canopies.instance(c) instanceof SparseInstance) {
                m_canopies.set(c, new SparseInstance(1.0, centroid));
            } else {
                m_canopies.set(c, new DenseInstance(1.0, centroid));
            }
            m_canopies.instance(c).setWeight(count);
            densities[c] = count;
        }
        adjustCanopies(densities);
    }

    /**
     * Initializes the distance function with the supplied instances, first
     * running them through the missing values replacer if one is set.
     *
     * @param init the instances to initialize the distance function with
     * @throws Exception if filtering fails
     */
    public void initializeDistanceFunction(Instances init) throws Exception {
        final Instances prepared = m_missingValuesReplacer == null
            ? init
            : Filter.useFilter(init, m_missingValuesReplacer);
        m_distanceFunction.setInstances(prepared);
    }

    /**
     * Heuristically sets the T2 distance from the training data. Each nominal
     * attribute contributes 0.25; each numeric attribute with more than two
     * non-missing values and a non-zero range contributes half of its standard
     * deviation divided by its range. T2 becomes the square root of the total,
     * provided that is greater than zero; otherwise T2 is left unchanged.
     *
     * @param trainingBatch the batch of training data to base the heuristic on
     * @throws Exception declared for callers; attribute statistics are computed here
     */
    protected void setT2T1BasedOnStdDev(Instances trainingBatch) throws Exception {
        double total = 0.0;
        for (int a = 0; a < trainingBatch.numAttributes(); a++) {
            if (trainingBatch.attribute(a).isNominal()) {
                total += 0.25;
            } else if (trainingBatch.attribute(a).isNumeric()) {
                final AttributeStats stats = trainingBatch.attributeStats(a);
                final boolean enoughValues = trainingBatch.numInstances() - stats.missingCount > 2;
                if (enoughValues) {
                    final double sd = stats.numericStats.stdDev;
                    final double lo = stats.numericStats.min;
                    final double hi = stats.numericStats.max;
                    if (!Utils.isMissingValue(sd) && hi - lo > 0.0) {
                        total += 0.5 * sd / (hi - lo);
                    }
                }
            }
        }

        final double heuristicT2 = Math.sqrt(total);
        if (heuristicT2 > 0.0) {
            m_t2 = heuristicT2;
        }
    }

    /**
     * Builds the clusterer. With a non-empty dataset this runs in batch mode:
     * missing values are optionally replaced, the data is shuffled, T2 is set
     * heuristically if the user did not supply it, every instance is processed
     * and training is finalized. With an empty dataset (header only) the
     * clusterer is merely initialized for incremental training via
     * {@link #updateClusterer(Instance)}.
     *
     * @param data the training data, or a header-only dataset for incremental mode; not modified
     * @throws Exception if filtering or clustering fails
     */
    @Override
    public void buildClusterer(Instances data) throws Exception {
        m_t1 = m_userT1;
        m_t2 = m_userT2;
        // Reset per-run state so that rebuilding gives the same pruning schedule as a fresh object.
        m_instanceCount = 0;
        m_didPruneLastTime = true;

        final boolean batch = data.numInstances() > 0;
        if (!batch && m_userT2 < 0.0) {
            System.err.println("The heuristic for setting T2 based on std. dev. can't be used when running in incremental mode. Using default of 1.0.");
            m_t2 = 1.0;
        }

        m_canopyT2Density = new ArrayList<double[]>();
        m_canopyCenters = new ArrayList<double[][]>();
        m_canopyNumMissingForNumerics = new ArrayList<double[]>();

        if (batch) {
            if (!m_dontReplaceMissing) {
                m_missingValuesReplacer = new ReplaceMissingValues();
                m_missingValuesReplacer.setInputFormat(data);
                data = Filter.useFilter(data, m_missingValuesReplacer);
            } else {
                // Work on a copy: the shuffle below must not reorder the caller's dataset.
                data = new Instances(data);
            }
            Random r = new Random(getSeed());
            for (int i = 0; i < 10; ++i) {
                r.nextInt();
            }
            data.randomize(r);
            if (m_userT2 < 0.0) {
                setT2T1BasedOnStdDev(data);
            }
        }

        // A positive user T1 is used as is; a negative one is a multiplier of the effective T2.
        m_t1 = m_userT1 > 0.0 ? m_userT1 : -m_userT1 * m_t2;
        m_distanceFunction.setInstances(data);
        m_canopies = new Instances(data, 0);
        if (batch) {
            m_trainingData = new Instances(data);
        }

        for (int i = 0; i < data.numInstances(); ++i) {
            // The rate check avoids a division by zero when periodic pruning is disabled.
            if (getDebug() && m_periodicPruningRate > 0 && i % m_periodicPruningRate == 0) {
                System.err.println("Processed: " + i);
            }
            updateClusterer(data.instance(i));
        }
        updateFinished();
    }

    /**
     * Returns the number of clusters, i.e. the number of canopies currently held.
     *
     * @return the number of canopies
     * @throws Exception declared by the clusterer contract
     */
    @Override
    public int numberOfClusters() throws Exception {
        final int canopyCount = m_canopies.numInstances();
        return canopyCount;
    }

    /**
     * Sets the filter used to replace missing values in incoming instances.
     *
     * @param missingReplacer the filter to use; null disables replacement
     */
    public void setMissingValuesReplacer(Filter missingReplacer) {
        m_missingValuesReplacer = missingReplacer;
    }

    /**
     * Returns the canopies held by this clusterer.
     *
     * @return the canopies, one instance per canopy
     */
    public Instances getCanopies() {
        return m_canopies;
    }

    /**
     * Replaces the canopies held by this clusterer.
     *
     * @param canopies the canopies to use, one instance per canopy
     */
    public void setCanopies(Instances canopies) {
        m_canopies = canopies;
    }

    /**
     * Returns the packed canopy assignments computed for the canopy centers.
     *
     * @return one packed assignment array per canopy
     */
    public List<long[]> getClusterCanopyAssignments() {
        return m_clusterCanopies;
    }

    /**
     * Replaces the packed canopy assignments of the canopy centers.
     *
     * @param clusterCanopies one packed assignment array per canopy
     */
    public void setClusterCanopyAssignments(List<long[]> clusterCanopies) {
        m_clusterCanopies = clusterCanopies;
    }

    /**
     * Returns the T2 distance actually in effect (as opposed to the user setting).
     *
     * @return the effective T2 distance
     */
    public double getActualT2() {
        return m_t2;
    }

    /**
     * Returns the T1 distance actually in effect (as opposed to the user setting).
     *
     * @return the effective T1 distance
     */
    public double getActualT1() {
        return m_t1;
    }

    /**
     * Returns the tip text for the T1 property.
     *
     * @return the tip text
     */
    public String t1TipText() {
        final String tip = "The T1 distance to use. Values < 0 are taken as a positive "
            + "multiplier for the T2 distance";
        return tip;
    }

    /**
     * Sets the user-specified T1 distance. It takes effect the next time the clusterer is built.
     *
     * @param t1 the T1 distance; a negative value is used as a positive multiplier of T2
     */
    public void setT1(double t1) {
        m_userT1 = t1;
    }

    /**
     * Returns the user-specified T1 distance.
     *
     * @return the T1 setting as supplied by the user
     */
    public double getT1() {
        return m_userT1;
    }

    /**
     * Returns the tip text for the T2 property.
     *
     * @return the tip text
     */
    public String t2TipText() {
        // Fixed missing space in the user-visible text ("onlyworks" -> "only works").
        return "The T2 distance to use. Values < 0 indicate that this should be set using a heuristic "
            + "based on attribute standard deviation (note that this only works when batch training)";
    }

    /**
     * Sets the user-specified T2 distance. It takes effect the next time the clusterer is built.
     *
     * @param t2 the T2 distance; a negative value requests the std. dev. based heuristic
     */
    public void setT2(double t2) {
        m_userT2 = t2;
    }

    /**
     * Returns the user-specified T2 distance.
     *
     * @return the T2 setting as supplied by the user
     */
    public double getT2() {
        return m_userT2;
    }

    /**
     * Returns the tip text for the number-of-clusters property.
     *
     * @return the tip text
     */
    public String numClustersTipText() {
        final String tip = "Set number of clusters. -1 means number of clusters is "
            + "determined by T2 distance";
        return tip;
    }

    /**
     * Sets the number of clusters to produce.
     *
     * @param numClusters the requested number of clusters; a negative value lets T2 determine it
     * @throws Exception declared by the interface; not thrown by this implementation
     */
    @Override
    public void setNumClusters(int numClusters) throws Exception {
        m_numClustersRequested = numClusters;
    }

    /**
     * Returns the number of clusters requested by the user.
     *
     * @return the requested number of clusters
     */
    public int getNumClusters() {
        return m_numClustersRequested;
    }

    /**
     * Returns the tip text for the periodic pruning rate property.
     *
     * @return the tip text
     */
    public String periodicPruningRateTipText() {
        final String tip = "How often to prune low density canopies during training";
        return tip;
    }

    /**
     * Sets how often candidate canopies are pruned during training.
     *
     * @param p the number of training instances between pruning passes
     */
    public void setPeriodicPruningRate(int p) {
        m_periodicPruningRate = p;
    }

    /**
     * Returns how often candidate canopies are pruned during training.
     *
     * @return the number of training instances between pruning passes
     */
    public int getPeriodicPruningRate() {
        return m_periodicPruningRate;
    }

    /**
     * Returns the tip text for the minimum canopy density property.
     *
     * @return the tip text
     */
    public String minimumCanopyDensityTipText() {
        final String tip = "The minimum T2-based density below which a canopy will be "
            + "pruned during periodic pruning";
        return tip;
    }

    /**
     * Sets the minimum T2 density a candidate canopy needs in order to survive pruning.
     *
     * @param dens the minimum density
     */
    public void setMinimumCanopyDensity(double dens) {
        m_minClusterDensity = dens;
    }

    /**
     * Returns the minimum T2 density a candidate canopy needs in order to survive pruning.
     *
     * @return the minimum density
     */
    public double getMinimumCanopyDensity() {
        return m_minClusterDensity;
    }

    /**
     * Returns the tip text for the maximum-number-of-candidate-canopies property.
     * Kept under its original name for existing callers; see
     * {@link #maxNumCandidateCanopiesToHoldInMemoryTipText()} for the conventionally named variant.
     *
     * @return the tip text
     */
    public String maxNumCandidateCanopiesToHoldInMemory() {
        return "The maximum number of candidate canopies to retain in main memory during training. T2 distance and data characteristics determine how many candidate canopies are formed before periodic and final pruning are performed. There may not be enough memory available if T2 is set too low.";
    }

    /**
     * Returns the tip text for the maximum-number-of-candidate-canopies property
     * under the "&lt;property&gt;TipText" name used by every other tip text
     * method in this class, so that tooling looking tip texts up by that naming
     * pattern can find it.
     *
     * @return the tip text
     */
    public String maxNumCandidateCanopiesToHoldInMemoryTipText() {
        return maxNumCandidateCanopiesToHoldInMemory();
    }

    /**
     * Sets the maximum number of candidate canopies kept at any one time.
     *
     * @param max2 the maximum number of candidate canopies
     */
    public void setMaxNumCandidateCanopiesToHoldInMemory(int max2) {
        m_maxCanopyCandidates = max2;
    }

    /**
     * Returns the maximum number of candidate canopies kept at any one time.
     *
     * @return the maximum number of candidate canopies
     */
    public int getMaxNumCandidateCanopiesToHoldInMemory() {
        return m_maxCanopyCandidates;
    }

    /**
     * Returns the tip text for the "don't replace missing values" property.
     *
     * @return the tip text
     */
    public String dontReplaceMissingValuesTipText() {
        // The previous text ("Replace missing values ...") described the opposite of what
        // enabling this flag does: when set, buildClusterer() skips missing value replacement.
        return "Don't replace missing values globally with mean/mode.";
    }

    /**
     * Sets whether missing value replacement is skipped when building in batch mode.
     *
     * @param r true to skip replacing missing values
     */
    public void setDontReplaceMissingValues(boolean r) {
        m_dontReplaceMissing = r;
    }

    /**
     * Returns whether missing value replacement is skipped when building in batch mode.
     *
     * @return true if missing values are not replaced
     */
    public boolean getDontReplaceMissingValues() {
        return m_dontReplaceMissing;
    }

    /**
     * Formats a packed canopy assignment as text of the form {@code " <0,5,64>"},
     * listing the indices of all set bits in ascending order.
     *
     * @param assignments the packed canopy assignments, 64 canopies per long
     * @return the textual list of assigned canopy indices
     */
    public static String printSingleAssignment(long[] assignments) {
        final StringBuilder text = new StringBuilder(" <");
        boolean needsComma = false;
        for (int block = 0; block < assignments.length; block++) {
            final int base = block * 64;
            long remaining = assignments[block];
            // Peel off set bits from least to most significant.
            while (remaining != 0L) {
                final int bit = Long.numberOfTrailingZeros(remaining);
                if (needsComma) {
                    text.append(',');
                }
                text.append(base + bit);
                needsComma = true;
                remaining &= remaining - 1L;
            }
        }
        return text.append(">").toString();
    }

    /**
     * Formats a set of cluster centers, one per line, as
     * {@code "Cluster <index>: <instance>"}. Each line is followed by the
     * center's canopy assignments when an assignment list with exactly one entry
     * per center is supplied.
     *
     * @param dataPoints the cluster centers to print
     * @param canopyAssignments the packed canopy assignments per center; may be null
     * @return the formatted text
     */
    public static String printCanopyAssignments(Instances dataPoints, List<long[]> canopyAssignments) {
        final StringBuilder text = new StringBuilder();
        int row = 0;
        while (row < dataPoints.size()) {
            text.append("Cluster ").append(row).append(": ");
            text.append(dataPoints.instance(row));
            final boolean haveAssignments =
                canopyAssignments != null && canopyAssignments.size() == dataPoints.size();
            if (haveAssignments) {
                text.append(Canopy.printSingleAssignment(canopyAssignments.get(row)));
            }
            text.append("\n");
            row++;
        }
        return text.toString();
    }

    /**
     * Returns a textual description of the clusterer: optionally a header with
     * the number of canopies, then the effective T2 and T1 radii and the list of
     * canopy centers with their canopy assignments.
     *
     * @param header true to include the title and the number of canopies found
     * @return the description, or a short notice if no clusterer has been built
     */
    public String toString(boolean header) {
        if (m_canopies == null) {
            return "No clusterer built yet";
        }

        final StringBuilder out = new StringBuilder();
        if (header) {
            out.append("\nCanopy clustering\n=================\n")
               .append("\nNumber of canopies (cluster centers) found: ")
               .append(m_canopies.numInstances());
        }
        out.append("\nT2 radius: ").append(String.format("%-10.3f", m_t2));
        out.append("\nT1 radius: ").append(String.format("%-10.3f", m_t1));
        out.append("\n\n");
        out.append(Canopy.printCanopyAssignments(m_canopies, m_clusterCanopies));
        out.append("\n");
        return out.toString();
    }

    /**
     * Returns a textual description of the clusterer, including the header.
     *
     * @return the description
     */
    @Override
    public String toString() {
        final boolean withHeader = true;
        return toString(withHeader);
    }

    /**
     * Releases the per-canopy bookkeeping (center sums, T2 densities and missing
     * value counts) by setting the corresponding lists to null.
     */
    public void cleanUp() {
        m_canopyCenters = null;
        m_canopyT2Density = null;
        m_canopyNumMissingForNumerics = null;
    }

    /**
     * Merges the canopies of several clusterers into one new clusterer. All
     * canopies are pooled in list order; each pooled canopy is either merged into
     * the first already accepted canopy that lies within {@code aggregationT2}
     * (adding its weight to the density and its center sums and missing counts
     * to that canopy's bookkeeping) or accepted as a new canopy. The resulting
     * clusterer is finalized via {@link #updateFinished()}.
     * <p>
     * Note: the bookkeeping arrays of accepted canopies are shared with the input
     * clusterers and are updated in place.
     *
     * @param canopies the clusterers to aggregate; must contain at least one element
     * @param aggregationT1 the T1 distance for the resulting clusterer
     * @param aggregationT2 the T2 distance used for merging and for the resulting clusterer
     * @param finalDistanceFunction the distance function used for merging and by the result
     * @param missingValuesReplacer the missing values filter for the result; may be null
     * @param finalNumCanopies the number of clusters requested of the result
     * @return the aggregated clusterer
     */
    public static Canopy aggregateCanopies(List<Canopy> canopies, double aggregationT1, double aggregationT2, NormalizableDistance finalDistanceFunction, Filter missingValuesReplacer, int finalNumCanopies) {
        // Phase 1: pool the canopies of all inputs together with their bookkeeping.
        final Instances pooled = new Instances(canopies.get(0).getCanopies(), 0);
        final List<double[][]> pooledCenters = new ArrayList<double[][]>();
        final List<double[]> pooledMissing = new ArrayList<double[]>();
        for (Canopy source : canopies) {
            final Instances sourceCanopies = source.getCanopies();
            for (int i = 0; i < sourceCanopies.numInstances(); i++) {
                pooled.add(sourceCanopies.instance(i));
                pooledCenters.add(source.m_canopyCenters.get(i));
                pooledMissing.add(source.m_canopyNumMissingForNumerics.get(i));
            }
        }

        // Phase 2: greedily merge pooled canopies that lie within T2 of an accepted one.
        final Instances accepted = new Instances(pooled, 0);
        final List<Instance> acceptedList = new ArrayList<Instance>();
        final ArrayList<double[][]> acceptedCenters = new ArrayList<double[][]>();
        final ArrayList<double[]> acceptedMissing = new ArrayList<double[]>();
        final ArrayList<double[]> acceptedDensities = new ArrayList<double[]>();

        for (int i = 0; i < pooled.numInstances(); i++) {
            final Instance candidate = pooled.instance(i);
            final double[][] candidateCenter = pooledCenters.get(i);
            final double[] candidateMissing = pooledMissing.get(i);

            int target = -1;
            for (int j = 0; j < acceptedList.size() && target < 0; j++) {
                if (finalDistanceFunction.distance(candidate, acceptedList.get(j)) < aggregationT2) {
                    target = j;
                }
            }

            if (target < 0) {
                acceptedList.add(candidate);
                accepted.add(candidate);
                acceptedCenters.add(candidateCenter);
                acceptedMissing.add(candidateMissing);
                acceptedDensities.add(new double[]{candidate.weight()});
                continue;
            }

            // Fold the candidate's statistics into the canopy it merges with.
            acceptedDensities.get(target)[0] += candidate.weight();
            final double[][] center = acceptedCenters.get(target);
            final double[] missing = acceptedMissing.get(target);
            for (int k = 0; k < candidate.numAttributes(); k++) {
                missing[k] += candidateMissing[k];
                for (int l = 0; l < center[k].length; l++) {
                    center[k][l] += candidateCenter[k][l];
                }
            }
        }

        // Phase 3: wrap the result in a new clusterer and finalize it.
        final Canopy result = new Canopy();
        result.setCanopies(accepted);
        result.setMissingValuesReplacer(missingValuesReplacer);
        result.m_distanceFunction = finalDistanceFunction;
        result.m_canopyCenters = acceptedCenters;
        result.m_canopyNumMissingForNumerics = acceptedMissing;
        result.m_canopyT2Density = acceptedDensities;
        result.m_t2 = aggregationT2;
        result.m_t1 = aggregationT1;
        // Same effect as setNumClusters(), without having to handle its declared exception.
        result.m_numClustersRequested = finalNumCanopies;
        result.updateFinished();
        return result;
    }

    /**
     * Command line entry point: runs a new Canopy clusterer with the given options.
     *
     * @param args the command line options
     */
    public static void main(String[] args) {
        final Canopy clusterer = new Canopy();
        Canopy.runClusterer(clusterer, args);
    }
}

