/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.supervised.instance;

import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.SupervisedFilter;

/**
 * Supervised instance filter that produces a random subsample of the first
 * batch of data while limiting the "spread" between the rarest and the most
 * common class, and optionally capping the number of instances per class.
 *
 * <p>The first batch is buffered completely and subsampled in
 * {@link #batchFinished()}; instances of subsequent batches are passed
 * through unchanged.
 *
 * <p>Options: {@code -S <num>} random seed, {@code -M <num>} maximum class
 * distribution spread, {@code -X <num>} maximum count per class value,
 * {@code -W} adjust instance weights.
 */
public class SpreadSubsample
extends Filter
implements SupervisedFilter,
OptionHandler {
    static final long serialVersionUID = -3947033795243930016L;

    /** Seed for the random number generator used when drawing instances. */
    private int m_RandomSeed = 1;

    /** Maximum number of instances kept per class value (values <= 0 mean unlimited). */
    private int m_MaxCount;

    /** Maximum allowed ratio between class counts (0 means no limit). */
    private double m_DistributionSpread = 0.0;

    /** Whether the weights of the kept instances are rescaled per class. */
    private boolean m_AdjustWeights = false;

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Produces a random subsample of a dataset. The original dataset must fit entirely in memory. This filter allows you to specify the maximum \"spread\" between the rarest and most common class. For example, you may specify that there be at most a 2:1 difference in class frequencies. When used in batch mode, subsequent batches are NOT resampled.";
    }

    /**
     * Returns the tip text for the adjustWeights property.
     *
     * @return the tip text
     */
    public String adjustWeightsTipText() {
        return "Whether instance weights will be adjusted to maintain total weight per class.";
    }

    /**
     * Returns whether instance weights are adjusted.
     *
     * @return true if weights are adjusted
     */
    public boolean getAdjustWeights() {
        return this.m_AdjustWeights;
    }

    /**
     * Sets whether instance weights are adjusted.
     *
     * @param newAdjustWeights true to adjust the weights of the kept instances
     */
    public void setAdjustWeights(boolean newAdjustWeights) {
        this.m_AdjustWeights = newAdjustWeights;
    }

    /**
     * Lists the command line options understood by this filter.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> newVector = new Vector<Option>(4);
        newVector.addElement(new Option("\tSpecify the random number seed (default 1)", "S", 1, "-S <num>"));
        newVector.addElement(new Option("\tThe maximum class distribution spread.\n\t0 = no maximum spread, 1 = uniform distribution, 10 = allow at most\n\ta 10:1 ratio between the classes (default 0)", "M", 1, "-M <num>"));
        newVector.addElement(new Option("\tAdjust weights so that total weight per class is maintained.\n\tIndividual instance weighting is not preserved. (default no\n\tweights adjustment)", "W", 0, "-W"));
        // -X takes a value, so it is declared with one argument.
        newVector.addElement(new Option("\tThe maximum count for any class value (default 0 = unlimited).\n", "X", 1, "-X <num>"));
        return newVector.elements();
    }

    /**
     * Parses the given options. Options that are absent are reset to their
     * defaults (seed 1, spread 0, max count 0, no weight adjustment). If an
     * input format has already been set, it is set again afterwards.
     *
     * @param options the option strings to parse
     * @throws Exception if an option value cannot be parsed or applied
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String seedString = Utils.getOption('S', options);
        if (seedString.length() != 0) {
            this.setRandomSeed(Integer.parseInt(seedString));
        } else {
            this.setRandomSeed(1);
        }
        String maxString = Utils.getOption('M', options);
        if (maxString.length() != 0) {
            this.setDistributionSpread(Double.valueOf(maxString));
        } else {
            this.setDistributionSpread(0.0);
        }
        String maxCount = Utils.getOption('X', options);
        if (maxCount.length() != 0) {
            this.setMaxCount(Double.valueOf(maxCount));
        } else {
            this.setMaxCount(0.0);
        }
        this.setAdjustWeights(Utils.getFlag('W', options));
        if (this.getInputFormat() != null) {
            this.setInputFormat(this.getInputFormat());
        }
    }

    /**
     * Returns the current settings as an option array. The array always has
     * seven entries; unused trailing entries are empty strings.
     *
     * @return the option strings
     */
    @Override
    public String[] getOptions() {
        String[] options = new String[7];
        int current = 0;
        options[current++] = "-M";
        options[current++] = "" + this.getDistributionSpread();
        options[current++] = "-X";
        options[current++] = "" + this.getMaxCount();
        options[current++] = "-S";
        options[current++] = "" + this.getRandomSeed();
        if (this.getAdjustWeights()) {
            options[current++] = "-W";
        }
        while (current < options.length) {
            options[current++] = "";
        }
        return options;
    }

    /**
     * Returns the tip text for the distributionSpread property.
     *
     * @return the tip text
     */
    public String distributionSpreadTipText() {
        return "The maximum class distribution spread. (0 = no maximum spread, 1 = uniform distribution, 10 = allow at most a 10:1 ratio between the classes).";
    }

    /**
     * Sets the maximum class distribution spread.
     *
     * @param spread the maximum ratio between class counts (0 = no limit)
     */
    public void setDistributionSpread(double spread) {
        this.m_DistributionSpread = spread;
    }

    /**
     * Returns the maximum class distribution spread.
     *
     * @return the maximum ratio between class counts
     */
    public double getDistributionSpread() {
        return this.m_DistributionSpread;
    }

    /**
     * Returns the tip text for the maxCount property.
     *
     * @return the tip text
     */
    public String maxCountTipText() {
        return "The maximum count for any class value (0 = unlimited).";
    }

    /**
     * Sets the maximum count for any class value. The value is truncated to
     * an int.
     *
     * @param maxcount the maximum count (0 = unlimited)
     */
    public void setMaxCount(double maxcount) {
        this.m_MaxCount = (int)maxcount;
    }

    /**
     * Returns the maximum count for any class value.
     *
     * @return the maximum count
     */
    public double getMaxCount() {
        return this.m_MaxCount;
    }

    /**
     * Returns the tip text for the randomSeed property.
     *
     * @return the tip text
     */
    public String randomSeedTipText() {
        return "Sets the random number seed for subsampling.";
    }

    /**
     * Returns the random number seed.
     *
     * @return the seed
     */
    public int getRandomSeed() {
        return this.m_RandomSeed;
    }

    /**
     * Sets the random number seed.
     *
     * @param newSeed the seed
     */
    public void setRandomSeed(int newSeed) {
        this.m_RandomSeed = newSeed;
    }

    /**
     * Returns the capabilities of this filter: all attribute types, missing
     * values, and a nominal class.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enable(Capabilities.Capability.NOMINAL_CLASS);
        return result;
    }

    /**
     * Sets the format of the input instances. The output format is the same
     * as the input format.
     *
     * @param instanceInfo the input format
     * @return true, because the output format is available immediately
     * @throws Exception if the superclass rejects the input format
     */
    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        super.setInputFormat(instanceInfo);
        this.setOutputFormat(instanceInfo);
        return true;
    }

    /**
     * Inputs an instance. During the first batch the instance is buffered;
     * once the first batch is done, instances are pushed straight to the
     * output queue.
     *
     * @param instance the instance to input
     * @return true if the instance was pushed to the output queue
     * @throws IllegalStateException if no input format has been defined
     */
    @Override
    public boolean input(Instance instance) {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (this.isFirstBatchDone()) {
            this.push(instance);
            return true;
        }
        this.bufferInput(instance);
        return false;
    }

    /**
     * Signals the end of a batch. The first batch is subsampled here; later
     * batches are not resampled.
     *
     * @return true if there are instances pending output
     * @throws IllegalStateException if no input format has been defined
     */
    @Override
    public boolean batchFinished() {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (!this.isFirstBatchDone()) {
            this.createSubsample();
        }
        this.flushInput();
        this.m_NewBatch = true;
        this.m_FirstBatchDone = true;
        return this.numPendingOutput() != 0;
    }

    /**
     * Draws the subsample from the buffered input and pushes the selected
     * instances to the output queue. Instances with a missing class value
     * are never selected.
     */
    private void createSubsample() {
        Instances data = this.getInputFormat();
        // Sorting by class makes each class a contiguous index range.
        data.sort(data.classIndex());
        int[] classIndices = this.getClassIndices();
        int numClasses = data.numClasses();
        int numInstances = data.numInstances();

        // Per-class instance count and mean instance weight.
        int[] counts = new int[numClasses];
        double[] weights = new double[numClasses];
        for (int i = 0; i < numInstances; ++i) {
            Instance current = data.instance(i);
            if (current.classIsMissing()) {
                continue;
            }
            int cls = (int)current.classValue();
            counts[cls]++;
            weights[cls] += current.weight();
        }
        for (int c = 0; c < numClasses; ++c) {
            if (counts[c] > 0) {
                weights[c] /= (double)counts[c];
            }
        }

        // Smallest non-empty class.
        int min = -1;
        int minIndex = -1;
        for (int c = 0; c < numClasses; ++c) {
            if (counts[c] > 0 && (min < 0 || counts[c] < min)) {
                min = counts[c];
                minIndex = c;
            }
        }
        if (min < 0) {
            System.err.println("SpreadSubsample: *warning* none of the classes have any values in them.");
            return;
        }

        // Target number of instances per class.
        int[] newCounts = new int[numClasses];
        for (int c = 0; c < numClasses; ++c) {
            newCounts[c] = (int)Math.abs(Math.min((double)counts[c], (double)min * this.m_DistributionSpread));
            if (c == minIndex && this.m_DistributionSpread > 0.0 && this.m_DistributionSpread < 1.0) {
                newCounts[c] = counts[c];
            }
            if (this.m_DistributionSpread == 0.0) {
                newCounts[c] = counts[c];
            }
            if (this.m_MaxCount > 0) {
                newCounts[c] = Math.min(newCounts[c], this.m_MaxCount);
            }
            // A negative spread could yield a target above the class size,
            // which would make the draw-without-replacement loop below spin
            // forever; never ask for more instances than the class holds.
            newCounts[c] = Math.min(newCounts[c], counts[c]);
        }

        Random random = new Random(this.m_RandomSeed);
        // Marks instances already drawn so that sampling is without replacement.
        boolean[] taken = new boolean[numInstances];
        for (int c = 0; c < numClasses; ++c) {
            double newWeight = 1.0;
            if (this.m_AdjustWeights && newCounts[c] > 0) {
                newWeight = weights[c] * (double)counts[c] / (double)newCounts[c];
            }
            int start = classIndices[c];
            int range = classIndices[c + 1] - start;
            for (int k = 0; k < newCounts[c]; ++k) {
                int index;
                do {
                    // abs(r % range) equals abs(r) % range for every r except
                    // Integer.MIN_VALUE, where abs(r) is still negative; taking
                    // the remainder first keeps the offset inside [0, range).
                    index = start + Math.abs(random.nextInt() % range);
                } while (taken[index]);
                taken[index] = true;
                Instance newInst = (Instance)data.instance(index).copy();
                if (this.m_AdjustWeights) {
                    newInst.setWeight(newWeight);
                }
                this.push(newInst);
            }
        }
    }

    /**
     * Builds an index of where each class value starts in the (class-sorted)
     * input data. Entry {@code c} is the index of the first instance of class
     * {@code c}, and entry {@code c + 1} is one past its last instance. The
     * scan stops at the first instance with a missing class value, so such
     * instances are excluded from every class range.
     *
     * @return an array of length {@code numClasses + 1} with the range boundaries
     */
    private int[] getClassIndices() {
        Instances data = this.getInputFormat();
        int numInstances = data.numInstances();
        int[] classIndices = new int[data.numClasses() + 1];
        int currentClass = 0;
        // One past the last instance that has a class value.
        int end = numInstances;
        for (int i = 0; i < numInstances; ++i) {
            Instance current = data.instance(i);
            if (current.classIsMissing()) {
                end = i;
                break;
            }
            int classValue = (int)current.classValue();
            if (classValue == currentClass) {
                continue;
            }
            // Classes skipped between currentClass and classValue are empty
            // and start (and end) at i.
            for (int j = currentClass + 1; j <= classValue; ++j) {
                classIndices[j] = i;
            }
            currentClass = classValue;
        }
        // Remaining boundaries close at the end of the labelled data, not at
        // the end of the dataset, so missing-class instances stay out of range.
        for (int j = currentClass + 1; j < classIndices.length; ++j) {
            classIndices[j] = end;
        }
        return classIndices;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 9616 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param argv the command line arguments
     */
    public static void main(String[] argv) {
        SpreadSubsample.runFilter(new SpreadSubsample(), argv);
    }
}

