/*
 * Decompiled with CFR 0.152.
 */
package weka.classifiers.functions.supportVector;

import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.functions.supportVector.Kernel;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;

/**
 * String subsequence kernel (SSK) with optional lambda pruning (SSK-LP).
 *
 * <p>The kernel operates on the first non-class string attribute of the
 * training data. Kernel values between training instances are memoised in a
 * direct-mapped cache ({@code m_keys}/{@code m_storage}); the recursion used
 * by the unpruned kernel is additionally memoised per evaluation in the
 * {@code cachekh*} arrays.
 *
 * <p>References for the algorithms are returned by
 * {@link #getTechnicalInformation()}.
 */
public class StringKernel
extends Kernel
implements TechnicalInformationHandler {
    /** Serialization version identifier. */
    private static final long serialVersionUID = -4902954211202690123L;

    /** Number of slots in the kernel-value cache used by {@link #eval}. */
    private int m_cacheSize = 250007;

    /** Upper bound on the number of slots of the per-evaluation recursion cache (0 = no bound). */
    private int m_internalCacheSize = 200003;

    /** Index of the string attribute the kernel works on; -1 if none was found. */
    private int m_strAttr;

    /** Cached kernel values, parallel to {@link #m_keys}. */
    private double[] m_storage;

    /** Cache keys, stored as {@code key + 1} so that the array default 0 marks an empty slot. */
    private long[] m_keys;

    /** Number of kernel evaluations that were not served from the cache. */
    private int m_kernelEvals;

    /** Number of instances in the data the kernel was built on. */
    private int m_numInsts;

    /** Pruning method ID: no pruning. */
    public static final int PRUNING_NONE = 0;

    /** Pruning method ID: lambda pruning. */
    public static final int PRUNING_LAMBDA = 1;

    /** The selectable pruning methods. */
    public static final Tag[] TAGS_PRUNING = new Tag[]{new Tag(PRUNING_NONE, "No pruning"), new Tag(PRUNING_LAMBDA, "Lambda pruning")};

    /** The currently selected pruning method (one of the {@code PRUNING_*} IDs). */
    protected int m_PruningMethod = PRUNING_NONE;

    /** The decay factor lambda; documented by the options as lying in (0,1). */
    protected double m_lambda = 0.5;

    /** The subsequence length the kernel is evaluated for. */
    private int m_subsequenceLength = 3;

    /** The maximum subsequence (match) length, only used with lambda pruning. */
    private int m_maxSubsequenceLength = 9;

    /** Largest exponent for which a power of lambda is precomputed. */
    protected static final int MAX_POWER_OF_LAMBDA = 10000;

    /** Precomputed powers of lambda (index = exponent); null until first needed. */
    protected double[] m_powersOflambda = null;

    /** Whether kernel values are normalized. */
    private boolean m_normalize = false;

    /** Number of slots of the recursion cache for the current evaluation. */
    private int maxCache;

    /** Recursion cache: values of {@link #kernelHelper}. */
    private double[] cachekh;

    /** Recursion cache: keys (address + 1) belonging to {@link #cachekh}. */
    private int[] cachekhK;

    /** Recursion cache: values of {@link #kernelHelper2}. */
    private double[] cachekh2;

    /** Recursion cache: keys (address + 1) belonging to {@link #cachekh2}. */
    private int[] cachekh2K;

    /** Multiplier of {@code n} in the recursion-cache address. */
    private int m_multX;

    /** Multiplier of {@code endIndexS} in the recursion-cache address. */
    private int m_multY;

    /** Multiplier of {@code endIndexT} in the recursion-cache address. */
    private int m_multZ;

    /** Multiplier of {@code remainingMatchLength} in the LP cache address; never assigned in this class. */
    private int m_multZZ;

    /** Whether the recursion cache is used for the current evaluation. */
    private boolean m_useRecursionCache = true;

    /**
     * Creates a kernel with default settings; {@link #buildKernel(Instances)}
     * has to be called before {@link #eval} can be used.
     */
    public StringKernel() {
    }

    /**
     * Creates and builds a kernel on the given data.
     *
     * @param data the training data
     * @param cacheSize the size of the kernel-value cache
     * @param subsequenceLength the subsequence length
     * @param lambda the decay factor
     * @param debug whether to print debug output
     * @throws Exception if building the kernel fails
     */
    public StringKernel(Instances data, int cacheSize, int subsequenceLength, double lambda, boolean debug) throws Exception {
        setDebug(debug);
        setCacheSize(cacheSize);
        setInternalCacheSize(200003);
        setSubsequenceLength(subsequenceLength);
        setMaxSubsequenceLength(-1);
        setLambda(lambda);
        buildKernel(data);
    }

    /**
     * Returns a description of this kernel, including its references.
     *
     * @return the description text
     */
    @Override
    public String globalInfo() {
        return "Implementation of the subsequence kernel (SSK) as described in [1] and of the subsequence kernel with lambda pruning (SSK-LP) as described in [2].\n\nFor more information, see\n\n" + getTechnicalInformation().toString();
    }

    /**
     * Returns the bibliographic references for SSK and SSK-LP.
     *
     * @return the article entry with the tech report attached as additional entry
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(TechnicalInformation.Type.ARTICLE);
        result.setValue(TechnicalInformation.Field.AUTHOR, "Huma Lodhi and Craig Saunders and John Shawe-Taylor and Nello Cristianini and Christopher J. C. H. Watkins");
        result.setValue(TechnicalInformation.Field.YEAR, "2002");
        result.setValue(TechnicalInformation.Field.TITLE, "Text Classification using String Kernels");
        result.setValue(TechnicalInformation.Field.JOURNAL, "Journal of Machine Learning Research");
        result.setValue(TechnicalInformation.Field.VOLUME, "2");
        result.setValue(TechnicalInformation.Field.PAGES, "419-444");
        result.setValue(TechnicalInformation.Field.HTTP, "http://www.jmlr.org/papers/v2/lodhi02a.html");
        TechnicalInformation additional = result.add(TechnicalInformation.Type.TECHREPORT);
        additional.setValue(TechnicalInformation.Field.AUTHOR, "F. Kleedorfer and A. Seewald");
        additional.setValue(TechnicalInformation.Field.YEAR, "2005");
        additional.setValue(TechnicalInformation.Field.TITLE, "Implementation of a String Kernel for WEKA");
        additional.setValue(TechnicalInformation.Field.INSTITUTION, "Oesterreichisches Forschungsinstitut fuer Artificial Intelligence");
        additional.setValue(TechnicalInformation.Field.ADDRESS, "Wien, Austria");
        additional.setValue(TechnicalInformation.Field.NUMBER, "TR-2005-13");
        return result;
    }

    /**
     * Lists the options of the superclass followed by the options of this kernel.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> result = new Vector<Option>();
        Enumeration en = super.listOptions();
        while (en.hasMoreElements()) {
            result.addElement((Option) en.nextElement());
        }
        // Build "0|1" for the synopsis and one "<id> = <text>" line per pruning method.
        StringBuilder desc = new StringBuilder();
        StringBuilder param = new StringBuilder();
        for (int i = 0; i < TAGS_PRUNING.length; ++i) {
            if (i > 0) {
                param.append("|");
            }
            Tag tag = TAGS_PRUNING[i];
            param.append(tag.getID());
            desc.append("\t").append(tag.getID()).append(" = ").append(tag.getReadable()).append("\n");
        }
        result.addElement(new Option("\tThe pruning method to use:\n" + desc + "\t(default: " + PRUNING_NONE + ")", "P", 1, "-P <" + param + ">"));
        result.addElement(new Option("\tThe size of the cache (a prime number).\n\t(default: 250007)", "C", 1, "-C <num>"));
        result.addElement(new Option("\tThe size of the internal cache (a prime number).\n\t(default: 200003)", "IC", 1, "-IC <num>"));
        result.addElement(new Option("\tThe lambda constant. Penalizes non-continuous subsequence\n\tmatches. Must be in (0,1).\n\t(default: 0.5)", "L", 1, "-L <num>"));
        result.addElement(new Option("\tThe length of the subsequence.\n\t(default: 3)", "ssl", 1, "-ssl <num>"));
        result.addElement(new Option("\tThe maximum length of the subsequence.\n\t(default: 9)", "ssl-max", 1, "-ssl-max <num>"));
        result.addElement(new Option("\tUse normalization.\n\t(default: no)", "N", 0, "-N"));
        return result.elements();
    }

    /**
     * Parses the given options; options that are absent are reset to their defaults.
     *
     * @param options the option array to parse
     * @throws Exception if an option value cannot be parsed
     * @throws IllegalArgumentException if lambda pruning is selected and
     *         ssl-max is smaller than 2*ssl
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String tmpStr = Utils.getOption('P', options);
        if (tmpStr.length() != 0) {
            setPruningMethod(new SelectedTag(Integer.parseInt(tmpStr), TAGS_PRUNING));
        } else {
            setPruningMethod(new SelectedTag(PRUNING_NONE, TAGS_PRUNING));
        }
        tmpStr = Utils.getOption('C', options);
        if (tmpStr.length() != 0) {
            setCacheSize(Integer.parseInt(tmpStr));
        } else {
            setCacheSize(250007);
        }
        tmpStr = Utils.getOption("IC", options);
        if (tmpStr.length() != 0) {
            setInternalCacheSize(Integer.parseInt(tmpStr));
        } else {
            setInternalCacheSize(200003);
        }
        tmpStr = Utils.getOption('L', options);
        if (tmpStr.length() != 0) {
            setLambda(Double.parseDouble(tmpStr));
        } else {
            setLambda(0.5);
        }
        tmpStr = Utils.getOption("ssl", options);
        if (tmpStr.length() != 0) {
            setSubsequenceLength(Integer.parseInt(tmpStr));
        } else {
            setSubsequenceLength(3);
        }
        tmpStr = Utils.getOption("ssl-max", options);
        if (tmpStr.length() != 0) {
            setMaxSubsequenceLength(Integer.parseInt(tmpStr));
        } else {
            setMaxSubsequenceLength(9);
        }
        setUseNormalization(Utils.getFlag('N', options));
        // ssl-max is only consulted by the lambda-pruning recursion, so the
        // constraint must not reject configurations that do not prune.
        if (m_PruningMethod == PRUNING_LAMBDA && getMaxSubsequenceLength() < 2 * getSubsequenceLength()) {
            throw new IllegalArgumentException("Lambda Pruning forbids even contiguous substring matches! Use a bigger value for ssl-max (at least 2*ssl).");
        }
        super.setOptions(options);
    }

    /**
     * Returns the current settings as an option array that
     * {@link #setOptions(String[])} can parse.
     *
     * @return the superclass options followed by the options of this kernel
     */
    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        for (String option : super.getOptions()) {
            result.add(option);
        }
        result.add("-P");
        result.add(String.valueOf(m_PruningMethod));
        result.add("-C");
        result.add(String.valueOf(getCacheSize()));
        result.add("-IC");
        result.add(String.valueOf(getInternalCacheSize()));
        result.add("-L");
        result.add(String.valueOf(getLambda()));
        result.add("-ssl");
        result.add(String.valueOf(getSubsequenceLength()));
        result.add("-ssl-max");
        result.add(String.valueOf(getMaxSubsequenceLength()));
        if (getUseNormalization()) {
            // "-N" is the normalization flag read by setOptions ("-L" is lambda).
            result.add("-N");
        }
        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns the tip text for the pruning method property.
     *
     * @return the tip text
     */
    public String pruningMethodTipText() {
        return "The pruning method.";
    }

    /**
     * Sets the pruning method; tags that do not belong to
     * {@link #TAGS_PRUNING} are ignored.
     *
     * @param value the pruning method to use
     */
    public void setPruningMethod(SelectedTag value) {
        if (value.getTags() == TAGS_PRUNING) {
            m_PruningMethod = value.getSelectedTag().getID();
        }
    }

    /**
     * Returns the pruning method.
     *
     * @return the currently selected pruning method
     */
    public SelectedTag getPruningMethod() {
        return new SelectedTag(m_PruningMethod, TAGS_PRUNING);
    }

    /**
     * Sets the size of the kernel-value cache and discards the current cache.
     * Negative values are rejected with a message on standard output.
     *
     * @param value the cache size (0 disables caching)
     */
    public void setCacheSize(int value) {
        if (value >= 0) {
            m_cacheSize = value;
            clean();
        } else {
            System.out.println("Cache size cannot be smaller than 0 (provided: " + value + ")!");
        }
    }

    /**
     * Returns the size of the kernel-value cache.
     *
     * @return the cache size
     */
    public int getCacheSize() {
        return m_cacheSize;
    }

    /**
     * Returns the tip text for the cache size property.
     *
     * @return the tip text
     */
    public String cacheSizeTipText() {
        return "The size of the cache (a prime number).";
    }

    /**
     * Sets the maximum size of the recursion cache and discards the
     * kernel-value cache. Negative values are rejected with a message on
     * standard output.
     *
     * @param value the maximum number of slots (0 = sized to fit the strings)
     */
    public void setInternalCacheSize(int value) {
        if (value >= 0) {
            m_internalCacheSize = value;
            clean();
        } else {
            System.out.println("Cache size cannot be smaller than 0 (provided: " + value + ")!");
        }
    }

    /**
     * Returns the maximum size of the recursion cache.
     *
     * @return the internal cache size
     */
    public int getInternalCacheSize() {
        return m_internalCacheSize;
    }

    /**
     * Returns the tip text for the internal cache size property.
     *
     * @return the tip text
     */
    public String internalCacheSizeTipText() {
        return "The size of the internal cache (a prime number).";
    }

    /**
     * Sets the subsequence length.
     *
     * @param value the subsequence length
     */
    public void setSubsequenceLength(int value) {
        m_subsequenceLength = value;
    }

    /**
     * Returns the subsequence length.
     *
     * @return the subsequence length
     */
    public int getSubsequenceLength() {
        return m_subsequenceLength;
    }

    /**
     * Returns the tip text for the subsequence length property.
     *
     * @return the tip text
     */
    public String subsequenceLengthTipText() {
        return "The subsequence length.";
    }

    /**
     * Sets the maximum subsequence length used by lambda pruning.
     *
     * @param value the maximum subsequence length
     */
    public void setMaxSubsequenceLength(int value) {
        m_maxSubsequenceLength = value;
    }

    /**
     * Returns the maximum subsequence length used by lambda pruning.
     *
     * @return the maximum subsequence length
     */
    public int getMaxSubsequenceLength() {
        return m_maxSubsequenceLength;
    }

    /**
     * Returns the tip text for the maximum subsequence length property.
     *
     * @return the tip text
     */
    public String maxSubsequenceLengthTipText() {
        return "The maximum subsequence length (theta in the paper)";
    }

    /**
     * Sets the decay factor lambda. When the value changes, the precomputed
     * powers of lambda and the cached kernel values are discarded because
     * both were derived from the previous lambda.
     *
     * @param value the new lambda
     */
    public void setLambda(double value) {
        if (value != m_lambda) {
            m_lambda = value;
            // Rebuilt lazily by getPowerOfLambda (or eagerly by initVars).
            m_powersOflambda = null;
            clean();
        }
    }

    /**
     * Returns the decay factor lambda.
     *
     * @return lambda
     */
    public double getLambda() {
        return m_lambda;
    }

    /**
     * Returns the tip text for the lambda property.
     *
     * @return the tip text
     */
    public String lambdaTipText() {
        return "Penalizes non-continuous subsequence matches, from (0,1)";
    }

    /**
     * Sets whether kernel values are normalized; a change discards the
     * kernel-value cache.
     *
     * @param value true to normalize
     */
    public void setUseNormalization(boolean value) {
        if (value != m_normalize) {
            clean();
        }
        m_normalize = value;
    }

    /**
     * Returns whether kernel values are normalized.
     *
     * @return true if normalization is used
     */
    public boolean getUseNormalization() {
        return m_normalize;
    }

    /**
     * Returns the tip text for the normalization property.
     *
     * @return the tip text
     */
    public String useNormalizationTipText() {
        return "Whether to use normalization.";
    }

    /**
     * Computes the kernel value between {@code inst1} and the training
     * instance with index {@code id2}.
     *
     * <p>Results are cached only when {@code id1} is non-negative and a
     * non-empty cache is allocated. If either string is empty the result is
     * 0 and nothing is cached.
     *
     * @param id1 the index of {@code inst1} in the training data, or a
     *        negative value if it is not part of it
     * @param id2 the index of the second instance in the training data
     * @param inst1 the first instance
     * @return the (possibly normalized) kernel value
     * @throws Exception if the cache key overflows
     */
    @Override
    public double eval(int id1, int id2, Instance inst1) throws Exception {
        if (m_Debug && id1 > -1 && id2 > -1) {
            System.err.println("\nEvaluation of string kernel for");
            System.err.println(m_data.instance(id1).stringValue(m_strAttr));
            System.err.println("and");
            System.err.println(m_data.instance(id2).stringValue(m_strAttr));
        }
        // A normalized kernel of an instance with itself is 1 by definition.
        if (id1 == id2 && m_normalize) {
            return 1.0;
        }
        long key = -1L;
        int location = -1;
        // The length check keeps a cache size of 0 from causing a modulo by zero.
        if (id1 >= 0 && m_keys != null && m_keys.length > 0) {
            // The kernel is symmetric, so both orders share one key.
            key = id1 > id2 ? (long)id1 * (long)m_numInsts + (long)id2 : (long)id2 * (long)m_numInsts + (long)id1;
            if (key < 0L) {
                throw new Exception("Cache overflow detected!");
            }
            location = (int)(key % (long)m_keys.length);
            if (m_keys[location] == key + 1L) {
                if (m_Debug) {
                    System.err.println("result (cached): " + m_storage[location]);
                }
                return m_storage[location];
            }
        }
        ++m_kernelEvals;
        long start = System.currentTimeMillis();
        Instance inst2 = m_data.instance(id2);
        char[] s1 = inst1.stringValue(m_strAttr).toCharArray();
        char[] s2 = inst2.stringValue(m_strAttr).toCharArray();
        if (s1.length == 0 || s2.length == 0) {
            return 0.0;
        }
        double result = m_normalize ? normalizedKernel(s1, s2) : unnormalizedKernel(s1, s2);
        if (m_Debug) {
            long duration = System.currentTimeMillis() - start;
            System.err.println("result: " + result);
            System.err.println("evaluation time:" + duration + "\n");
        }
        if (key != -1L) {
            m_storage[location] = result;
            m_keys[location] = key + 1L;
        }
        return result;
    }

    /**
     * Releases the kernel-value cache; subsequent evaluations are not cached
     * until the kernel is built again.
     */
    @Override
    public void clean() {
        m_storage = null;
        m_keys = null;
    }

    /**
     * Returns the number of kernel evaluations that were actually computed.
     *
     * @return the number of evaluations
     */
    @Override
    public int numEvals() {
        return m_kernelEvals;
    }

    /**
     * Cache hits are not tracked by this kernel.
     *
     * @return always -1
     */
    @Override
    public int numCacheHits() {
        return -1;
    }

    /**
     * Computes the kernel of {@code s} and {@code t} divided by
     * {@code sqrt(K(s,s) * K(t,t))}.
     *
     * @param s the first string
     * @param t the second string
     * @return the normalized kernel value; 0 if the normalization term is 0
     *         (e.g. a string has no subsequence of the requested length)
     */
    public double normalizedKernel(char[] s, char[] t) {
        double k1 = unnormalizedKernel(s, s);
        double k2 = unnormalizedKernel(t, t);
        double normTerm = Math.sqrt(k1 * k2);
        if (normTerm == 0.0) {
            // Avoid 0/0 = NaN leaking into the caller.
            return 0.0;
        }
        return unnormalizedKernel(s, t) / normTerm;
    }

    /**
     * Computes the unnormalized kernel of two strings using the configured
     * pruning method. The recursion cache is allocated for the duration of
     * the call (unpruned kernel only) and released afterwards.
     *
     * @param s the first string
     * @param t the second string
     * @return the unnormalized kernel value
     */
    public double unnormalizedKernel(char[] s, char[] t) {
        // Make s the longer of the two strings.
        if (t.length > s.length) {
            char[] buf = s;
            s = t;
            t = buf;
        }
        if (m_PruningMethod == PRUNING_NONE) {
            m_multX = (s.length + 1) * (t.length + 1);
            m_multY = t.length + 1;
            m_multZ = 1;
            // Never allocate more slots than there are distinct addresses.
            int fullSize = (m_subsequenceLength + 1) * m_multX;
            maxCache = m_internalCacheSize;
            if (maxCache == 0 || fullSize < maxCache) {
                maxCache = fullSize;
            }
            m_useRecursionCache = true;
            cachekhK = new int[maxCache];
            cachekh2K = new int[maxCache];
            cachekh = new double[maxCache];
            cachekh2 = new double[maxCache];
        } else if (m_PruningMethod == PRUNING_LAMBDA) {
            maxCache = 0;
            m_useRecursionCache = false;
        }
        double res = m_PruningMethod == PRUNING_LAMBDA
            ? kernelLP(m_subsequenceLength, s, s.length - 1, t, t.length - 1, m_maxSubsequenceLength)
            : kernel(m_subsequenceLength, s, s.length - 1, t, t.length - 1);
        cachekh = null;
        cachekhK = null;
        cachekh2 = null;
        cachekh2K = null;
        return res;
    }

    /**
     * Base-case value of the recursions.
     *
     * @param n the remaining subsequence length
     * @return 1 if {@code n} is 0, otherwise 0
     */
    protected double getReturnValue(int n) {
        if (n == 0) {
            return 1.0;
        }
        return 0.0;
    }

    /**
     * Computes the unpruned kernel for subsequence length {@code n} on the
     * prefixes {@code s[0..endIndexS]} and {@code t[0..endIndexT]}.
     *
     * @param n the subsequence length
     * @param s the first string
     * @param endIndexS the last index of {@code s} to consider
     * @param t the second string
     * @param endIndexT the last index of {@code t} to consider
     * @return the kernel value
     */
    protected double kernel(int n, char[] s, int endIndexS, char[] t, int endIndexT) {
        if (Math.min(endIndexS + 1, endIndexT + 1) < n) {
            return getReturnValue(n);
        }
        double lambdaSquared = getPowerOfLambda(2);
        double result = 0.0;
        for (int iS = endIndexS; iS > n - 2; --iS) {
            double buf = 0.0;
            char x = s[iS];
            // Every occurrence of x in t closes a match of the last character.
            for (int j = 0; j <= endIndexT; ++j) {
                if (t[j] == x) {
                    buf += kernelHelper(n - 1, s, iS - 1, t, j - 1);
                }
            }
            result += buf * lambdaSquared;
        }
        return result;
    }

    /**
     * First auxiliary recursion of the unpruned kernel (presumably K' of the
     * referenced paper), memoised in {@code cachekh}.
     *
     * @param n the remaining subsequence length
     * @param s the first string
     * @param endIndexS the last index of {@code s} to consider
     * @param t the second string
     * @param endIndexT the last index of {@code t} to consider
     * @return the value of the recursion
     */
    protected double kernelHelper(int n, char[] s, int endIndexS, char[] t, int endIndexT) {
        if (n <= 0) {
            return getReturnValue(n);
        }
        if (Math.min(endIndexS + 1, endIndexT + 1) < n) {
            return getReturnValue(n);
        }
        int adr = 0;
        if (m_useRecursionCache) {
            adr = m_multX * n + m_multY * endIndexS + m_multZ * endIndexT;
            if (cachekhK[adr % maxCache] == adr + 1) {
                return cachekh[adr % maxCache];
            }
        }
        double result = m_lambda * kernelHelper(n, s, endIndexS - 1, t, endIndexT) + kernelHelper2(n, s, endIndexS, t, endIndexT);
        if (m_useRecursionCache) {
            cachekhK[adr % maxCache] = adr + 1;
            cachekh[adr % maxCache] = result;
        }
        return result;
    }

    /**
     * Second auxiliary recursion of the unpruned kernel (presumably K'' of
     * the referenced paper), memoised in {@code cachekh2}.
     *
     * @param n the remaining subsequence length
     * @param s the first string
     * @param endIndexS the index of the character of {@code s} to match
     * @param t the second string
     * @param endIndexT the last index of {@code t} to consider
     * @return the value of the recursion
     */
    protected double kernelHelper2(int n, char[] s, int endIndexS, char[] t, int endIndexT) {
        if (endIndexS < 0 || endIndexT < 0) {
            return getReturnValue(n);
        }
        int adr = 0;
        if (m_useRecursionCache) {
            adr = m_multX * n + m_multY * endIndexS + m_multZ * endIndexT;
            if (cachekh2K[adr % maxCache] == adr + 1) {
                return cachekh2[adr % maxCache];
            }
        }
        double ret;
        if (s[endIndexS] == t[endIndexT]) {
            ret = m_lambda * (kernelHelper2(n, s, endIndexS, t, endIndexT - 1) + m_lambda * kernelHelper(n - 1, s, endIndexS - 1, t, endIndexT - 1));
        } else {
            ret = m_lambda * kernelHelper2(n, s, endIndexS, t, endIndexT - 1);
        }
        if (m_useRecursionCache) {
            cachekh2K[adr % maxCache] = adr + 1;
            cachekh2[adr % maxCache] = ret;
        }
        return ret;
    }

    /**
     * Computes the lambda-pruned kernel for subsequence length {@code n}.
     *
     * @param n the subsequence length
     * @param s the first string
     * @param endIndexS the last index of {@code s} to consider
     * @param t the second string
     * @param endIndexT the last index of {@code t} to consider
     * @param remainingMatchLength the match-length budget left for pruning
     * @return the kernel value
     */
    protected double kernelLP(int n, char[] s, int endIndexS, char[] t, int endIndexT, int remainingMatchLength) {
        if (Math.min(endIndexS + 1, endIndexT + 1) < n) {
            return getReturnValue(n);
        }
        if (remainingMatchLength == 0) {
            return getReturnValue(n);
        }
        double lambdaSquared = getPowerOfLambda(2);
        double result = 0.0;
        for (int iS = endIndexS; iS > n - 2; --iS) {
            double buf = 0.0;
            char x = s[iS];
            for (int j = 0; j <= endIndexT; ++j) {
                if (t[j] == x) {
                    // The matched pair of characters consumes two units of the budget.
                    buf += kernelHelperLP(n - 1, s, iS - 1, t, j - 1, remainingMatchLength - 2);
                }
            }
            result += buf * lambdaSquared;
        }
        return result;
    }

    /**
     * First auxiliary recursion of the lambda-pruned kernel. Caching is only
     * active when {@code m_useRecursionCache} is set, which
     * {@link #unnormalizedKernel} disables for lambda pruning.
     *
     * @param n the remaining subsequence length
     * @param s the first string
     * @param endIndexS the last index of {@code s} to consider
     * @param t the second string
     * @param endIndexT the last index of {@code t} to consider
     * @param remainingMatchLength the match-length budget left for pruning
     * @return the value of the recursion
     */
    protected double kernelHelperLP(int n, char[] s, int endIndexS, char[] t, int endIndexT, int remainingMatchLength) {
        if (n == 0) {
            return getReturnValue(n);
        }
        if (Math.min(endIndexS + 1, endIndexT + 1) < n) {
            return getReturnValue(n);
        }
        // n characters in each string still have to fit into the budget.
        if (remainingMatchLength < 2 * n) {
            return getReturnValue(n);
        }
        int adr = 0;
        if (m_useRecursionCache) {
            adr = m_multX * n + m_multY * endIndexS + m_multZ * endIndexT + m_multZZ * remainingMatchLength;
            // Look up the same arrays the result is stored into below.
            if (cachekhK[adr % maxCache] == adr + 1) {
                return cachekh[adr % maxCache];
            }
        }
        int rml = 0;
        double result = 0.0;
        for (int iS = endIndexS - remainingMatchLength; iS <= endIndexS; ++iS) {
            result *= m_lambda;
            result += kernelHelper2LP(n, s, iS, t, endIndexT, rml++);
        }
        if (m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0) {
            cachekhK[adr % maxCache] = adr + 1;
            cachekh[adr % maxCache] = result;
        }
        return result;
    }

    /**
     * Second auxiliary recursion of the lambda-pruned kernel. Caching is
     * only active when {@code m_useRecursionCache} is set.
     *
     * @param n the remaining subsequence length
     * @param s the first string
     * @param endIndexS the index of the character of {@code s} to match
     * @param t the second string
     * @param endIndexT the last index of {@code t} to consider
     * @param remainingMatchLength the match-length budget left for pruning
     * @return the value of the recursion
     */
    protected double kernelHelper2LP(int n, char[] s, int endIndexS, char[] t, int endIndexT, int remainingMatchLength) {
        if (remainingMatchLength < 2 * n) {
            return getReturnValue(n);
        }
        if (endIndexS < 0 || endIndexT < 0) {
            return getReturnValue(n);
        }
        int adr = 0;
        if (m_useRecursionCache) {
            adr = m_multX * n + m_multY * endIndexS + m_multZ * endIndexT + m_multZZ * remainingMatchLength;
            if (cachekh2K[adr % maxCache] == adr + 1) {
                return cachekh2[adr % maxCache];
            }
        }
        // None of the inputs change below, so the store condition is fixed.
        boolean storeResult = m_useRecursionCache && endIndexS >= 0 && endIndexT >= 0 && n >= 0;
        char x = s[endIndexS];
        double ret;
        if (x == t[endIndexT]) {
            ret = m_lambda * (kernelHelper2LP(n, s, endIndexS, t, endIndexT - 1, remainingMatchLength - 1) + m_lambda * kernelHelperLP(n - 1, s, endIndexS - 1, t, endIndexT - 1, remainingMatchLength - 2));
        } else {
            // Default when no occurrence of x lies within the remaining budget.
            ret = getReturnValue(n);
            int minIndex = endIndexT - remainingMatchLength;
            if (minIndex < 0) {
                minIndex = 0;
            }
            // Jump directly to the nearest earlier occurrence of x in t.
            for (int i = endIndexT; i >= minIndex; --i) {
                if (x == t[i]) {
                    int skipLength = endIndexT - i;
                    ret = getPowerOfLambda(skipLength) * kernelHelper2LP(n, s, endIndexS, t, i, remainingMatchLength - skipLength);
                    break;
                }
            }
        }
        if (storeResult) {
            cachekh2K[adr % maxCache] = adr + 1;
            cachekh2[adr % maxCache] = ret;
        }
        return ret;
    }

    /**
     * Computes lambda^0 .. lambda^{@link #MAX_POWER_OF_LAMBDA} by repeated
     * multiplication with the current lambda.
     *
     * @return the table of powers, indexed by exponent
     */
    private double[] calculatePowersOfLambda() {
        double[] powers = new double[MAX_POWER_OF_LAMBDA + 1];
        powers[0] = 1.0;
        double val = 1.0;
        for (int i = 1; i <= MAX_POWER_OF_LAMBDA; ++i) {
            val *= m_lambda;
            powers[i] = val;
        }
        return powers;
    }

    /**
     * Returns lambda raised to the given power, using the precomputed table
     * where possible. The table is built on first use if it does not exist.
     *
     * @param exponent the non-negative exponent
     * @return lambda to the power of {@code exponent}
     * @throws IllegalArgumentException if {@code exponent} is negative
     */
    private double getPowerOfLambda(int exponent) {
        if (exponent > MAX_POWER_OF_LAMBDA) {
            return Math.pow(m_lambda, exponent);
        }
        if (exponent < 0) {
            throw new IllegalArgumentException("only positive powers of lambda may be computed");
        }
        if (m_powersOflambda == null) {
            m_powersOflambda = calculatePowersOfLambda();
        }
        return m_powersOflambda[exponent];
    }

    /**
     * Initializes the kernel for the given data: resets the evaluation
     * counter, selects the first non-class string attribute, allocates the
     * kernel-value cache and precomputes the powers of lambda.
     *
     * @param data the data the kernel is built on
     */
    @Override
    protected void initVars(Instances data) {
        super.initVars(data);
        m_kernelEvals = 0;
        m_strAttr = -1;
        for (int i = 0; i < data.numAttributes(); ++i) {
            if (i != data.classIndex() && data.attribute(i).isString()) {
                m_strAttr = i;
                break;
            }
        }
        m_numInsts = m_data.numInstances();
        m_storage = new double[m_cacheSize];
        m_keys = new long[m_cacheSize];
        m_powersOflambda = calculatePowersOfLambda();
    }

    /**
     * Returns the capabilities of this kernel: string attributes, all class
     * types and missing class values.
     *
     * @return the capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capabilities.Capability.STRING_ATTRIBUTES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        return result;
    }

    /**
     * Builds the kernel on the given data by delegating to the superclass.
     *
     * @param data the training data
     * @throws Exception if the superclass fails to build the kernel
     */
    @Override
    public void buildKernel(Instances data) throws Exception {
        super.buildKernel(data);
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 9895 $");
    }
}

