/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.kobra.topicmodels;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.RandomGenerator;
import gnu.trove.list.array.TDoubleArrayList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;
import gnu.trove.set.hash.TIntHashSet;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Operator that scores test documents against per-word factor vectors.
 *
 * <p>The first input port supplies the test documents, either as a text attribute that is
 * split on single spaces or as a word-vector table whose attribute values are expanded into
 * repeated tokens. The second input port supplies one row per word; every attribute whose name
 * contains {@code "Topic_"} or {@code "svd_"} is read as one component of that word's vector.
 *
 * <p>For each of the {@code tests} repetitions every document is shuffled, each token is scored
 * by a smoothed, normalised cosine similarity between its vector and the sum of the vectors of
 * the tokens preceding it, and the logarithms of these scores are summed over all documents.
 * One output row per repetition is delivered in the attribute {@code "negloglikelihood"}.
 *
 * <p>NOTE(review): the value written is the plain sum of logarithms (it is not negated),
 * although the attribute is called "negloglikelihood" - confirm the intended sign.
 */
public class LSAEvaluationOperator
extends Operator {
    static final String PARAMETER_NUMITERATIONS = "iterations";
    static final String PARAMETER_NUMTOPICS = "number_of_topics";
    static final String PARAMETER_NUMTESTS = "tests";
    static final String PARAMETER_ALPHA = "alpha";
    static final String PARAMETER_TEXT_ATTRIBUTE = "text_attribute";
    // The key is misspelled ("smooting"); it is kept as is because it is a runtime parameter key.
    static final String PARAMETER_GAMMA = "smooting_gamma";
    static final String PARAMETER_SUPER = "supervised";
    /** Read from the parameter of the same name in doWork(); not used by the scoring itself. */
    int iters = 2000;
    /** Length of a word vector; recomputed in doWork() from the columns of the word input. */
    int numTopics = 4;
    /** Read from the parameter of the same name in doWork(); not used by the scoring itself. */
    double alpha = 0.25;
    /** Largest label value seen in the word-vector test set during the last run. */
    double maxTime = 0.0;
    private final InputPort input = (InputPort)this.getInputPorts().createPort("example set test set input as word-vectors");
    private final InputPort inputWords = (InputPort)this.getInputPorts().createPort("example set of words represented as factors (for instance singular vectors)");
    private final OutputPort output = (OutputPort)this.getOutputPorts().createPort("output neg log likelihoods");
    /** Random source for shuffling and sampling; (re)created at the start of every doWork() call. */
    Random rn = null;

    public LSAEvaluationOperator(OperatorDescription description) {
        super(description);
    }

    /**
     * Reads both inputs, runs the configured number of test repetitions and delivers one result
     * row per repetition.
     *
     * @throws OperatorException if a parameter or an input cannot be obtained
     */
    public void doWork() throws OperatorException {
        this.iters = this.getParameterAsInt(PARAMETER_NUMITERATIONS);
        this.numTopics = this.getParameterAsInt(PARAMETER_NUMTOPICS);
        this.alpha = this.getParameterAsDouble(PARAMETER_ALPHA);
        int numTests = this.getParameterAsInt(PARAMETER_NUMTESTS);
        boolean supervised = this.getParameterAsBoolean(PARAMETER_SUPER);
        boolean locSeed = this.getParameterAsBoolean("use_local_random_seed");
        int seed = this.getParameterAsInt("local_random_seed");
        this.rn = locSeed ? new Random(seed) : new Random();
        // Start from a clean maximum so a previous execution cannot leak into this one.
        this.maxTime = 0.0;

        List<Attribute> attributeList = new ArrayList<Attribute>();
        // Value type 2: presumably Ontology.NUMERICAL - confirm against the Ontology constants.
        Attribute resultAttribute = AttributeFactory.createAttribute((String)"negloglikelihood", (int)2);
        attributeList.add(resultAttribute);
        MemoryExampleTable table = new MemoryExampleTable(attributeList);
        // Row type 0: presumably DataRowFactory.TYPE_DOUBLE_ARRAY - confirm.
        DataRowFactory factory = new DataRowFactory(0, '.');

        ExampleSet exampleSet = (ExampleSet)this.input.getData(ExampleSet.class);
        // Taken from the set itself so that an empty test set does not fail on getExample(0).
        Attributes attr = exampleSet.getAttributes();
        int numDocuments = exampleSet.size();
        TIntArrayList[] documentTokens = new TIntArrayList[numDocuments];

        // Pick the text attribute: the named one, or the last matching attribute if no name is given.
        Attribute textAttribute = null;
        String colName = this.getParameterAsString(PARAMETER_TEXT_ATTRIBUTE);
        boolean anyTextColumn = colName == null || colName.equals("");
        for (Attribute att : attr) {
            // Value type 5: presumably Ontology.STRING - confirm against the Ontology constants.
            if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(att.getValueType(), 5)) {
                continue;
            }
            if (anyTextColumn || colName.equals(att.getName())) {
                textAttribute = att;
            }
        }
        boolean textMode = textAttribute != null;

        double[] labels = null;
        double[] predictions = null;
        // Maps a word to the token id used inside documentTokens. In text mode the keys are
        // lower-cased, otherwise they are only trimmed.
        TObjectIntHashMap<String> wordToId = new TObjectIntHashMap<String>();
        if (textMode) {
            int id = 0;
            for (Attribute att : attr) {
                if (att == textAttribute) {
                    continue;
                }
                wordToId.put(att.getName().trim().toLowerCase(), id);
                ++id;
            }
            for (int i = 0; i < numDocuments; ++i) {
                Example ex = exampleSet.getExample(i);
                String text = ex.getValueAsString(textAttribute);
                documentTokens[i] = new TIntArrayList();
                for (String token : text.split(" ")) {
                    String key = token.trim().toLowerCase();
                    // Tokens that are not attribute names are dropped.
                    if (!wordToId.contains(key)) {
                        continue;
                    }
                    documentTokens[i].add(wordToId.get(key));
                }
            }
        } else {
            if (attr.getLabel() != null) {
                labels = new double[numDocuments];
            }
            if (attr.getPredictedLabel() != null) {
                predictions = new double[numDocuments];
            }
            int nextId = 0;
            for (Attribute att : attr) {
                wordToId.put(att.getName().trim(), nextId);
                ++nextId;
            }
            for (int i = 0; i < numDocuments; ++i) {
                documentTokens[i] = new TIntArrayList();
                Example ex = exampleSet.getExample(i);
                if (labels != null) {
                    labels[i] = ex.getLabel();
                    if (labels[i] > this.maxTime) {
                        this.maxTime = labels[i];
                    }
                }
                if (predictions != null) {
                    predictions[i] = ex.getPredictedLabel();
                }
                for (Attribute att : attr) {
                    // The attribute value is truncated to an int and the word is repeated that often.
                    int count = (int)ex.getValue(att);
                    if (count <= 0) {
                        continue;
                    }
                    int wordId = wordToId.get(att.getName().trim());
                    for (int k = 0; k < count; ++k) {
                        documentTokens[i].add(wordId);
                    }
                }
                documentTokens[i].shuffle(this.rn);
            }
        }

        ExampleSet examplesProbs = (ExampleSet)this.inputWords.getData(ExampleSet.class);
        Attributes atts = examplesProbs.getAttributes();
        int numWords = examplesProbs.size();

        // The vector length is determined by the data, not by the number_of_topics parameter.
        List<Attribute> factorAttributes = new ArrayList<Attribute>();
        for (Attribute att : atts) {
            if (LSAEvaluationOperator.isFactorAttribute(att)) {
                factorAttributes.add(att);
            }
        }
        this.numTopics = factorAttributes.size();

        Attribute wordAttribute = atts.get("Word");
        TIntObjectHashMap<double[]> topicassigns = new TIntObjectHashMap<double[]>();
        TIntHashSet knownIds = new TIntHashSet();
        for (int row = 0; row < examplesProbs.size(); ++row) {
            Example wordExample = examplesProbs.getExample(row);
            // Without a "Word" column the row index is used as the word id.
            int wordId = row;
            if (wordAttribute != null) {
                String word = wordExample.getValueAsString(wordAttribute).trim();
                if (textMode) {
                    word = word.toLowerCase();
                }
                // get() returns the no-entry value 0 for unknown keys, which would silently
                // overwrite the vector of word 0; such words cannot occur in a document anyway.
                if (!wordToId.contains(word)) {
                    continue;
                }
                wordId = wordToId.get(word);
            }
            double[] vector = new double[this.numTopics];
            int component = 0;
            for (Attribute att : factorAttributes) {
                vector[component] = wordExample.getValue(att);
                ++component;
            }
            knownIds.add(wordId);
            topicassigns.put(wordId, vector);
        }

        // Drop tokens without a vector once; repeating this in every test run would be a no-op.
        for (TIntArrayList tokens : documentTokens) {
            TIntHashSet unknown = new TIntHashSet();
            for (int position = 0; position < tokens.size(); ++position) {
                int w = tokens.get(position);
                if (!knownIds.contains(w)) {
                    unknown.add(w);
                }
            }
            tokens.removeAll(unknown);
        }

        // The vector table does not change while scoring, so it is extracted only once.
        double[][] allVectors = topicassigns.values(new double[topicassigns.size()][]);
        // Both arrays are dereferenced in the supervised case, so both must be present.
        boolean addLabelError = supervised && predictions != null && labels != null;

        for (int te = 0; te < numTests; ++te) {
            double perplexity = 0.0;
            for (int d = 0; d < documentTokens.length; ++d) {
                TIntArrayList tokens = documentTokens[d];
                if (tokens.size() < 1) {
                    continue;
                }
                tokens.shuffle(this.rn);
                double[] props = this.predictiveProbabilities(tokens, topicassigns, allVectors, numWords);
                for (int t = 0; t < props.length; ++t) {
                    if (addLabelError) {
                        // NOTE(review): the absolute label error is added once per token - confirm intent.
                        perplexity += Math.log(props[t]) + Math.abs(predictions[d] - labels[d]);
                        continue;
                    }
                    perplexity += Math.log(props[t]);
                }
            }
            System.out.println(perplexity);
            DataRow row = factory.create(table.getNumberOfAttributes());
            table.addDataRow(row);
            row.set(resultAttribute, perplexity);
        }
        SimpleExampleSet set = new SimpleExampleSet((ExampleTable)table);
        this.output.deliver((IOObject)set);
    }

    /** Tells whether an attribute holds one component of a word vector, judged by its name. */
    private static boolean isFactorAttribute(Attribute att) {
        String name = att.getName();
        return name.contains("Topic_") || name.contains("svd_");
    }

    /**
     * Scores every token of one document given the tokens that precede it.
     *
     * <p>The first token gets {@code 1 / numWords}. Every later token gets
     * {@code (cos - min + 0.001) / (sum - V * (min - 0.001))}, where {@code cos} is the cosine
     * between the token's vector and the running sum of the preceding vectors, {@code min} and
     * {@code sum} are the minimum and the sum of that cosine over all {@code V} word vectors.
     *
     * @param tokens      non-empty token list; every id must have an entry in {@code wordVectors}
     * @param wordVectors vector per word id
     * @param allVectors  all values of {@code wordVectors}
     * @param numWords    number of rows of the word input
     * @return one score per token, in document order
     */
    private double[] predictiveProbabilities(TIntArrayList tokens, TIntObjectHashMap<double[]> wordVectors, double[][] allVectors, int numWords) {
        int length = tokens.size();
        double[] props = new double[length];
        props[0] = 1.0 / (double)numWords;
        // Running sum of the vectors of tokens[0..limit-1]; extended by one vector per step,
        // which adds the components in the same order as summing the prefix from scratch.
        double[] context = new double[this.numTopics];
        for (int limit = 1; limit < length; ++limit) {
            double[] previous = wordVectors.get(tokens.get(limit - 1));
            for (int k = 0; k < this.numTopics; ++k) {
                context[k] += previous[k];
            }
            double[] next = wordVectors.get(tokens.get(limit));
            double similarity = this.cos(next, context);
            double min = this.minCos(context, allVectors);
            double sum = this.sumCos(context, allVectors);
            props[limit] = (similarity - min + 0.001) / (sum - (double)wordVectors.size() * (min - 0.001));
        }
        return props;
    }

    /**
     * Returns the smallest cosine between {@code x} and any row of {@code Y}.
     *
     * @param x reference vector
     * @param Y vectors to compare against
     * @return the minimum cosine, or {@code Double.POSITIVE_INFINITY} if {@code Y} has no rows
     */
    public double minCos(double[] x, double[][] Y) {
        double min = Double.POSITIVE_INFINITY;
        for (int i = 0; i < Y.length; ++i) {
            double tmp = this.cos(x, Y[i]);
            if (tmp < min) {
                min = tmp;
            }
        }
        return min;
    }

    /**
     * Returns the sum of the cosines between {@code x} and every row of {@code Y}.
     *
     * @param x reference vector
     * @param Y vectors to compare against
     * @return the summed cosines, {@code 0.0} if {@code Y} has no rows
     */
    public double sumCos(double[] x, double[][] Y) {
        double sum = 0.0;
        for (int i = 0; i < Y.length; ++i) {
            sum += this.cos(x, Y[i]);
        }
        return sum;
    }

    /**
     * Computes the cosine of the angle between two vectors.
     *
     * @param x first vector; its length determines how many components are read
     * @param y second vector; must have at least {@code x.length} components
     * @return {@code dot(x, y) / (|x| * |y|)}, or {@code 0.0} if the product of the norms is zero
     */
    public double cos(double[] x, double[] y) {
        double nx = 0.0;
        double ny = 0.0;
        double dot = 0.0;
        for (int i = 0; i < x.length; ++i) {
            nx += x[i] * x[i];
            ny += y[i] * y[i];
            dot += x[i] * y[i];
        }
        double norms = Math.sqrt(nx) * Math.sqrt(ny);
        if (norms == 0.0) {
            // The cosine is undefined for a zero vector; 0.0 keeps NaN out of sums and minima.
            return 0.0;
        }
        return dot / norms;
    }

    /**
     * Draws {@code num} indices from the discrete distribution given by {@code probs}.
     *
     * <p>{@code probs} is normalised in place so that its entries sum to one.
     *
     * @param num   number of samples to draw
     * @param probs non-negative weights; modified by this call
     * @return the sampled indices
     */
    public int[] getDiscrete(int num, double[] probs) {
        if (this.rn == null) {
            // doWork() has not created the generator yet.
            this.rn = new Random();
        }
        double sum = 0.0;
        for (int i = 0; i < probs.length; ++i) {
            sum += probs[i];
        }
        for (int i = 0; i < probs.length; ++i) {
            probs[i] = probs[i] / sum;
        }
        int last = probs.length - 1;
        int[] res = new int[num];
        for (int s = 0; s < num; ++s) {
            int j = 0;
            double p = this.rn.nextDouble();
            double pr = probs[0];
            // Rounding can leave the cumulative sum slightly below p; stop at the last index
            // instead of reading past the end of the array.
            while (pr < p && j < last) {
                pr += probs[++j];
            }
            res[s] = j;
        }
        return res;
    }

    /**
     * Declares the parameters of this operator in addition to the inherited ones.
     *
     * @return the inherited parameter types followed by the operator's own and the random
     *         generator parameters
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeInt(PARAMETER_NUMITERATIONS, "Number of Iterations for Samplings.", 1, Integer.MAX_VALUE, 2000));
        types.add(new ParameterTypeInt(PARAMETER_NUMTESTS, "Number of test repetitions; one output row is produced per repetition.", 1, Integer.MAX_VALUE, 20));
        types.add(new ParameterTypeInt(PARAMETER_NUMTOPICS, "Number of Topics.", 1, Integer.MAX_VALUE, 5));
        types.add(new ParameterTypeDouble(PARAMETER_ALPHA, "Alpha", 0.0, Double.MAX_VALUE, 0.25));
        types.add(new ParameterTypeString(PARAMETER_TEXT_ATTRIBUTE, "Attribute name of text columns of interest.", ""));
        types.add(new ParameterTypeDouble(PARAMETER_GAMMA, "Gamma", 0.0, Double.MAX_VALUE, 1.0));
        types.add(new ParameterTypeBoolean(PARAMETER_SUPER, "Perform supervised LDA with numinal (Gaussian) or numeric (Beta) labels. ", false, false));
        types.addAll(RandomGenerator.getRandomGeneratorParameters((Operator)this));
        return types;
    }

    /** Empty entry point; does nothing. */
    public static void main(String[] args) {
    }
}

