/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.kobra.topicmodels;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.kobra.data.CCSMatrix;
import com.rapidminer.kobra.topicmodels.SamplersDMRLDA;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDirectory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.RandomGenerator;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

/**
 * Operator that feeds a bag-of-words example set and a set of per-document
 * features into a {@link SamplersDMRLDA} sampler, runs its Gibbs sampling and
 * delivers the resulting distributions as example sets.
 *
 * <p>Inputs: the document/term-occurrence example set (required), an optional
 * example set holding a group label per document, and the document features.
 * Outputs: per-word topic weights, per-document topic weights and, when a
 * group attribute is configured and present, normalised topic counts per group.
 *
 * <p>When the {@code dfr} parameter is enabled the operator additionally
 * writes {@code dt.json}, {@code tw.json} (each also zipped) and
 * {@code topWords.txt} into the directory given by the {@code path} parameter.
 */
public class DMRLDAOperator
extends Operator {
    private static final Logger LOGGER = Logger.getLogger(DMRLDAOperator.class.getName());

    static final String PARAMETER_NUMITERATIONS = "iterations";
    static final String PARAMETER_NUMTOPICS = "number_of_topics";
    static final String PARAMETER_ALPHA = "alpha";
    static final String PARAMETER_BETA = "beta";
    static final String PARAMETER_GROUP = "group";
    static final String PARAMETER_SIGMA = "sigma";
    static final String PARAMETER_LAMBDA = "lambda";
    static final String PARAMETER_DFR = "dfr";
    static final String PARAMETER_PATH = "path";

    // Working copies of the parameters; doWork() overwrites them on every run.
    int iters = 2000;
    int numTopics = 4;
    double alpha = 0.25;
    double beta = 0.1;
    String path = "/home/poelitz/work/Datasets/ResultData/acl2015/";

    // Port creation order is significant, so the declaration order is kept.
    private final InputPort input = (InputPort)this.getInputPorts().createPort("example set of documents as Bag-of-Words vectors with term occurrences");
    private final InputPort inputGroup = (InputPort)this.getInputPorts().createPort("example set of groups for each document (optional)");
    private final InputPort inputDocFeatures = (InputPort)this.getInputPorts().createPort("example set containing document features");
    private final OutputPort outputWords = (OutputPort)this.getOutputPorts().createPort("example set of word distributions for the topics");
    private final OutputPort outputDocs = (OutputPort)this.getOutputPorts().createPort("example set of topic distributions for the documents");
    private final OutputPort outputGroups = (OutputPort)this.getOutputPorts().createPort("example set of counts of topics assigned to groups");

    /** Number of top-weighted words per topic written to the export files. */
    int topK = 40;

    public DMRLDAOperator(OperatorDescription description) {
        super(description);
    }

    /**
     * Reads the inputs, runs the sampler and delivers the result example sets.
     *
     * @throws OperatorException if the document set or the document feature
     *         set is missing or empty, if the group set has fewer rows than
     *         there are documents, or if reading a parameter or port fails
     */
    public void doWork() throws OperatorException {
        boolean locSeed = this.getParameterAsBoolean("use_local_random_seed");
        int seed = this.getParameterAsInt("local_random_seed");
        this.iters = this.getParameterAsInt(PARAMETER_NUMITERATIONS);
        this.numTopics = this.getParameterAsInt(PARAMETER_NUMTOPICS);
        this.alpha = this.getParameterAsDouble(PARAMETER_ALPHA);
        this.beta = this.getParameterAsDouble(PARAMETER_BETA);
        this.path = this.getParameterAsString(PARAMETER_PATH);

        ExampleSet exampleSet = (ExampleSet)this.input.getData(ExampleSet.class);
        int numDocs = exampleSet.size();
        if (numDocs == 0) {
            throw new OperatorException("The document example set is empty; at least one document is required.");
        }

        // Every attribute of the document set is one vocabulary entry.
        Attributes wordAttributes = exampleSet.getAttributes();
        int numWords = wordAttributes.size();
        String[] words = new String[numWords];
        int nextWord = 0;
        for (Attribute att : wordAttributes) {
            words[nextWord++] = att.getName();
        }

        // Expand the occurrence counts into parallel (document id, word id) token arrays.
        TIntArrayList docToken = new TIntArrayList();
        TIntArrayList wordToken = new TIntArrayList();
        for (int docId = 0; docId < numDocs; ++docId) {
            Example doc = exampleSet.getExample(docId);
            int wordId = 0;
            for (Attribute att : wordAttributes) {
                int occurrences = (int)doc.getValue(att);
                for (int k = 0; k < occurrences; ++k) {
                    docToken.add(docId);
                    wordToken.add(wordId);
                }
                ++wordId;
            }
        }

        double[][] docFeatures = this.readDocumentFeatures();

        SamplersDMRLDA sampler = new SamplersDMRLDA();
        sampler.features = docFeatures;
        sampler.sigma = this.getParameterAsDouble(PARAMETER_SIGMA);
        sampler.lambda = this.getParameterAsDouble(PARAMETER_LAMBDA);
        sampler.numFeatures = docFeatures[0].length;
        sampler.init(docToken.toArray(), wordToken.toArray(), this.numTopics, numWords, numDocs, this.iters, this.beta, this.alpha, locSeed, seed);
        sampler.GibbsSampling();

        double[][] docTopics = sampler.documentDistribution();
        boolean dfr = this.getParameterAsBoolean(PARAMETER_DFR);
        if (dfr) {
            this.writeDT(docTopics);
        }
        this.outputDocs.deliver((IOObject)this.buildDocumentTable(docTopics, numDocs));

        double[][] wordTopics = sampler.wordDistribution();
        if (dfr) {
            this.writeTW(words, wordTopics, this.alpha);
            this.writeTopWords(words, wordTopics);
        }
        this.outputWords.deliver((IOObject)this.buildWordTable(words, wordTopics));

        String gAtt = this.getParameter(PARAMETER_GROUP);
        ExampleSet exGroup = (ExampleSet)this.inputGroup.getDataOrNull(ExampleSet.class);
        // Compare by content, not by reference, and tolerate an unset parameter.
        if (gAtt != null && !gAtt.isEmpty() && exGroup != null) {
            Attribute groupAttribute = exGroup.getAttributes().get(gAtt);
            if (groupAttribute != null) {
                this.outputGroups.deliver((IOObject)this.buildGroupTable(exGroup, groupAttribute, sampler.getTokenToTopic(), numDocs));
            }
        }
    }

    /**
     * Copies the document feature example set into a dense row-per-example matrix.
     *
     * @return matrix with one row per example and one column per attribute
     * @throws OperatorException if the feature port delivers nothing or an empty set
     */
    private double[][] readDocumentFeatures() throws OperatorException {
        ExampleSet featureSet = (ExampleSet)this.inputDocFeatures.getDataOrNull(ExampleSet.class);
        if (featureSet == null || featureSet.size() == 0) {
            throw new OperatorException("A non-empty example set of document features is required at the document feature input.");
        }
        Attributes featureAttributes = featureSet.getAttributes();
        double[][] features = new double[featureSet.size()][featureAttributes.size()];
        for (int row = 0; row < features.length; ++row) {
            Example ex = featureSet.getExample(row);
            int col = 0;
            for (Attribute att : featureAttributes) {
                features[row][col++] = ex.getValue(att);
            }
        }
        return features;
    }

    /**
     * Writes {@code probs[t][column]} for every topic {@code t} into the row's
     * topic columns and reports the index of the largest strictly positive value.
     *
     * @return index of the topic with the highest weight, or -1 if no weight is above zero
     */
    private int fillTopicColumns(DataRow row, List<Attribute> attributes, int firstTopicColumn, double[][] probs, int column) {
        int top = -1;
        double max = 0.0;
        for (int t = 0; t < this.numTopics; ++t) {
            double p = probs[t][column];
            if (p > max) {
                max = p;
                top = t;
            }
            row.set(attributes.get(firstTopicColumn + t), p);
        }
        return top;
    }

    /** Builds the example set with columns Doc (1-based), Topic (arg-max index) and Topic_0..Topic_n-1. */
    private SimpleExampleSet buildDocumentTable(double[][] probs, int numDocs) {
        List<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(AttributeFactory.createAttribute("Doc", 2));
        attributes.add(AttributeFactory.createAttribute("Topic", 2));
        for (int t = 0; t < this.numTopics; ++t) {
            attributes.add(AttributeFactory.createAttribute("Topic_" + t, 2));
        }
        MemoryExampleTable table = new MemoryExampleTable(attributes);
        DataRowFactory factory = new DataRowFactory(0, '.');
        for (int doc = 0; doc < numDocs; ++doc) {
            DataRow row = factory.create(table.getNumberOfAttributes());
            table.addDataRow(row);
            row.set(attributes.get(0), (double)(doc + 1));
            int top = this.fillTopicColumns(row, attributes, 2, probs, doc);
            row.set(attributes.get(1), (double)top);
        }
        return new SimpleExampleSet((ExampleTable)table);
    }

    /** Builds the example set with columns Word, Word_id (1-based), Topic (arg-max index) and Topic_0..Topic_n-1. */
    private SimpleExampleSet buildWordTable(String[] words, double[][] probs) {
        List<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(AttributeFactory.createAttribute("Word", 5));
        attributes.add(AttributeFactory.createAttribute("Word_id", 2));
        attributes.add(AttributeFactory.createAttribute("Topic", 2));
        for (int t = 0; t < this.numTopics; ++t) {
            attributes.add(AttributeFactory.createAttribute("Topic_" + t, 2));
        }
        MemoryExampleTable table = new MemoryExampleTable(attributes);
        DataRowFactory factory = new DataRowFactory(0, '.');
        Attribute wordAttribute = attributes.get(0);
        for (int w = 0; w < words.length; ++w) {
            DataRow row = factory.create(table.getNumberOfAttributes());
            table.addDataRow(row);
            row.set(wordAttribute, (double)wordAttribute.getMapping().mapString(words[w]));
            row.set(attributes.get(1), (double)(w + 1));
            int top = this.fillTopicColumns(row, attributes, 3, probs, w);
            row.set(attributes.get(2), (double)top);
        }
        return new SimpleExampleSet((ExampleTable)table);
    }

    /**
     * Builds the per-group example set: one row per distinct group label with
     * the group's topic counts divided by the group's total count.
     *
     * @throws OperatorException if the group set has fewer rows than there are documents
     */
    private SimpleExampleSet buildGroupTable(ExampleSet exGroup, Attribute groupAttribute, int[] assignments, int numDocs) throws OperatorException {
        if (exGroup.size() < numDocs) {
            throw new OperatorException("The group example set has " + exGroup.size() + " rows but " + numDocs + " documents were given; one group row per document is required.");
        }
        TObjectIntHashMap<String> groupIds = new TObjectIntHashMap<String>();
        TIntObjectHashMap<int[]> groupCounts = new TIntObjectHashMap<int[]>();
        int nextGroup = 1;
        for (int i = 0; i < numDocs; ++i) {
            String group = exGroup.getExample(i).getValueAsString(groupAttribute);
            int[] counts;
            if (groupIds.contains(group)) {
                counts = groupCounts.get(groupIds.get(group));
            } else {
                counts = new int[this.numTopics];
                groupCounts.put(nextGroup, counts);
                groupIds.put(group, nextGroup);
                ++nextGroup;
            }
            // NOTE(review): assignments comes from getTokenToTopic() but is indexed by
            // document position here. If it holds one entry per token, the counts should
            // be accumulated per token via that token's document - confirm against
            // SamplersDMRLDA before changing.
            counts[assignments[i]] += 1;
        }

        List<Attribute> attributes = new ArrayList<Attribute>();
        attributes.add(AttributeFactory.createAttribute("Group", 5));
        for (int t = 0; t < this.numTopics; ++t) {
            // NOTE(review): the cells written below are fractions (count / sum) while the
            // other outputs use value type 2 for their weights - confirm type 3 is intended.
            attributes.add(AttributeFactory.createAttribute("Topic_" + (t + 1), 3));
        }
        MemoryExampleTable table = new MemoryExampleTable(attributes);
        DataRowFactory factory = new DataRowFactory(0, '.');
        Attribute labelAttribute = attributes.get(0);
        // keys() materialises a new array on each call, so fetch it once.
        Object[] keys = groupIds.keys();
        for (Object rawKey : keys) {
            String key = (String)rawKey;
            int[] counts = groupCounts.get(groupIds.get(key));
            DataRow row = factory.create(table.getNumberOfAttributes());
            table.addDataRow(row);
            row.set(labelAttribute, (double)labelAttribute.getMapping().mapString(key));
            double sum = 0.0;
            for (int count : counts) {
                sum += (double)count;
            }
            for (int t = 0; t < counts.length; ++t) {
                row.set(attributes.get(t + 1), (double)counts[t] / sum);
            }
        }
        return new SimpleExampleSet((ExampleTable)table);
    }

    /**
     * Prints a JSON object with the arrays {@code i} (the given topics),
     * {@code p} (the running index 0..n-1) and {@code x} (each value of
     * {@code pr} multiplied by 1000 and truncated) to standard output.
     *
     * @param topics topic index per entry
     * @param pr weight per entry; must have at least {@code topics.length} elements
     */
    public void writeDT(int[] topics, double[] pr) {
        StringBuilder i = new StringBuilder("\"i\":[");
        StringBuilder p = new StringBuilder("\"p\":[");
        StringBuilder x = new StringBuilder("\"x\":[");
        for (int d = 0; d < topics.length; ++d) {
            if (d > 0) {
                i.append(',');
                p.append(',');
                x.append(',');
            }
            i.append(topics[d]);
            p.append(d);
            // Every weight, including the first one, is scaled the same way.
            x.append((int)(pr[d] * 1000.0));
        }
        System.out.println("{" + i + "]," + p + "]," + x + "]}");
    }

    /**
     * Transposes the topic-by-document matrix, converts each weight to a
     * truncated percentage, stores it as a {@link CCSMatrix} and writes its
     * row indices, column pointers and values as {@code dt.json} (plus a zip
     * copy) into the configured output directory.
     *
     * @param td weights indexed as {@code td[topic][document]}
     */
    public void writeDT(double[][] td) {
        if (td == null || td.length == 0 || td[0].length == 0) {
            LOGGER.warning("No document-topic weights available; dt.json was not written.");
            return;
        }
        double[][] dt = new double[td[0].length][td.length];
        for (int doc = 0; doc < dt.length; ++doc) {
            for (int topic = 0; topic < dt[doc].length; ++topic) {
                dt[doc][topic] = (int)(td[topic][doc] * 100.0);
            }
        }
        CCSMatrix ma = CCSMatrix.from2DArray(dt);
        double[] values = ma.values;
        int[] truncated = new int[values.length];
        for (int k = 0; k < values.length; ++k) {
            truncated[k] = (int)values[k];
        }
        StringBuilder json = new StringBuilder("{");
        appendIntArray(json, "i", ma.rowIndices);
        json.append(',');
        appendIntArray(json, "p", ma.columnPointers);
        json.append(',');
        appendIntArray(json, "x", truncated);
        json.append('}');
        this.writeAndZip(json.toString(), this.outputFile("dt.json"));
    }

    /**
     * Appends {@code "key": [v0 , v1, v2]} to {@code out}. The blank after the
     * first element reproduces the layout of the files written so far.
     */
    private static void appendIntArray(StringBuilder out, String key, int[] values) {
        out.append('"').append(key).append("\": [");
        if (values.length > 0) {
            out.append(values[0]).append(' ');
            for (int k = 1; k < values.length; ++k) {
                out.append(", ").append(values[k]);
            }
        }
        out.append(']');
    }

    /**
     * Resolves a file name against the configured output directory, inserting
     * a separator when the directory does not already end with one.
     */
    private String outputFile(String name) {
        if (this.path == null || this.path.isEmpty()) {
            return name;
        }
        return new File(this.path, name).getPath();
    }

    /**
     * Writes {@code r} as UTF-8 text to {@code file} and then stores a zipped
     * copy next to it as {@code file + ".zip"}. Failures are logged and do not
     * abort the caller; the zip step is skipped when the text file could not
     * be written.
     *
     * @param r text to write
     * @param file path of the text file
     */
    public void writeAndZip(String r, String file) {
        try (Writer writer = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(file), StandardCharsets.UTF_8))) {
            writer.write(r);
        }
        catch (IOException e) {
            LOGGER.log(Level.WARNING, "Could not write " + file, e);
            return;
        }
        try (BufferedInputStream origin = new BufferedInputStream(new FileInputStream(file), 2048);
             ZipOutputStream zip = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(file + ".zip")))) {
            // The entry carries only the file name, independent of the platform's separator.
            zip.putNextEntry(new ZipEntry(new File(file).getName()));
            byte[] data = new byte[2048];
            int count;
            while ((count = origin.read(data, 0, data.length)) != -1) {
                zip.write(data, 0, count);
            }
            zip.closeEntry();
        }
        catch (IOException e) {
            LOGGER.log(Level.WARNING, "Could not zip " + file, e);
        }
    }

    /**
     * Writes {@code topWords.txt} into the output directory: line {@code i}
     * lists, for every topic, the topic index, the word ranked {@code i}-th by
     * descending weight and that weight, all comma separated. At most
     * {@code topK} lines are written. Standard output is left untouched.
     *
     * @param words vocabulary; expected to have one entry per column of {@code tw}
     * @param tw weights indexed as {@code tw[topic][word]}
     */
    public void writeTopWords(String[] words, double[][] tw) {
        if (tw.length == 0) {
            LOGGER.warning("No topics available; topWords.txt was not written.");
            return;
        }
        Word[][] ranked = new Word[tw.length][];
        for (int t = 0; t < tw.length; ++t) {
            ranked[t] = new Word[tw[t].length];
            for (int i = 0; i < tw[t].length; ++i) {
                Word w = new Word();
                w.id = i;
                w.weight = tw[t][i];
                w.word = words[i];
                ranked[t][i] = w;
            }
            Arrays.sort(ranked[t]);
        }
        int rows = Math.min(this.topK, words.length);
        String file = this.outputFile("topWords.txt");
        try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(file, false), StandardCharsets.UTF_8))) {
            for (int i = 0; i < rows; ++i) {
                StringBuilder line = new StringBuilder();
                for (int t = 0; t < ranked.length; ++t) {
                    line.append(t).append(',').append(ranked[t][i]).append(',');
                }
                out.write(line.toString());
                out.newLine();
            }
        }
        catch (IOException e) {
            LOGGER.log(Level.WARNING, "Could not write " + file, e);
        }
    }

    /**
     * Writes {@code tw.json} (plus a zip copy) into the output directory. The
     * object holds an {@code alpha} array with the given value once per topic
     * and a {@code tw} array with, per topic, the weights and words whose
     * weight is at least the {@code topK}-th largest weight of that topic
     * (ties can therefore yield more than {@code topK} entries).
     *
     * @param words vocabulary, one entry per column of {@code tw}
     * @param tw weights indexed as {@code tw[topic][word]}
     * @param alpha value repeated in the {@code alpha} array
     */
    public void writeTW(String[] words, double[][] tw, double alpha) {
        StringBuilder json = new StringBuilder("{\"alpha\":[");
        for (int t = 0; t < tw.length; ++t) {
            if (t > 0) {
                json.append(',');
            }
            json.append(alpha);
        }
        json.append("],\"tw\":[");
        for (int t = 0; t < tw.length; ++t) {
            if (t > 0) {
                json.append(',');
            }
            double[] sorted = tw[t].clone();
            Arrays.sort(sorted);
            // Clamp so that a vocabulary smaller than topK does not index before the array.
            int keep = Math.min(this.topK, sorted.length);
            StringBuilder weightsS = new StringBuilder("{\"weights\":[");
            StringBuilder wordsS = new StringBuilder("\"words\":[");
            if (keep > 0) {
                double min = sorted[sorted.length - keep];
                boolean first = true;
                for (int i = 0; i < tw[t].length; ++i) {
                    if (!(tw[t][i] >= min)) continue;
                    if (!first) {
                        weightsS.append(',');
                        wordsS.append(',');
                    }
                    first = false;
                    weightsS.append(tw[t][i]);
                    wordsS.append('"').append(jsonEscape(String.valueOf(words[i]))).append('"');
                }
            }
            json.append(weightsS).append("],").append(wordsS).append("]}");
        }
        json.append("]}");
        this.writeAndZip(json.toString(), this.outputFile("tw.json"));
    }

    /** Escapes quotes, backslashes and control characters so the text is a valid JSON string body. */
    private static String jsonEscape(String text) {
        StringBuilder out = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        out.append(String.format("\\u%04x", (int)c));
                    } else {
                        out.append(c);
                    }
            }
        }
        return out.toString();
    }

    /**
     * Declares the operator's parameters: sampler settings, the group
     * attribute name, the export switch and directory, and the random
     * generator parameters.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeInt(PARAMETER_NUMITERATIONS, "Number of Iterations for Gibbs Sampling.", 1, Integer.MAX_VALUE, 2000));
        types.add(new ParameterTypeInt(PARAMETER_NUMTOPICS, "Number of Topics.", 1, Integer.MAX_VALUE, 5));
        types.add(new ParameterTypeDouble(PARAMETER_ALPHA, "Alpha metaparameter for Dirichlet", 0.0, Double.MAX_VALUE, 0.25));
        types.add(new ParameterTypeDouble(PARAMETER_BETA, "Beta metaparameter for Dirichlet", 0.0, Double.MAX_VALUE, 0.1));
        types.add(new ParameterTypeDouble(PARAMETER_LAMBDA, "lambda: weight for l1 regularization for DMR", 0.0, Double.MAX_VALUE, 0.1));
        types.add(new ParameterTypeDouble(PARAMETER_SIGMA, "sigma: variance of weights for document features in DMR", 0.0, Double.MAX_VALUE, 0.1));
        types.add(new ParameterTypeString(PARAMETER_GROUP, "Attribute name for grouping the word counts."));
        types.add(new ParameterTypeBoolean(PARAMETER_DFR, "Write results out for dfr browser", true, false));
        types.add(new ParameterTypeDirectory(PARAMETER_PATH, "Path for dfr files.", "/home/poelitz/work/Datasets/ResultData/acl2015/"));
        types.addAll(RandomGenerator.getRandomGeneratorParameters((Operator)this));
        return types;
    }

    public static void main(String[] args) {
    }

    /**
     * A vocabulary entry with its weight in one topic. The natural order is
     * descending by weight; equal weights are ordered descending by word.
     */
    static class Word
    implements Comparable<Word> {
        public String word = "";
        public double weight = 0.0;
        public int id = -1;

        Word() {
        }

        @Override
        public int compareTo(Word o) {
            // Double.compare yields a total order, so sorting stays well-defined for NaN weights.
            int byWeight = Double.compare(o.weight, this.weight);
            if (byWeight != 0) {
                return byWeight;
            }
            return o.word.compareTo(this.word);
        }

        @Override
        public String toString() {
            return this.word + "," + this.weight;
        }
    }
}

