/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.kobra.topicmodels;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeString;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.aksw.palmetto.Coherence;
import org.aksw.palmetto.Palmetto;
import org.aksw.palmetto.corpus.CorpusAdapter;

public class LDAEvaluationPalmettoOperator
extends Operator {
    public static String PARAMETER_ATTRIBUTE = "attribute";
    public static String PARAMETER_SOURCE = "path to index";
    public static String[] METHODS = new String[]{"UCI", "UMass", "NPMI", "Arith. mean NPMI", "Arith. mean Fitelson", "Overlap"};
    public static String METHOD = "method";
    public static int UCI = 0;
    public static int UMass = 1;
    public static int NPMI = 2;
    public static int ArNPMI = 3;
    public static int ArFitelson = 4;
    public static int Overlap = 5;
    int idx = 0;
    static String PARAMETER_NUMITERATIONS = "number of top words";
    int iters = 10;
    int numTopics = 4;
    private final InputPort inputWords = (InputPort)this.getInputPorts().createPort("example set topic-word distributions");
    private final OutputPort output = (OutputPort)this.getOutputPorts().createPort("output neg log likelihoods");
    double norm = Math.log10(Math.E);

    public LDAEvaluationPalmettoOperator(OperatorDescription description) {
        super(description);
    }

    public void doWork() throws OperatorException {
        this.iters = this.getParameterAsInt(PARAMETER_NUMITERATIONS);
        this.numTopics = 0;
        this.idx = this.getParameterAsInt(METHOD);
        String indexPath = this.getParameterAsString(PARAMETER_SOURCE);
        ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
        attributeList.add(AttributeFactory.createAttribute((String)"coherence", (int)2));
        MemoryExampleTable table = new MemoryExampleTable(attributeList);
        DataRowFactory factory = new DataRowFactory(0, '.');
        int numWords = 0;
        ExampleSet examplesProbs = (ExampleSet)this.inputWords.getData(ExampleSet.class);
        numWords = examplesProbs.size();
        String[] words = new String[numWords];
        for (int i = 0; i < examplesProbs.size(); ++i) {
            Example ex = examplesProbs.getExample(i);
            words[i] = ex.getNominalValue(ex.getAttributes().get("Word"));
        }
        Example ex2 = examplesProbs.getExample(0);
        Attributes atts = ex2.getAttributes();
        int i = 0;
        if (i < examplesProbs.size()) {
            ex2 = examplesProbs.getExample(i);
            atts = ex2.getAttributes();
            boolean j = false;
            for (Attribute att : atts) {
                if (!att.getName().contains("Topic_")) continue;
                ++this.numTopics;
            }
        }
        double[][] topicassigns = new double[numWords][this.numTopics];
        for (int i2 = 0; i2 < examplesProbs.size(); ++i2) {
            ex2 = examplesProbs.getExample(i2);
            atts = ex2.getAttributes();
            int j = 0;
            for (Attribute att : atts) {
                if (!att.getName().contains("Topic_")) continue;
                topicassigns[i2][j] = ex2.getValue(att);
                ++j;
            }
        }
        int[][] maxProbs = new int[this.numTopics][this.iters];
        for (int i3 = 0; i3 < this.numTopics; ++i3) {
            for (int j = 0; j < this.iters; ++j) {
                double max = -1.0;
                int idx = -1;
                for (int k = 0; k < numWords; ++k) {
                    if (!(topicassigns[k][i3] > max)) continue;
                    idx = k;
                    max = topicassigns[k][i3];
                }
                topicassigns[idx][i3] = -1.0;
                maxProbs[i3][j] = idx;
            }
        }
        double res = 0.0;
        String[][] topWords = new String[this.numTopics][this.iters];
        for (int i4 = 0; i4 < this.numTopics; ++i4) {
            for (int j = 0; j < this.iters; ++j) {
                topWords[i4][j] = words[maxProbs[i4][j]].trim();
            }
        }
        if (this.idx == Overlap) {
            HashSet<String> hs = new HashSet<String>();
            double o = 0.0;
            for (int i5 = 0; i5 < this.numTopics; ++i5) {
                o = 0.0;
                for (int j = 0; j < this.iters; ++j) {
                    if (hs.contains(topWords[i5][j])) {
                        o += 1.0;
                        continue;
                    }
                    hs.add(topWords[i5][j]);
                }
                DataRow row = factory.create(table.getNumberOfAttributes());
                table.addDataRow(row);
                row.set((Attribute)attributeList.get(0), o / (double)this.iters);
            }
            SimpleExampleSet set = new SimpleExampleSet((ExampleTable)table);
            this.output.deliver((IOObject)set);
            return;
        }
        String calcType = "uci";
        switch (this.idx) {
            case 0: {
                calcType = "uci";
                break;
            }
            case 1: {
                calcType = "umass";
                break;
            }
            case 2: {
                calcType = "npmi";
                break;
            }
        }
        CorpusAdapter corpusAdapter = Palmetto.getCorpusAdapter(calcType, indexPath);
        if (corpusAdapter == null) {
            return;
        }
        Coherence coherence = Palmetto.getCoherence(calcType, corpusAdapter);
        if (coherence == null) {
            return;
        }
        double[] coherences = coherence.calculateCoherences(topWords);
        corpusAdapter.close();
        res = 0.0;
        for (int i6 = 0; i6 < coherences.length; ++i6) {
            res = coherences[i6];
            DataRow row = factory.create(table.getNumberOfAttributes());
            table.addDataRow(row);
            row.set((Attribute)attributeList.get(0), res);
        }
        SimpleExampleSet set = new SimpleExampleSet((ExampleTable)table);
        this.output.deliver((IOObject)set);
    }

    public List<ParameterType> getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeInt(PARAMETER_NUMITERATIONS, "Number of top words to use.", 1, Integer.MAX_VALUE, 10));
        types.add(new ParameterTypeString(PARAMETER_SOURCE, "Path to lucene index for Palmetto.", "/home/share03/datensaetze/wikipedia_lucene_index/wikipedia_bd"));
        types.add(new ParameterTypeCategory(METHOD, "The type of the kernel.", METHODS, UCI));
        return types;
    }

    public static void main(String[] args) {
    }
}

