/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.kobra.transformation;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.operator.ports.metadata.MDTransformationRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.Ontology;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class TreeTaggerOperator
extends Operator {
    public static String PARAMETER_MODEL_FILE = "model";
    public static String PARAMETER_LANGUAGE = "language";
    public static String[] LANGUAGES = new String[]{"german", "english"};
    public static int GERMAN = 0;
    public static int ENGLISH = 1;
    public static String PARAMETER_APPEND = "append";
    public static String PARAMETER_TEXT_ATTRIBUTE = "text attribute";
    private final InputPort exampleSetInput = this.getInputPorts().createPort("example set", ExampleSet.class);
    private final OutputPort output = (OutputPort)this.getOutputPorts().createPort("example set");

    public TreeTaggerOperator(OperatorDescription description) {
        super(description);
        GenerateNewMDRule n = new GenerateNewMDRule(this.output, ExampleSet.class){

            public MetaData modifyMetaData(MetaData unmodifiedMetaData) {
                return super.modifyMetaData(unmodifiedMetaData);
            }
        };
        this.getTransformer().addRule((MDTransformationRule)n);
    }

    public void doWork() throws OperatorException {
        int l = this.getParameterAsInt(PARAMETER_LANGUAGE);
        ExampleSet exampleSet = (ExampleSet)this.exampleSetInput.getData(ExampleSet.class);
        LexicalizedParser lp = null;
        File file = this.getParameterAsFile(PARAMETER_MODEL_FILE);
        if (!file.exists()) {
            lp = l == GERMAN ? LexicalizedParser.loadModel("/home/poelitz/Downloads/stanford-parser-full-2014-01-04/edu/stanford/nlp/models/lexparser/germanPCFG.ser.gz", "-maxLength", "200") : LexicalizedParser.loadModel("/home/poelitz/Downloads/stanford-parser-full-2014-01-04/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz", "-maxLength", "200");
        } else {
            String f = file.getAbsolutePath();
            lp = LexicalizedParser.loadModel(f, "-maxLength", "200");
        }
        Attribute textAttribute = null;
        Example tmpex = exampleSet.getExample(0);
        Attributes tmpattr = tmpex.getAttributes();
        String colName = this.getParameterAsString(PARAMETER_TEXT_ATTRIBUTE);
        for (Attribute att : tmpattr) {
            if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(att.getValueType(), 5)) continue;
            if (colName.equals("")) {
                textAttribute = att;
                continue;
            }
            if (!colName.equals(att.getName())) continue;
            textAttribute = att;
        }
        Attribute newAtt = AttributeFactory.createAttribute((String)"Parse Tree", (int)5);
        ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
        attributeList.add(newAtt);
        boolean append = this.getParameterAsBoolean(PARAMETER_APPEND);
        MemoryExampleTable table = null;
        DataRowFactory factory = null;
        if (!append) {
            table = new MemoryExampleTable(attributeList);
            factory = new DataRowFactory(0, '.');
        } else {
            exampleSet.getExampleTable().addAttribute(newAtt);
            exampleSet.getAttributes().addRegular(newAtt);
        }
        for (int i = 0; i < exampleSet.size(); ++i) {
            Example ex = exampleSet.getExample(i);
            String text = ex.getValueAsString(textAttribute);
            String tagged = lp.parse(text).flatten().toString();
            if (!append) {
                DataRow row = factory.create(table.getNumberOfAttributes());
                table.addDataRow(row);
                row.set((Attribute)attributeList.get(0), (double)((Attribute)attributeList.get(0)).getMapping().mapString(tagged));
                continue;
            }
            ex.setValue(newAtt, (double)((Attribute)attributeList.get(0)).getMapping().mapString(tagged));
        }
        if (!append) {
            SimpleExampleSet set = new SimpleExampleSet((ExampleTable)table);
            this.output.deliver((IOObject)set);
        } else {
            this.output.deliver((IOObject)exampleSet);
        }
    }

    public List<ParameterType> getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeCategory(PARAMETER_LANGUAGE, "Specify language for the parser.", LANGUAGES, 0));
        types.add(new ParameterTypeString(PARAMETER_TEXT_ATTRIBUTE, "Attribute name of text columns of interest.", ""));
        types.add(new ParameterTypeBoolean(PARAMETER_APPEND, "Shall sequence only be appended as additional attribute?", true));
        types.add(new ParameterTypeFile(PARAMETER_MODEL_FILE, "Choose File", ".gz", "/home/poelitz/Downloads/stanford-parser-full-2014-01-04/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"));
        return types;
    }
}

