/*
 * Decompiled with CFR 0.152.
 */
package com.rapidminer.kobra.transformation;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.GenerateNewMDRule;
import com.rapidminer.operator.ports.metadata.MDTransformationRule;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.Ontology;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class PosTaggerOperator
extends Operator {
    public static String PARAMETER_MODEL_FILE = "model";
    public static String PARAMETER_LANGUAGE = "language";
    public static String[] LANGUAGES = new String[]{"german", "english"};
    public static int GERMAN = 0;
    public static int ENGLISH = 1;
    public static String PARAMETER_TEXT_ATTRIBUTE = "text attribute";
    public static String PARAMETER_APPEND = "append";
    private final InputPort exampleSetInput = this.getInputPorts().createPort("example set", ExampleSet.class);
    private final OutputPort output = (OutputPort)this.getOutputPorts().createPort("example set");

    public PosTaggerOperator(OperatorDescription description) {
        super(description);
        GenerateNewMDRule n = new GenerateNewMDRule(this.output, ExampleSet.class){

            public MetaData modifyMetaData(MetaData unmodifiedMetaData) {
                return super.modifyMetaData(unmodifiedMetaData);
            }
        };
        this.getTransformer().addRule((MDTransformationRule)n);
    }

    public void doWork() throws OperatorException {
        int l = this.getParameterAsInt(PARAMETER_LANGUAGE);
        ExampleSet exampleSet = (ExampleSet)this.exampleSetInput.getData(ExampleSet.class);
        MaxentTagger tagger = null;
        File file = this.getParameterAsFile(PARAMETER_MODEL_FILE);
        if (!file.exists()) {
            tagger = l == GERMAN ? new MaxentTagger("/home/poelitz/Downloads/stanford-postagger-full-2014-10-31/models/german-fast.tagger") : new MaxentTagger("/home/poelitz/Downloads/stanford-postagger-full-2014-10-31/models/english-bidirectional-distsim.tagger");
        } else {
            String f = file.getAbsolutePath();
            tagger = new MaxentTagger(f);
        }
        Attribute textAttribute = null;
        Example tmpex = exampleSet.getExample(0);
        Attributes tmpattr = tmpex.getAttributes();
        String colName = this.getParameterAsString(PARAMETER_TEXT_ATTRIBUTE);
        for (Attribute att : tmpattr) {
            if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(att.getValueType(), 5)) continue;
            if (colName.equals("")) {
                textAttribute = att;
                continue;
            }
            if (!colName.equals(att.getName())) continue;
            textAttribute = att;
        }
        Attribute newAtt = AttributeFactory.createAttribute((String)"PoS Sequence", (int)5);
        ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
        attributeList.add(newAtt);
        boolean append = this.getParameterAsBoolean(PARAMETER_APPEND);
        MemoryExampleTable table = null;
        DataRowFactory factory = null;
        if (!append) {
            table = new MemoryExampleTable(attributeList);
            factory = new DataRowFactory(0, '.');
        } else {
            exampleSet.getExampleTable().addAttribute(newAtt);
            exampleSet.getAttributes().addRegular(newAtt);
        }
        for (int i = 0; i < exampleSet.size(); ++i) {
            Example ex = exampleSet.getExample(i);
            String text = ex.getValueAsString(textAttribute);
            boolean containsCo = text.contains("&&");
            text = text.replaceAll("[_&]", "");
            String tagged = tagger.tagString(text);
            if (!append) {
                DataRow row = factory.create(table.getNumberOfAttributes());
                table.addDataRow(row);
                if (containsCo) {
                    row.set((Attribute)attributeList.get(0), (double)((Attribute)attributeList.get(0)).getMapping().mapString("CO " + tagged));
                    continue;
                }
                row.set((Attribute)attributeList.get(0), (double)((Attribute)attributeList.get(0)).getMapping().mapString(tagged));
                continue;
            }
            String[] tokens = tagged.split(" ");
            String pos = containsCo ? "CO " : "";
            for (String s : tokens) {
                String[] tks = s.split("_");
                pos = pos + " " + tks[1];
            }
            ex.setValue(newAtt, (double)((Attribute)attributeList.get(0)).getMapping().mapString(pos));
        }
        if (!append) {
            SimpleExampleSet set = new SimpleExampleSet((ExampleTable)table);
            this.output.deliver((IOObject)set);
        } else {
            this.output.deliver((IOObject)exampleSet);
        }
    }

    public List<ParameterType> getParameterTypes() {
        List types = super.getParameterTypes();
        types.add(new ParameterTypeCategory(PARAMETER_LANGUAGE, "Specify language for the parser.", LANGUAGES, 0));
        types.add(new ParameterTypeString(PARAMETER_TEXT_ATTRIBUTE, "Attribute name of text columns of interest.", ""));
        types.add(new ParameterTypeBoolean(PARAMETER_APPEND, "Shall sequence only be appended as additional attribute?", true));
        types.add(new ParameterTypeFile(PARAMETER_MODEL_FILE, "Choose File", ".tagger", "/home/poelitz/Downloads/stanford-postagger-full-2014-10-31/models/german-fast.tagger"));
        return types;
    }
}

