/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.pipe;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SimpleTokenizer;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureCounter;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.IDSorter;
import cc.mallet.types.Instance;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;

/**
 * A pass-through pipe that tallies how often each feature index occurs in the
 * {@link FeatureSequence} instances it sees, and offers helpers that use those
 * tallies to build a pruned alphabet, export rare or frequent words, or extend
 * a tokenizer's stoplist.
 *
 * <p>Instances are returned from {@link #pipe(Instance)} unmodified; only the
 * internal {@link FeatureCounter} is updated.
 */
public class FeatureCountPipe
extends Pipe {
    /** Running per-feature occurrence counts, keyed by index in the data alphabet. */
    FeatureCounter counter;
    static final long serialVersionUID = 1L;

    /**
     * Creates a pipe with a fresh, empty data alphabet and no target alphabet.
     */
    public FeatureCountPipe() {
        super(new Alphabet(), null);
        this.counter = new FeatureCounter(this.getDataAlphabet());
    }

    /**
     * Creates a pipe that counts features of an existing data alphabet.
     *
     * @param dataAlphabet   alphabet whose feature indices are counted
     * @param targetAlphabet target alphabet handed to the {@link Pipe} superclass
     */
    public FeatureCountPipe(Alphabet dataAlphabet, Alphabet targetAlphabet) {
        super(dataAlphabet, targetAlphabet);
        this.counter = new FeatureCounter(dataAlphabet);
    }

    /**
     * Increments the count of every feature occurrence in the instance's data.
     *
     * @param instance instance whose data must be a {@link FeatureSequence}
     * @return the same instance, unmodified
     * @throws IllegalArgumentException if the data is {@code null} or is not a
     *         {@link FeatureSequence}
     */
    @Override
    public Instance pipe(Instance instance) {
        Object data = instance.getData();
        if (!(data instanceof FeatureSequence)) {
            // Guard against null data so the caller gets the intended
            // IllegalArgumentException instead of a NullPointerException
            // raised while building the message.
            String found = data == null ? "null" : String.valueOf(data.getClass());
            throw new IllegalArgumentException("Looking for a FeatureSequence, found a " + found);
        }
        FeatureSequence features = (FeatureSequence)data;
        for (int position = 0; position < features.size(); ++position) {
            this.counter.increment(features.getIndexAtPosition(position));
        }
        return instance;
    }

    /**
     * Builds a new alphabet containing only the entries of the data alphabet
     * whose count is at least {@code minimumCount}. Growth of the returned
     * alphabet is stopped before it is handed back.
     *
     * @param minimumCount smallest count an entry needs in order to be kept
     * @return a new alphabet holding the retained entries
     */
    public Alphabet getPrunedAlphabet(int minimumCount) {
        Alphabet currentAlphabet = this.getDataAlphabet();
        Alphabet prunedAlphabet = new Alphabet();
        for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
            if (this.counter.get(feature) < minimumCount) {
                continue;
            }
            // Fetch the entry by index from the current alphabet, then register
            // it in the pruned one. (The two lookups were previously swapped,
            // which queried the empty pruned alphabet by index and never added
            // anything to it.)
            prunedAlphabet.lookupIndex(currentAlphabet.lookupObject(feature));
        }
        prunedAlphabet.stopGrowth();
        return prunedAlphabet;
    }

    /**
     * Writes every entry whose count is below {@code minimumCount} to a file,
     * one entry per line.
     *
     * @param prunedFile   destination file
     * @param minimumCount entries with a count strictly below this are written
     * @throws IOException if the file cannot be opened for writing
     */
    public void writePrunedWords(File prunedFile, int minimumCount) throws IOException {
        Alphabet currentAlphabet = this.getDataAlphabet();
        PrintWriter out = new PrintWriter(prunedFile);
        try {
            for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
                if (this.counter.get(feature) < minimumCount) {
                    out.println(currentAlphabet.lookupObject(feature));
                }
            }
        } finally {
            // Release the file handle even if a lookup throws part-way through.
            out.close();
        }
    }

    /**
     * Passes every entry whose count is below {@code minimumCount} to
     * {@link SimpleTokenizer#stop}. Entries are cast to {@code String}.
     *
     * @param tokenizer    tokenizer that receives the rare entries
     * @param minimumCount entries with a count strictly below this are passed on
     */
    public void addPrunedWordsToStoplist(SimpleTokenizer tokenizer, int minimumCount) {
        Alphabet currentAlphabet = this.getDataAlphabet();
        for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
            if (this.counter.get(feature) < minimumCount) {
                tokenizer.stop((String)currentAlphabet.lookupObject(feature));
            }
        }
    }

    /**
     * Writes the first {@code totalWords} entries, in {@link IDSorter} order of
     * their counts, to a file, one entry per line. If the alphabet holds fewer
     * entries than requested, all of them are written.
     *
     * @param commonFile destination file
     * @param totalWords maximum number of entries to write
     * @throws IOException if the file cannot be opened for writing
     */
    public void writeCommonWords(File commonFile, int totalWords) throws IOException {
        Alphabet currentAlphabet = this.getDataAlphabet();
        int alphabetSize = currentAlphabet.size();

        IDSorter[] sortedWords = new IDSorter[alphabetSize];
        for (int type = 0; type < alphabetSize; ++type) {
            sortedWords[type] = new IDSorter(type, this.counter.get(type));
        }
        // NOTE(review): relies on IDSorter's natural ordering; presumably it
        // places the highest counts first -- confirm against IDSorter.compareTo.
        Arrays.sort(sortedWords);

        int max = Math.min(totalWords, alphabetSize);
        PrintWriter out = new PrintWriter(commonFile);
        try {
            for (int rank = 0; rank < max; ++rank) {
                out.println(currentAlphabet.lookupObject(sortedWords[rank].getID()));
            }
        } finally {
            // Release the file handle even if a lookup throws part-way through.
            out.close();
        }
    }
}

