/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.french.process;

import edu.stanford.nlp.international.french.process.FrenchLexer;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

public class FrenchTokenizer<T extends HasWord>
extends AbstractTokenizer<T> {
    // Underlying lexer; getNext() pulls raw tokens from it.
    private final FrenchLexer lexer;
    // When true, tokens whose ParentAnnotation equals "comp" are split on hyphens.
    private final boolean splitCompounds;
    // Pending parts of the compound currently being emitted.
    // Only allocated when splitCompounds is true; stays null otherwise.
    private List<CoreLabel> compoundBuffer;
    // Option string applied by ftbFactory(); the usage text ties the -ftb mode to
    // the experiments in Green et al. (2011).
    public static final String FTB_OPTIONS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitCompounds=true";

    /**
     * Creates a tokenizer that reads from {@code r}.
     *
     * @param r               character source handed to the underlying {@link FrenchLexer}
     * @param tf              token factory handed to the underlying lexer
     * @param lexerProperties options handed to the underlying lexer
     * @param splitCompounds  whether tokens flagged as compounds should be split
     *                        into their hyphen-separated parts
     */
    public FrenchTokenizer(Reader r, LexedTokenFactory<T> tf, Properties lexerProperties, boolean splitCompounds) {
        this.splitCompounds = splitCompounds;
        this.lexer = new FrenchLexer(r, tf, lexerProperties);
        if (!splitCompounds) {
            // No buffer is needed when compounds are passed through untouched.
            return;
        }
        this.compoundBuffer = Generics.newLinkedList();
    }

    /**
     * Produces the next token with a non-empty word.
     *
     * <p>Pending compound parts are drained from the buffer before the lexer is
     * consulted again. Zero-length tokens are skipped. When compound splitting
     * is enabled and the token is a {@link CoreLabel} whose
     * {@code ParentAnnotation} is {@code "comp"}, the token is split and its
     * first part is returned; the remaining parts are buffered.
     *
     * @return the next token, or {@code null} when the lexer returns {@code null}
     *         (presumably at end of input)
     * @throws RuntimeIOException if the lexer throws an {@link IOException}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected T getNext() {
        try {
            HasWord nextToken;
            do {
                nextToken = this.splitCompounds && !this.compoundBuffer.isEmpty()
                        ? this.compoundBuffer.remove(0)
                        : (HasWord)this.lexer.next();
            } while (nextToken != null && nextToken.word().isEmpty());
            if (this.splitCompounds && nextToken instanceof CoreLabel) {
                CoreLabel cl = (CoreLabel)nextToken;
                // Constant-first equals: a key that is present but mapped to null must not throw.
                if (cl.containsKey(CoreAnnotations.ParentAnnotation.class)
                        && "comp".equals(cl.get(CoreAnnotations.ParentAnnotation.class))) {
                    nextToken = this.processCompound(cl);
                }
            }
            // Unchecked: the lexer's token factory decides the runtime type of the tokens.
            return (T)nextToken;
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Splits a compound token into its hyphen-separated parts.
     *
     * <p>Every hyphen becomes a token of its own. Each part is a copy of
     * {@code cl} with its word, value and original text replaced by the part;
     * the copies are appended to the compound buffer. The compound marker is
     * removed from {@code cl} before copying, so the parts are not split again.
     *
     * @param cl the token flagged as a compound; its {@code ParentAnnotation} is removed
     * @return the first part of the compound, or {@code cl} itself if splitting
     *         produced no non-empty part
     */
    private CoreLabel processCompound(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        // Literal replacement; no regular expression is needed to pad the hyphens.
        String[] parts = cl.word().replace("-", " - ").split("\\s+");
        for (String part : parts) {
            if (part.isEmpty()) {
                // A leading hyphen yields an empty first part; zero-length tokens are never emitted.
                continue;
            }
            CoreLabel newLabel = new CoreLabel(cl);
            newLabel.setWord(part);
            newLabel.setValue(part);
            newLabel.set(CoreAnnotations.OriginalTextAnnotation.class, part);
            this.compoundBuffer.add(newLabel);
        }
        // Fall back to the unsplit token rather than failing on an empty buffer.
        return this.compoundBuffer.isEmpty() ? cl : this.compoundBuffer.remove(0);
    }

    /**
     * Returns a new tokenizer factory that produces {@link CoreLabel} tokens
     * with no extra options applied.
     *
     * @return a freshly created factory
     */
    public static TokenizerFactory<CoreLabel> factory() {
        TokenizerFactory<CoreLabel> defaultFactory = FrenchTokenizerFactory.newTokenizerFactory();
        return defaultFactory;
    }

    /**
     * Returns a new {@link CoreLabel} tokenizer factory preconfigured with
     * {@link #FTB_OPTIONS}.
     *
     * @return a freshly created factory with the FTB options applied
     */
    public static TokenizerFactory<CoreLabel> ftbFactory() {
        TokenizerFactory<CoreLabel> ftb = FrenchTokenizerFactory.newTokenizerFactory();
        ftb.setOptions(FrenchTokenizer.FTB_OPTIONS);
        return ftb;
    }

    /**
     * Builds the command-line help text printed for {@code -help}.
     *
     * @return the usage message, one option per line
     */
    private static String usage() {
        final String nl = System.getProperty("line.separator");
        final String[] optionLines = {
            "   -help          : Print this message.",
            "   -ftb           : Tokenization for experiments in Green et al. (2011).",
            "   -lowerCase     : Apply lowercasing.",
            "   -encoding type : Encoding format.",
            "   -orthoOpts str : Orthographic options (see FrenchLexer.java)",
        };
        StringBuilder text = new StringBuilder();
        text.append(String.format("Usage: java %s [OPTIONS] < file%n%n", FrenchTokenizer.class.getName()));
        text.append("Options:").append(nl);
        for (String line : optionLines) {
            text.append(line).append(nl);
        }
        return text.toString();
    }

    /**
     * Builds the option table passed to {@code StringUtils.argsToProperties}.
     *
     * <p>Each option name maps to an integer: 0 for {@code help}, {@code ftb}
     * and {@code lowerCase}; 1 for {@code encoding} and {@code orthoOpts}
     * (presumably the number of values the option consumes).
     *
     * @return a new mutable map of option name to that integer
     */
    private static Map<String, Integer> argOptionDefs() {
        Map<String, Integer> arity = Generics.newHashMap();
        for (String flag : new String[] {"help", "ftb", "lowerCase"}) {
            arity.put(flag, 0);
        }
        for (String valued : new String[] {"encoding", "orthoOpts"}) {
            arity.put(valued, 1);
        }
        return arity;
    }

    /**
     * Command-line entry point: tokenizes standard input and writes the tokens
     * to standard output, space-separated, one input line per output line.
     *
     * <p>Newline tokens ({@code *NL*}) end the current output line and are
     * counted as lines; they are also included in the reported token count.
     * A throughput summary is written to standard error when tokenization
     * finishes. If the requested encoding is not supported, an error is
     * reported and no summary is printed.
     *
     * @param args command-line options; see {@code -help}
     */
    public static void main(String[] args) {
        Properties options = StringUtils.argsToProperties(args, FrenchTokenizer.argOptionDefs());
        if (options.containsKey("help")) {
            System.err.println(FrenchTokenizer.usage());
            return;
        }
        TokenizerFactory<CoreLabel> tf = options.containsKey("ftb") ? FrenchTokenizer.ftbFactory() : FrenchTokenizer.factory();
        String orthoOptions = options.getProperty("orthoOpts", "");
        if (!orthoOptions.isEmpty()) {
            // An empty option string would otherwise be registered as a lexer property named "".
            tf.setOptions(orthoOptions);
        }
        tf.setOptions("tokenizeNLs");
        String encoding = options.getProperty("encoding", "UTF-8");
        boolean toLower = PropertiesUtils.getBool(options, "lowerCase", false);
        int nLines = 0;
        int nTokens = 0;
        long startTime = System.nanoTime();
        Tokenizer<CoreLabel> tokenizer;
        try {
            tokenizer = tf.getTokenizer(new InputStreamReader(System.in, encoding));
        }
        catch (UnsupportedEncodingException e) {
            // Nothing was tokenized, so do not fall through to the "Done!" summary.
            System.err.printf("%s: Unsupported encoding: %s%n", FrenchTokenizer.class.getName(), encoding);
            return;
        }
        boolean printSpace = false;
        while (tokenizer.hasNext()) {
            ++nTokens;
            String word = tokenizer.next().word();
            if (word.equals("*NL*")) {
                ++nLines;
                printSpace = false;
                System.out.println();
                continue;
            }
            if (printSpace) {
                System.out.print(" ");
            }
            String outputToken = toLower ? word.toLowerCase(Locale.FRENCH) : word;
            System.out.print(outputToken);
            printSpace = true;
        }
        long elapsedTime = System.nanoTime() - startTime;
        double linesPerSec = (double)nLines / ((double)elapsedTime / 1.0E9);
        System.err.printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", nLines, nTokens, linesPerSec);
    }

    /**
     * Factory for {@link FrenchTokenizer} instances.
     *
     * <p>Options are accumulated on the factory through {@link #setOptions(String)}
     * and passed to every tokenizer created afterwards. The option
     * {@code splitCompounds} is handled by the tokenizer itself; every other
     * option is stored as a lexer property.
     *
     * @param <T> the token type produced by the tokenizers
     */
    public static class FrenchTokenizerFactory<T extends HasWord>
    implements TokenizerFactory<T>,
    Serializable {
        private static final long serialVersionUID = 946818805507187330L;
        /** Token factory handed to each tokenizer's lexer. */
        protected final LexedTokenFactory<T> factory;
        /** Lexer options collected so far; this same instance is passed to each new tokenizer. */
        protected Properties lexerProperties = new Properties();
        /** Whether newly created tokenizers split compounds. */
        protected boolean splitCompoundOption = false;

        /**
         * Creates a factory whose tokenizers produce {@link CoreLabel} tokens.
         *
         * @return a new factory with no options set
         */
        public static TokenizerFactory<CoreLabel> newTokenizerFactory() {
            return new FrenchTokenizerFactory<CoreLabel>(new CoreLabelTokenFactory());
        }

        /**
         * Creates a factory whose tokenizers produce {@link Word} tokens.
         *
         * @param options comma-separated options, in the format accepted by
         *                {@link #setOptions(String)}
         * @return a new factory with {@code options} applied
         */
        public static TokenizerFactory<Word> newWordTokenizerFactory(String options) {
            return new FrenchTokenizerFactory<Word>(new WordTokenFactory(), options);
        }

        private FrenchTokenizerFactory(LexedTokenFactory<T> factory) {
            this.factory = factory;
        }

        private FrenchTokenizerFactory(LexedTokenFactory<T> factory, String options) {
            this(factory);
            this.setOptions(options);
        }

        /**
         * Returns a tokenizer over {@code r}, viewed as an iterator.
         *
         * @param r the character source
         * @return the same object {@link #getTokenizer(Reader)} would return
         */
        @Override
        public Iterator<T> getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        /**
         * Creates a tokenizer over {@code r} using the options set so far.
         *
         * @param r the character source
         * @return a new {@link FrenchTokenizer}
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r) {
            return new FrenchTokenizer<T>(r, this.factory, this.lexerProperties, this.splitCompoundOption);
        }

        /**
         * Applies a comma-separated list of options to this factory.
         *
         * <p>Each option is either {@code name} (shorthand for {@code name=true})
         * or {@code name=value}. Whitespace around options, names and values is
         * ignored, and empty entries are skipped. A {@code null} argument is a
         * no-op. Malformed entries are reported on standard error and skipped.
         *
         * @param options the option string; may be {@code null} or empty
         */
        @Override
        public void setOptions(String options) {
            if (options == null) {
                return;
            }
            for (String rawOption : options.split(",")) {
                String option = rawOption.trim();
                if (option.isEmpty()) {
                    // "" or "a,,b": nothing to set; do not register a property named "".
                    continue;
                }
                String[] fields = option.split("=");
                String key = fields.length > 0 ? fields[0].trim() : "";
                if (key.isEmpty() || fields.length > 2) {
                    System.err.printf("%s: Bad option %s%n", this.getClass().getName(), option);
                    continue;
                }
                // A bare name (or "name=") means "true".
                String value = fields.length == 2 ? fields[1].trim() : "true";
                if (key.equals("splitCompounds")) {
                    this.splitCompoundOption = Boolean.parseBoolean(value);
                } else {
                    this.lexerProperties.put(key, value);
                }
            }
        }

        /**
         * Applies {@code extraOptions} to this factory and then creates a tokenizer.
         *
         * <p>The extra options are stored on the factory, so they also affect
         * tokenizers created by later calls.
         *
         * @param r            the character source
         * @param extraOptions options in the format accepted by {@link #setOptions(String)}
         * @return a new {@link FrenchTokenizer}
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r, String extraOptions) {
            this.setOptions(extraOptions);
            return this.getTokenizer(r);
        }
    }
}

