/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Interner;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.UnknownWordModel;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.Distribution;
import edu.stanford.nlp.stats.GeneralizedCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.pennchinese.RadicalMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InvalidObjectException;
import java.io.ObjectStreamException;
import java.io.Serializable;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ChineseCharacterBasedLexicon
implements Lexicon {
    /** Length-penalty strength, copied from the parser params; read by {@code score} and {@code sampleFrom}. */
    private final double lengthPenalty;
    /** Selects the length-penalty scheme: 0 applies no penalty in {@code score}; 1 and 2 select the two penalised variants. */
    private final int penaltyType;
    /**
     * Character distributions keyed by context list. The empty list maps to the smoothed
     * character prior; longer keys are built in {@code finishTraining} as
     * {@code [tag, previous char, char before that]} prefixes.
     */
    private Map<List, Distribution<Symbol>> charDistributions;
    /** Snapshot of every character symbol (plus the end-of-word marker) counted during training. */
    private Set<Symbol> knownChars;
    /** Distribution over POS tags, estimated from tag counts in {@code finishTraining}. */
    private Distribution<String> POSDistribution;
    /** When true, unseen/singleton characters are mapped to a radical-based class instead of the single UNKNOWN symbol. */
    private final boolean useUnknownCharacterModel;
    /** Number of preceding characters of history used in the character n-gram context. */
    private static final int CONTEXT_LENGTH = 2;
    /** Word index supplied at construction; stored but not otherwise read by the code visible in this file. */
    private final Index<String> wordIndex;
    /** Tag index: used to turn tag ids into tag strings in {@code score}, and extended with every tag seen in training. */
    private final Index<String> tagIndex;
    /** Buffer of tagged sentences collected by the {@code train} methods; transient, so it is not serialized. */
    private transient List<List<TaggedWord>> trainingSentences;
    /** Serialization version id; must stay unchanged for compatibility with already-serialized instances. */
    private static final long serialVersionUID = -5357655683145854069L;

    /**
     * Creates an untrained lexicon.
     *
     * @param params    source of the length penalty, the penalty type and the
     *                  unknown-character-model switch; must not be null
     * @param wordIndex word index to keep a reference to
     * @param tagIndex  tag index used for tag lookup and extended during training
     */
    public ChineseCharacterBasedLexicon(ChineseTreebankParserParams params, Index<String> wordIndex, Index<String> tagIndex) {
        // Settings copied out of the parser params.
        this.lengthPenalty = params.lengthPenalty;
        this.penaltyType = params.penaltyType;
        this.useUnknownCharacterModel = params.useUnknownCharacterModel;
        // Shared indices.
        this.wordIndex = wordIndex;
        this.tagIndex = tagIndex;
    }

    /**
     * Starts a new training run by replacing the sentence buffer with an empty one.
     *
     * @param numTrees ignored by this lexicon
     */
    @Override
    public void initializeTraining(double numTrees) {
        List<List<TaggedWord>> emptyBuffer = new ArrayList<List<TaggedWord>>();
        trainingSentences = emptyBuffer;
    }

    /**
     * Buffers the tagged yield of every tree, each with weight 1.0.
     *
     * @param trees trees to add to the training buffer
     */
    @Override
    public void train(Collection<Tree> trees) {
        final double unitWeight = 1.0;
        for (Tree each : trees) {
            train(each, unitWeight);
        }
    }

    /**
     * Buffers the tagged yield of every tree, forwarding the given weight to the
     * single-tree overload.
     *
     * @param trees  trees to add to the training buffer
     * @param weight weight passed along with each tree
     */
    @Override
    public void train(Collection<Tree> trees, double weight) {
        for (Tree each : trees) {
            train(each, weight);
        }
    }

    /**
     * Appends the tagged yield of one tree to the training buffer.
     *
     * @param tree   tree whose tagged words are buffered
     * @param weight not used by this method
     */
    @Override
    public void train(Tree tree, double weight) {
        List<TaggedWord> taggedWords = tree.taggedYield();
        trainingSentences.add(taggedWords);
    }

    /**
     * Not supported by this lexicon.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void trainUnannotated(List<TaggedWord> sentence, double weight) {
        throw new UnsupportedOperationException("This version of the parser does not support non-tree training data");
    }

    /**
     * Not supported by this lexicon.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void incrementTreesRead(double weight) {
        throw new UnsupportedOperationException();
    }

    /**
     * Per-word training is not supported by this lexicon.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void train(TaggedWord tw, int loc, double weight) {
        throw new UnsupportedOperationException();
    }

    /**
     * Appends an already-tagged sentence to the training buffer. The list is stored
     * as given, not copied.
     *
     * @param sentence tagged words to buffer
     * @param weight   not used by this method
     */
    @Override
    public void train(List<TaggedWord> sentence, double weight) {
        List<List<TaggedWord>> buffer = trainingSentences;
        buffer.add(sentence);
    }

    /**
     * Builds the character models from the sentences buffered by the {@code train} methods.
     *
     * <p>Three passes are made:
     * <ol>
     *   <li>Count every character symbol (plus one end-of-word marker per word); symbols
     *       with a count below 1.5 are treated as singletons, and the set of counted
     *       symbols is snapshotted into {@code knownChars}.</li>
     *   <li>Count tag-conditioned character n-grams with 0, 1 and 2 characters of history.
     *       Singleton characters are additionally counted under their unknown-character
     *       class, and singleton history characters are replaced by that class.</li>
     *   <li>Turn the counts into distributions: a Good-Turing smoothed character prior
     *       stored under the empty context, then one Dirichlet-smoothed distribution per
     *       observed context, each backed by the distribution of the context that is one
     *       element shorter.</li>
     * </ol>
     *
     * <p>The boundary word {@code ".$."} is skipped in both counting passes (its tag is still
     * added to the tag index). {@code initializeTraining} must have been called first,
     * otherwise the sentence buffer is null.
     */
    @Override
    @SuppressWarnings({"unchecked", "rawtypes"}) // the n-gram counters are deliberately used as raw types
    public void finishTraining() {
        // Characters of history in the longest context (tag + this many previous characters).
        final int maxHistory = 2;

        Timing.tick("Counting characters...");
        ClassicCounter<Symbol> charCounter = new ClassicCounter<Symbol>();
        for (List<TaggedWord> sentence : this.trainingSentences) {
            for (TaggedWord taggedWord : sentence) {
                String word = taggedWord.word();
                if (word.equals(".$.")) {
                    continue;
                }
                for (int j = 0, length = word.length(); j < length; ++j) {
                    charCounter.incrementCount(Symbol.cannonicalSymbol(word.charAt(j)));
                }
                charCounter.incrementCount(Symbol.END_WORD);
            }
        }
        Set<Symbol> singletons = Counters.keysBelow(charCounter, 1.5);
        // Snapshot taken before unknown-class symbols are added to charCounter below.
        this.knownChars = Generics.newHashSet(charCounter.keySet());

        Timing.tick("Counting nGrams...");
        // POSspecificCharNGrams[h] counts (tag + h history characters) -> character.
        GeneralizedCounter[] POSspecificCharNGrams = new GeneralizedCounter[maxHistory + 1];
        for (int i = 0; i <= maxHistory; ++i) {
            POSspecificCharNGrams[i] = new GeneralizedCounter(i + 2);
        }
        ClassicCounter<String> POSCounter = new ClassicCounter<String>();
        // Holds the tag (a String) followed by history Symbols, hence Serializable elements.
        List<Serializable> context = new ArrayList<Serializable>(maxHistory + 1);
        for (List<TaggedWord> sentence : this.trainingSentences) {
            for (TaggedWord taggedWord : sentence) {
                String word = taggedWord.word();
                String tag = taggedWord.tag();
                this.tagIndex.add(tag);
                if (word.equals(".$.")) {
                    continue;
                }
                POSCounter.incrementCount(tag);
                // Position size (one past the last character) stands for the end-of-word marker.
                for (int i = 0, size = word.length(); i <= size; ++i) {
                    Symbol sym;
                    Symbol unkClass = null;
                    context.clear();
                    context.add(tag);
                    if (i < size) {
                        sym = Symbol.cannonicalSymbol(word.charAt(i));
                        if (singletons.contains(sym)) {
                            unkClass = this.unknownCharClass(sym);
                            charCounter.incrementCount(unkClass);
                        }
                    } else {
                        sym = Symbol.END_WORD;
                    }
                    POSspecificCharNGrams[0].incrementCount(context, sym);
                    if (unkClass != null) {
                        POSspecificCharNGrams[0].incrementCount(context, unkClass);
                    }
                    // Grow the context one history character at a time, counting at each length.
                    for (int j = 1; j <= maxHistory; ++j) {
                        boolean beforeWordStart = i - j < 0;
                        if (beforeWordStart) {
                            context.add(Symbol.BEGIN_WORD);
                        } else {
                            Symbol prev = Symbol.cannonicalSymbol(word.charAt(i - j));
                            context.add(singletons.contains(prev) ? this.unknownCharClass(prev) : prev);
                        }
                        POSspecificCharNGrams[j].incrementCount(context, sym);
                        if (unkClass != null) {
                            POSspecificCharNGrams[j].incrementCount(context, unkClass);
                        }
                        if (beforeWordStart) {
                            // Only one begin-of-word marker is recorded; longer contexts are not counted here.
                            break;
                        }
                    }
                }
            }
        }
        this.POSDistribution = Distribution.getDistribution(POSCounter);

        Timing.tick("Creating character prior distribution...");
        this.charDistributions = Generics.newHashMap();
        int numberOfKeys = charCounter.size() + singletons.size();
        Distribution<Symbol> prior = Distribution.goodTuringSmoothedCounter(charCounter, numberOfKeys);
        this.charDistributions.put(Collections.EMPTY_LIST, prior);
        // Shorter contexts are processed first so each longer context finds its backoff distribution.
        for (int i = 0; i <= maxHistory; ++i) {
            Set<Map.Entry<List<Serializable>, ClassicCounter<Symbol>>> counterEntries = POSspecificCharNGrams[i].lowestLevelCounterEntrySet();
            Timing.tick("Creating " + counterEntries.size() + " character " + (i + 1) + "-gram distributions...");
            for (Map.Entry<List<Serializable>, ClassicCounter<Symbol>> entry : counterEntries) {
                List<Serializable> ngramContext = entry.getKey();
                ClassicCounter<Symbol> counts = entry.getValue();
                Distribution<Symbol> thisPrior = this.charDistributions.get(ngramContext.subList(0, ngramContext.size() - 1));
                double priorWeight = (double)thisPrior.getNumberOfKeys() / 200.0;
                Distribution<Symbol> newDist = Distribution.dynamicCounterWithDirichletPrior(counts, thisPrior, priorWeight);
                this.charDistributions.put(ngramContext, newDist);
            }
        }
    }

    /**
     * Returns the POS tag distribution estimated by {@code finishTraining}.
     *
     * @return the stored tag distribution (the field value as-is)
     */
    public Distribution<String> getPOSDistribution() {
        return POSDistribution;
    }

    /**
     * Tests whether every character of {@code s} has a
     * {@link Character#getNumericValue(char)} between 10 and 35 inclusive, which is the
     * range that method assigns to Latin letters.
     *
     * @param s string to inspect; must not be null
     * @return {@code true} if no character falls outside that range (so also for the
     *         empty string), {@code false} otherwise
     */
    public static boolean isForeign(String s) {
        final int length = s.length();
        int pos = 0;
        while (pos < length) {
            int numeric = Character.getNumericValue(s.charAt(pos));
            if (numeric < 10 || numeric > 35) {
                return false;
            }
            pos++;
        }
        return true;
    }

    /**
     * Maps a character symbol to the symbol that represents it when it is treated as unknown.
     *
     * @param ch the character symbol to map
     * @return {@code Symbol.UNKNOWN} when the unknown-character model is disabled; otherwise
     *         the interned unknown-class symbol named after the radical that
     *         {@code RadicalMap} reports for the symbol's character
     */
    private Symbol unknownCharClass(Symbol ch) {
        if (!useUnknownCharacterModel) {
            return Symbol.UNKNOWN;
        }
        String radical = Character.toString(RadicalMap.getRadical(ch.getCh()));
        Symbol radicalClass = new Symbol(radical);
        return radicalClass.intern();
    }

    /**
     * Scores a word under a tag as the sum of log-probabilities of its characters (and the
     * end-of-word marker), each conditioned on the tag and up to two preceding characters,
     * minus an optional length penalty.
     *
     * <p>Characters not in {@code knownChars} are replaced by their unknown-character class
     * before scoring.
     *
     * @param iTW         supplies the tag id, resolved through the tag index
     * @param loc         not used by this method
     * @param word        the word to score; asserted not to be the boundary token {@code ".$."}
     * @param featureSpec not used by this method
     * @return the (natural-log) score as a float
     */
    @Override
    public float score(IntTaggedWord iTW, int loc, String word, String featureSpec) {
        // Characters of history in the longest context (tag + this many previous characters).
        final int maxHistory = 2;

        String tag = this.tagIndex.get(iTW.tag);
        assert (!word.equals(".$."));
        char[] chars = word.toCharArray();

        // Built backwards: END_WORD, last char, ..., first char, BEGIN_WORD, BEGIN_WORD.
        // Elements start out as Symbols; while scoring, slots are overwritten with the tag
        // String, so the element type is Serializable (what getBackedOffDist expects).
        List<Serializable> charList = new ArrayList<Serializable>(chars.length + maxHistory + 1);
        charList.add(Symbol.END_WORD);
        for (int i = chars.length - 1; i >= 0; --i) {
            Symbol ch = Symbol.cannonicalSymbol(chars[i]);
            if (this.knownChars.contains(ch)) {
                charList.add(ch);
            } else {
                charList.add(this.unknownCharClass(ch));
            }
        }
        for (int i = 0; i < maxHistory; ++i) {
            charList.add(Symbol.BEGIN_WORD);
        }

        double score = 0.0;
        for (int i = 0, size = charList.size(); i < size - maxHistory; ++i) {
            Symbol nextChar = (Symbol)charList.get(i);
            // Putting the tag in slot i makes the window [tag, previous char, char before that].
            charList.set(i, tag);
            double charScore = this.getBackedOffDist(charList.subList(i, i + maxHistory + 1)).probabilityOf(nextChar);
            score += Math.log(charScore);
        }

        switch (this.penaltyType) {
            case 0: {
                // no length penalty
                break;
            }
            case 1: {
                // penalty grows quadratically with the word length
                score -= (double)(chars.length * (chars.length + 1)) * (this.lengthPenalty / 2.0);
                break;
            }
            case 2: {
                // penalty grows linearly with the number of characters beyond the first
                score -= (double)(chars.length - 1) * this.lengthPenalty;
                break;
            }
            default: {
                // any other penalty type applies no penalty
                break;
            }
        }
        return (float)score;
    }

    /**
     * Finds the distribution for the longest prefix of {@code context} that has one,
     * trying prefix lengths 3, 2, 1 and finally 0 (the empty context).
     *
     * @param context context list; must hold at least three elements because the first
     *                prefix tried has length 3
     * @return the distribution stored for the longest matching prefix
     * @throws RuntimeException if not even the empty context has a distribution
     */
    private Distribution<Symbol> getBackedOffDist(List<Serializable> context) {
        int prefixLength = 3;
        while (prefixLength >= 0) {
            List<Serializable> prefix = context.subList(0, prefixLength);
            if (charDistributions.containsKey(prefix)) {
                return charDistributions.get(prefix);
            }
            prefixLength--;
        }
        throw new RuntimeException("OOPS... no prior distribution...?");
    }

    /**
     * Generates a random word for the given tag by sampling one character at a time from
     * the backed-off character distributions until the end-of-word marker is drawn.
     *
     * <p>With penalty type 1 or 2, generation may also stop early after each appended
     * character, decided by comparing a fresh {@code Math.random()} draw against
     * {@code lengthPenalty} raised to the current length (type 1) or {@code lengthPenalty}
     * itself (type 2).
     *
     * @param tag POS tag to condition on
     * @return the generated string; symbols that are not plain characters appear as {@code '*'}
     */
    public String sampleFrom(String tag) {
        // History layout: [tag, previous char, char before that].
        List<Serializable> history = new ArrayList<Serializable>(3);
        history.add(tag);
        history.add(Symbol.BEGIN_WORD);
        history.add(Symbol.BEGIN_WORD);

        StringBuilder word = new StringBuilder();
        Symbol next = getBackedOffDist(history).sampleFrom();
        while (next != Symbol.END_WORD) {
            word.append(next.getCh());
            if (penaltyType == 1) {
                if (Math.random() > Math.pow(lengthPenalty, word.length())) {
                    break;
                }
            } else if (penaltyType == 2) {
                if (Math.random() > lengthPenalty) {
                    break;
                }
            }
            // Shift the history by one and put the new character in front.
            history.set(2, history.get(1));
            history.set(1, next);
            next = getBackedOffDist(history).sampleFrom();
        }
        return word.toString();
    }

    /**
     * Generates a random word: first samples a tag from the POS distribution, then
     * samples a word for that tag.
     *
     * @return the generated string
     */
    public String sampleFrom() {
        String sampledTag = POSDistribution.sampleFrom();
        return sampleFrom(sampledTag);
    }

    /**
     * Not supported: this lexicon cannot enumerate taggings for a word index.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Iterator<IntTaggedWord> ruleIteratorByWord(int word, int loc, String featureSpec) {
        throw new UnsupportedOperationException("ChineseCharacterBasedLexicon has no rule iterator!");
    }

    /**
     * Not supported: this lexicon cannot enumerate taggings for a word string.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Iterator<IntTaggedWord> ruleIteratorByWord(String word, int loc, String featureSpec) {
        throw new UnsupportedOperationException("ChineseCharacterBasedLexicon has no rule iterator!");
    }

    /**
     * Reports the number of lexicon rules.
     *
     * @return always 0
     */
    @Override
    public int numRules() {
        return 0;
    }

    /**
     * Estimates the distribution of generated word lengths by drawing 10000 words with
     * {@code sampleFrom()}. Prints a progress dot to standard output after every 1000
     * samples, followed by a newline at the end.
     *
     * @return distribution over the lengths of the sampled words
     */
    private Distribution<Integer> getWordLengthDistribution() {
        ClassicCounter<Integer> lengthCounts = new ClassicCounter<Integer>();
        for (int drawn = 1; drawn <= 10000; drawn++) {
            String sampled = sampleFrom();
            lengthCounts.incrementCount(sampled.length());
            if (drawn % 1000 == 0) {
                System.out.print(".");
            }
        }
        System.out.println();
        return Distribution.getDistribution(lengthCounts);
    }

    /**
     * Reading the lexicon from text is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void readData(BufferedReader in) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Writing the lexicon as text is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void writeData(Writer w) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Known-word lookup by word index is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean isKnown(int word) {
        throw new UnsupportedOperationException();
    }

    /**
     * Known-word lookup by word string is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean isKnown(String word) {
        throw new UnsupportedOperationException();
    }

    /**
     * This lexicon holds no separate unknown-word model.
     *
     * @return always {@code null}
     */
    @Override
    public UnknownWordModel getUnknownWordModel() {
        return null;
    }

    /**
     * Does nothing; the supplied model is ignored.
     *
     * @param uwm ignored
     */
    @Override
    public void setUnknownWordModel(UnknownWordModel uwm) {
    }

    /**
     * Trains on the annotated trees only; the raw trees are ignored.
     *
     * @param trees    trees handed to {@code train(Collection)}
     * @param rawTrees not used by this method
     */
    @Override
    public void train(Collection<Tree> trees, Collection<Tree> rawTrees) {
        train(trees);
    }

    /**
     * A unit of the character model: a concrete character, an unknown-character class
     * identified by a string, or one of the shared markers {@link #UNKNOWN},
     * {@link #DIGIT}, {@link #LETTER}, {@link #BEGIN_WORD} and {@link #END_WORD}.
     *
     * <p>Equality and hashing use the character, the type and the unknown-class string.
     * On deserialization, marker symbols resolve to the shared constants and all other
     * symbols to their interned instance.
     */
    static class Symbol
    implements Serializable {
        private static final int UNKNOWN_TYPE = 0;
        private static final int DIGIT_TYPE = 1;
        private static final int LETTER_TYPE = 2;
        private static final int BEGIN_WORD_TYPE = 3;
        private static final int END_WORD_TYPE = 4;
        private static final int CHAR_TYPE = 5;
        private static final int UNK_CLASS_TYPE = 6;
        /** The character; meaningful only when {@code type} is the character type. */
        private char ch;
        /** The unknown-class name; set only by the unknown-class constructor. */
        private String unkClass;
        /** One of the {@code *_TYPE} constants. */
        int type;
        public static final Symbol UNKNOWN = new Symbol(UNKNOWN_TYPE);
        public static final Symbol DIGIT = new Symbol(DIGIT_TYPE);
        public static final Symbol LETTER = new Symbol(LETTER_TYPE);
        public static final Symbol BEGIN_WORD = new Symbol(BEGIN_WORD_TYPE);
        public static final Symbol END_WORD = new Symbol(END_WORD_TYPE);
        /** Shared interner used by {@link #intern()}. */
        public static final Interner<Symbol> interner = new Interner<Symbol>();
        private static final long serialVersionUID = 8925032621317022510L;

        /** Creates a character symbol for {@code ch}. */
        public Symbol(char ch) {
            this.ch = ch;
            this.type = CHAR_TYPE;
        }

        /** Creates an unknown-class symbol named {@code unkClass}. */
        public Symbol(String unkClass) {
            this.unkClass = unkClass;
            this.type = UNK_CLASS_TYPE;
        }

        /** Creates a marker symbol of the given type; asserts it is not the character type. */
        public Symbol(int type) {
            assert type != CHAR_TYPE;
            this.type = type;
        }

        /**
         * Maps a raw character to its symbol: digits collapse to {@link #DIGIT}, characters
         * whose {@link Character#getNumericValue(char)} lies in 10..35 collapse to
         * {@link #LETTER}, and everything else becomes a new (non-interned) character symbol.
         */
        public static Symbol cannonicalSymbol(char ch) {
            if (Character.isDigit(ch)) {
                return DIGIT;
            }
            int numeric = Character.getNumericValue(ch);
            boolean isLetter = numeric >= 10 && numeric <= 35;
            return isLetter ? LETTER : new Symbol(ch);
        }

        /** Returns the character for character symbols and {@code '*'} for every other type. */
        public char getCh() {
            return type == CHAR_TYPE ? ch : '*';
        }

        /** Returns the interner's instance for this symbol. */
        public Symbol intern() {
            return interner.intern(this);
        }

        /** Renders character symbols as {@code [uX]}, unknown classes as {@code UNK:name}, markers as their type number. */
        @Override
        public String toString() {
            switch (type) {
                case CHAR_TYPE:
                    return "[u" + ch + "]";
                case UNK_CLASS_TYPE:
                    return "UNK:" + unkClass;
                default:
                    return Integer.toString(type);
            }
        }

        /**
         * Replaces a freshly deserialized symbol with its shared instance.
         *
         * @throws ObjectStreamException (an {@link InvalidObjectException}) if the type is not recognised
         */
        protected Object readResolve() throws ObjectStreamException {
            switch (type) {
                case CHAR_TYPE:
                case UNK_CLASS_TYPE:
                    return intern();
                case UNKNOWN_TYPE:
                    return UNKNOWN;
                case DIGIT_TYPE:
                    return DIGIT;
                case LETTER_TYPE:
                    return LETTER;
                case BEGIN_WORD_TYPE:
                    return BEGIN_WORD;
                case END_WORD_TYPE:
                    return END_WORD;
                default:
                    throw new InvalidObjectException("ILLEGAL VALUE IN SERIALIZED SYMBOL");
            }
        }

        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof Symbol)) {
                return false;
            }
            Symbol other = (Symbol)o;
            if (ch != other.ch || type != other.type) {
                return false;
            }
            // Null-safe comparison of the unknown-class names.
            return unkClass == null ? other.unkClass == null : unkClass.equals(other.unkClass);
        }

        @Override
        public int hashCode() {
            int unkHash = unkClass == null ? 0 : unkClass.hashCode();
            return 29 * (29 * ch + unkHash) + type;
        }
    }
}

