/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.fst.tests;

import cc.mallet.fst.CRF;
import cc.mallet.fst.CRFOptimizableByLabelLikelihood;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
import cc.mallet.fst.CRFTrainerByStochasticGradient;
import cc.mallet.fst.MaxLatticeDefault;
import cc.mallet.fst.SumLatticeDefault;
import cc.mallet.fst.SumLatticeScaling;
import cc.mallet.fst.TokenAccuracyEvaluator;
import cc.mallet.fst.Transducer;
import cc.mallet.optimize.tests.TestOptimizable;
import cc.mallet.pipe.CharSequence2TokenSequence;
import cc.mallet.pipe.LineGroupString2TokenSequence;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.PrintInputAndTarget;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Target2LabelSequence;
import cc.mallet.pipe.TokenSequence2FeatureVectorSequence;
import cc.mallet.pipe.TokenSequenceLowercase;
import cc.mallet.pipe.TokenSequenceMatchDataAndTarget;
import cc.mallet.pipe.TokenSequenceParseFeatureString;
import cc.mallet.pipe.iterator.ArrayIterator;
import cc.mallet.pipe.iterator.LineGroupIterator;
import cc.mallet.pipe.tsf.OffsetConjunctions;
import cc.mallet.pipe.tsf.TokenText;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.MatrixOps;
import cc.mallet.types.Sequence;
import cc.mallet.types.SparseVector;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import cc.mallet.util.FileUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Random;
import java.util.regex.Pattern;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

public class TestCRF
extends TestCase {
    public static final String[] data = new String[]{"Free software is a matter of the users' freedom to run, copy, distribute, study, change and improve the software. More precisely, it refers to four kinds of freedom, for the users of the software.", "The freedom to run the program, for any purpose.", "The freedom to study how the program works, and adapt it to your needs.", "The freedom to redistribute copies so you can help your neighbor.", "The freedom to improve the program, and release your improvements to the public, so that the whole community benefits.", "A program is free software if users have all of these freedoms. Thus, you should be free to redistribute copies, either with or without modifications, either gratis or charging a fee for distribution, to anyone anywhere. Being free to do these things means (among other things) that you do not have to ask or pay for permission.", "You should also have the freedom to make modifications and use them privately in your own work or play, without even mentioning that they exist. If you do publish your changes, you should not be required to notify anyone in particular, or in any particular way.", "In order for the freedoms to make changes, and to publish improved versions, to be meaningful, you must have access to the source code of the program. Therefore, accessibility of source code is a necessary condition for free software.", "Finally, note that criteria such as those stated in this free software definition require careful thought for their interpretation. To decide whether a specific software license qualifies as a free software license, we judge it based on these criteria to determine whether it fits their spirit as well as the precise words. If a license includes unconscionable restrictions, we reject it, even if we did not anticipate the issue in these criteria. Sometimes a license requirement raises an issue that calls for extensive thought, including discussions with a lawyer, before we can decide if the requirement is acceptable. When we reach a conclusion about a new issue, we often update these criteria to make it easier to see why certain licenses do or don't qualify.", "In order for these freedoms to be real, they must be irrevocable as long as you do nothing wrong; if the developer of the software has the power to revoke the license, without your doing anything to give cause, the software is not free.", "However, certain kinds of rules about the manner of distributing free software are acceptable, when they don't conflict with the central freedoms. For example, copyleft (very simply stated) is the rule that when redistributing the program, you cannot add restrictions to deny other people the central freedoms. This rule does not conflict with the central freedoms; rather it protects them.", "Thus, you may have paid money to get copies of free software, or you may have obtained copies at no charge. But regardless of how you got your copies, you always have the freedom to copy and change the software, even to sell copies.", "Rules about how to package a modified version are acceptable, if they don't effectively block your freedom to release modified versions. Rules that ``if you make the program available in this way, you must make it available in that way also'' can be acceptable too, on the same condition. (Note that such a rule still leaves you the choice of whether to publish the program or not.) It is also acceptable for the license to require that, if you have distributed a modified version and a previous developer asks for a copy of it, you must send one.", "Sometimes government export control regulations and trade sanctions can constrain your freedom to distribute copies of programs internationally. Software developers do not have the power to eliminate or override these restrictions, but what they can and must do is refuse to impose them as conditions of use of the program. In this way, the restrictions will not affect activities and people outside the jurisdictions of these governments.", "Finally, note that criteria such as those stated in this free software definition require careful thought for their interpretation. To decide whether a specific software license qualifies as a free software license, we judge it based on these criteria to determine whether it fits their spirit as well as the precise words. If a license includes unconscionable restrictions, we reject it, even if we did not anticipate the issue in these criteria. Sometimes a license requirement raises an issue that calls for extensive thought, including discussions with a lawyer, before we can decide if the requirement is acceptable. When we reach a conclusion about a new issue, we often update these criteria to make it easier to see why certain licenses do or don't qualify.", "The GNU Project was launched in 1984 to develop a complete Unix-like operating system which is free software: the GNU system."};
    static String toy = "A a\nB b\nC c\nD d\nB b\nC c\n";
    private static String oldCrfFile = "test/edu/umass/cs/mallet/base/fst/crf.cnl03.ser.gz";
    private static String testString = "John NNP B-NP O\nDoe NNP I-NP O\nsaid VBZ B-VP O\nhi NN B-NP O\n";

    public TestCRF(String name) {
        super(name);
    }

    public void testGetSetParameters() {
        int inputVocabSize = 100;
        int numStates = 5;
        Alphabet inputAlphabet = new Alphabet();
        for (int i = 0; i < inputVocabSize; ++i) {
            inputAlphabet.lookupIndex("feature" + i);
        }
        Alphabet outputAlphabet = new Alphabet();
        CRF crf = new CRF(inputAlphabet, outputAlphabet);
        String[] stateNames = new String[numStates];
        for (int i = 0; i < numStates; ++i) {
            stateNames[i] = "state" + i;
        }
        crf.addFullyConnectedStates(stateNames);
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        CRFOptimizableByLabelLikelihood mcrf = crft.getOptimizableCRF(new InstanceList(null));
        TestOptimizable.testGetSetParameters(mcrf);
    }

    public void testSumLogProb() {
        double w1 = Math.log(0.2);
        double w2 = Math.log(0.8);
        double s1 = Math.log(1.0);
        double s2 = Transducer.sumLogProb(w1, w2);
        TestCRF.assertEquals(s1, s2, 1.0E-5);
        w1 = Math.log(99999.0);
        w2 = Math.log(1.0E-4);
        s1 = Math.log(99999.0001);
        s2 = Transducer.sumLogProb(w1, w2);
        TestCRF.assertEquals(s1, s2, 1.0E-5);
    }

    public void testSumLattice() {
        int time;
        int inputVocabSize = 1;
        int numStates = 2;
        Alphabet inputAlphabet = new Alphabet();
        for (int i = 0; i < inputVocabSize; ++i) {
            inputAlphabet.lookupIndex("feature" + i);
        }
        Alphabet outputAlphabet = new Alphabet();
        CRF crf = new CRF(inputAlphabet, outputAlphabet);
        String[] stateNames = new String[numStates];
        for (int i = 0; i < numStates; ++i) {
            stateNames[i] = "state" + i;
        }
        crf.addFullyConnectedStates(stateNames);
        crf.setWeightsDimensionDensely();
        crf.getState(0).setInitialWeight(1.0);
        crf.getState(1).setInitialWeight(Double.NEGATIVE_INFINITY);
        crf.getState(0).setFinalWeight(0.0);
        crf.getState(1).setFinalWeight(0.0);
        crf.setParameter(0, 0, 0, Double.NEGATIVE_INFINITY);
        crf.setParameter(0, 1, 0, 1.0);
        crf.setParameter(1, 1, 0, 1.0);
        crf.setParameter(1, 0, 0, Double.NEGATIVE_INFINITY);
        FeatureVectorSequence fvs = new FeatureVectorSequence(new FeatureVector[]{new FeatureVector(crf.getInputAlphabet(), new double[]{1.0}), new FeatureVector(crf.getInputAlphabet(), new double[]{1.0}), new FeatureVector(crf.getInputAlphabet(), new double[]{1.0})});
        SumLatticeDefault lattice = new SumLatticeDefault((Transducer)crf, (Sequence)fvs, true);
        TestCRF.assertTrue(lattice.getGammaProbability(0, crf.getState(0)) == 1.0);
        TestCRF.assertTrue(lattice.getGammaProbability(0, crf.getState(1)) == 0.0);
        TestCRF.assertTrue(lattice.getGammaProbability(1, crf.getState(0)) == 0.0);
        TestCRF.assertTrue(lattice.getGammaProbability(1, crf.getState(1)) == 1.0);
        TestCRF.assertTrue(lattice.getXiProbability(1, crf.getState(1), crf.getState(1)) == 1.0);
        TestCRF.assertTrue(lattice.getXiProbability(1, crf.getState(1), crf.getState(0)) == 0.0);
        TestCRF.assertTrue("Lattice weight = " + lattice.getTotalWeight(), lattice.getTotalWeight() == 4.0);
        for (time = 0; time < lattice.length() - 1; ++time) {
            double gammasum = lattice.getGammaProbability(time, crf.getState(0)) + lattice.getGammaProbability(time, crf.getState(1));
            TestCRF.assertEquals("Gammas at time step " + time + " sum to " + gammasum, 1.0, gammasum, 1.0E-4);
        }
        for (time = 0; time < lattice.length() - 1; ++time) {
            double xissum = lattice.getXiProbability(time, crf.getState(0), crf.getState(0)) + lattice.getXiProbability(time, crf.getState(0), crf.getState(1)) + lattice.getXiProbability(time, crf.getState(1), crf.getState(0)) + lattice.getXiProbability(time, crf.getState(1), crf.getState(1));
            TestCRF.assertEquals("Xis at time step " + time + " sum to " + xissum, 1.0, xissum, 1.0E-4);
        }
    }

    public void testMaxLattice() {
        int inputVocabSize = 1;
        int numStates = 2;
        Alphabet inputAlphabet = new Alphabet();
        for (int i = 0; i < inputVocabSize; ++i) {
            inputAlphabet.lookupIndex("feature" + i);
        }
        Alphabet outputAlphabet = new Alphabet();
        CRF crf = new CRF(inputAlphabet, outputAlphabet);
        String[] stateNames = new String[numStates];
        for (int i = 0; i < numStates; ++i) {
            stateNames[i] = "state" + i;
        }
        crf.addFullyConnectedStates(stateNames);
        crf.setWeightsDimensionDensely();
        crf.getState(0).setInitialWeight(1.0);
        crf.getState(1).setInitialWeight(Double.NEGATIVE_INFINITY);
        crf.getState(0).setFinalWeight(0.0);
        crf.getState(1).setFinalWeight(0.0);
        crf.setParameter(0, 0, 0, Double.NEGATIVE_INFINITY);
        crf.setParameter(0, 1, 0, 1.0);
        crf.setParameter(1, 1, 0, 1.0);
        crf.setParameter(1, 0, 0, Double.NEGATIVE_INFINITY);
        FeatureVectorSequence fvs = new FeatureVectorSequence(new FeatureVector[]{new FeatureVector(crf.getInputAlphabet(), new double[]{1.0}), new FeatureVector(crf.getInputAlphabet(), new double[]{1.0}), new FeatureVector(crf.getInputAlphabet(), new double[]{1.0})});
        MaxLatticeDefault lattice = new MaxLatticeDefault(crf, fvs);
        Sequence<Transducer.State> viterbiPath = lattice.bestStateSequence();
        TestCRF.assertTrue(viterbiPath.get(0) == crf.getState(0));
        TestCRF.assertTrue(viterbiPath.get(1) == crf.getState(1));
        TestCRF.assertTrue(viterbiPath.get(2) == crf.getState(1));
    }

    public void doTestCost(boolean useSave) {
        CRF crf;
        int inputVocabSize = 4;
        int numStates = 5;
        File f = new File("TestObject.obj");
        File f2 = new File("TestObject2.obj");
        Alphabet inputAlphabet = new Alphabet();
        for (int i = 0; i < inputVocabSize; ++i) {
            inputAlphabet.lookupIndex("feature" + i);
        }
        Alphabet outputAlphabet = new Alphabet();
        String[] stateNames = new String[numStates];
        for (int i = 0; i < numStates; ++i) {
            stateNames[i] = "state" + i;
            outputAlphabet.lookupIndex(stateNames[i]);
        }
        CRF saveCRF = crf = new CRF(inputAlphabet, outputAlphabet);
        FeatureVectorSequence fvs = new FeatureVectorSequence(new FeatureVector[]{new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}), new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}), new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3}), new FeatureVector(crf.getInputAlphabet(), new int[]{1, 2, 3})});
        FeatureSequence ss = new FeatureSequence(crf.getOutputAlphabet(), new int[]{0, 1, 2, 3});
        InstanceList ilist = new InstanceList(new Noop(inputAlphabet, outputAlphabet));
        ilist.add(fvs, ss, null, null);
        crf.addFullyConnectedStates(stateNames);
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        crft.setUseSparseWeights(false);
        if (useSave) {
            ObjectOutputStream oos;
            try {
                oos = new ObjectOutputStream(new FileOutputStream(f));
                oos.writeObject(crf);
                oos.close();
            }
            catch (IOException e) {
                System.err.println("Exception writing file: " + e);
            }
            System.err.println("Wrote out CRF");
            System.err.println("CRF parameters. hyperbolicPriorSlope: " + crft.getUseHyperbolicPriorSlope() + ". hyperbolicPriorSharpness: " + crft.getUseHyperbolicPriorSharpness() + ". gaussianPriorVariance: " + crft.getGaussianPriorVariance());
            crf = null;
            try {
                ObjectInputStream ois = new ObjectInputStream(new FileInputStream(f));
                crf = (CRF)ois.readObject();
                ois.close();
            }
            catch (IOException e) {
                System.err.println("Exception reading file: " + e);
            }
            catch (ClassNotFoundException cnfe) {
                System.err.println("Cound not find class reading in object: " + cnfe);
            }
            System.err.println("Read in CRF.");
            System.err.println("CRF parameters. hyperbolicPriorSlope: " + crft.getUseHyperbolicPriorSlope() + ". hyperbolicPriorSharpness: " + crft.getUseHyperbolicPriorSharpness() + ". gaussianPriorVariance: " + crft.getGaussianPriorVariance());
            try {
                oos = new ObjectOutputStream(new FileOutputStream(f2));
                oos.writeObject(crf);
                oos.close();
            }
            catch (IOException e) {
                System.err.println("Exception writing file: " + e);
            }
            System.err.println("Wrote out CRF");
            crf = saveCRF;
        }
        CRFOptimizableByLabelLikelihood mcrf = crft.getOptimizableCRF(ilist);
        double unconstrainedWeight = new SumLatticeDefault(crf, fvs).getTotalWeight();
        double constrainedWeight = new SumLatticeDefault((Transducer)crf, (Sequence)fvs, ss).getTotalWeight();
        double optimizableValue = 0.0;
        double gradientNorm = 0.0;
        double[] gradient = new double[mcrf.getNumParameters()];
        for (int i = 0; i < numStates; ++i) {
            for (int j = 0; j < numStates; ++j) {
                for (int k = 0; k < inputVocabSize; ++k) {
                    crf.setParameter(i, j, k, (k + i + j) * (k * i + i * j));
                    unconstrainedWeight = new SumLatticeDefault(crf, fvs).getTotalWeight();
                    constrainedWeight = new SumLatticeDefault((Transducer)crf, (Sequence)fvs, ss).getTotalWeight();
                    optimizableValue = mcrf.getValue();
                    mcrf.getValueGradient(gradient);
                    gradientNorm = MatrixOps.oneNorm(gradient);
                    System.out.println("parameters " + i + " " + j + " " + k + ": unconstrainedWeight =" + unconstrainedWeight + " constrainedWeight =" + constrainedWeight + " optimizableValue =" + optimizableValue + " gradientNorm =" + gradientNorm);
                }
            }
        }
        TestCRF.assertTrue("Value should be 35770 but is" + optimizableValue, Math.abs(optimizableValue + 35770.0) < 0.001);
        TestCRF.assertTrue(Math.abs(gradientNorm - 520.0) < 0.001);
    }

    public void testCost() {
        this.doTestCost(false);
    }

    public void testCostSerialized() {
        this.doTestCost(true);
    }

    public void testIncrement() {
    }

    public void testValueGradient() {
        this.doTestSpacePrediction(true);
    }

    public void testTrain() {
        this.doTestSpacePrediction(false);
    }

    public void doTestSpacePrediction(boolean testValueAndGradient) {
        Pipe p = this.makeSpacePredictionPipe();
        TestCRF2String p2 = new TestCRF2String();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        InstanceList[] lists = instances.split(new Random(1L), new double[]{0.5, 0.5});
        CRF crf = new CRF(p, p2);
        crf.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        if (testValueAndGradient) {
            CRFOptimizableByLabelLikelihood optable = crft.getOptimizableCRF(lists[0]);
            double[] gradient = new double[optable.getNumParameters()];
            optable.getValueGradient(gradient);
            TestOptimizable.testValueAndGradient(optable);
        } else {
            Sequence output;
            Sequence input;
            Instance inst;
            int i;
            System.out.println("Training Accuracy before training = " + crf.averageTokenAccuracy(lists[0]));
            System.out.println("Testing  Accuracy before training = " + crf.averageTokenAccuracy(lists[1]));
            System.out.println("Training...");
            crft.trainIncremental(lists[0]);
            System.out.println("Training Accuracy after training = " + crf.averageTokenAccuracy(lists[0]));
            System.out.println("Testing  Accuracy after training = " + crf.averageTokenAccuracy(lists[1]));
            System.out.println("Training results:");
            for (i = 0; i < lists[0].size(); ++i) {
                inst = (Instance)lists[0].get(i);
                input = (Sequence)inst.getData();
                output = crf.transduce(input);
                System.out.println(output);
            }
            System.out.println("Testing results:");
            for (i = 0; i < lists[1].size(); ++i) {
                inst = (Instance)lists[1].get(i);
                input = (Sequence)inst.getData();
                output = crf.transduce(input);
                System.out.println(output);
            }
        }
    }

    public void doTestSpacePrediction(boolean testValueAndGradient, boolean useSaved, boolean useSparseWeights) {
        Pipe p = this.makeSpacePredictionPipe();
        File f = new File("TestObject.obj");
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        InstanceList[] lists = instances.split(new double[]{0.5, 0.5});
        CRF crf = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        crft.setUseSparseWeights(useSparseWeights);
        if (testValueAndGradient) {
            CRFOptimizableByLabelLikelihood minable = crft.getOptimizableCRF(lists[0]);
            TestOptimizable.testValueAndGradient(minable);
        } else {
            System.out.println("Training Accuracy before training = " + crf.averageTokenAccuracy(lists[0]));
            System.out.println("Testing  Accuracy before training = " + crf.averageTokenAccuracy(lists[1]));
            CRF savedCRF = crf;
            System.out.println("Training serialized crf.");
            crft.trainIncremental(lists[0]);
            double preTrainAcc = crf.averageTokenAccuracy(lists[0]);
            double preTestAcc = crf.averageTokenAccuracy(lists[1]);
            System.out.println("Training Accuracy after training = " + preTrainAcc);
            System.out.println("Testing  Accuracy after training = " + preTestAcc);
            try {
                ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f));
                oos.writeObject(crf);
                oos.close();
            }
            catch (IOException e) {
                System.err.println("Exception writing file: " + e);
            }
            System.err.println("Wrote out CRF");
            System.err.println("CRF parameters. hyperbolicPriorSlope: " + crft.getUseHyperbolicPriorSlope() + ". hyperbolicPriorSharpness: " + crft.getUseHyperbolicPriorSharpness() + ". gaussianPriorVariance: " + crft.getGaussianPriorVariance());
            if (useSaved) {
                crf = null;
                try {
                    ObjectInputStream ois = new ObjectInputStream(new FileInputStream(f));
                    crf = (CRF)ois.readObject();
                    ois.close();
                }
                catch (IOException e) {
                    System.err.println("Exception reading file: " + e);
                }
                catch (ClassNotFoundException cnfe) {
                    System.err.println("Cound not find class reading in object: " + cnfe);
                }
                System.err.println("Read in CRF.");
                crf = savedCRF;
                double postTrainAcc = crf.averageTokenAccuracy(lists[0]);
                double postTestAcc = crf.averageTokenAccuracy(lists[1]);
                System.out.println("Training Accuracy after saving = " + postTrainAcc);
                System.out.println("Testing  Accuracy after saving = " + postTestAcc);
                TestCRF.assertEquals(postTrainAcc, preTrainAcc, 1.0E-4);
                TestCRF.assertEquals(postTestAcc, preTestAcc, 1.0E-4);
            }
        }
    }

    private Pipe makeSpacePredictionPipe() {
        SerialPipes p = new SerialPipes(new Pipe[]{new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestCRFTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][]{{0}, {1}, {-1, 0}}), new TokenSequence2FeatureVectorSequence()});
        return p;
    }

    public void testAddOrderNStates() {
        Pipe p = this.makeSpacePredictionPipe();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        InstanceList[] lists = instances.split(new Random(678L), new double[]{0.5, 0.5});
        CRF crf1 = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf1.addOrderNStates(lists[0], new int[]{1}, new boolean[]{false}, "START", null, null, false);
        new CRFTrainerByLabelLikelihood(crf1).trainIncremental(lists[0]);
        CRF crf2 = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf2.addOrderNStates(lists[0], new int[]{1, 2}, new boolean[]{false, true}, "START", null, null, false);
        new CRFTrainerByLabelLikelihood(crf2).trainIncremental(lists[0]);
        CRF crf3 = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf3.addOrderNStates(lists[0], new int[]{1, 2}, new boolean[]{false, false}, "START", null, null, false);
        new CRFTrainerByLabelLikelihood(crf3).trainIncremental(lists[0]);
        double lik1 = this.getLikelihood(crf1, lists[0]);
        double lik2 = this.getLikelihood(crf2, lists[0]);
        double lik3 = this.getLikelihood(crf3, lists[0]);
        System.out.println("CRF1 likelihood " + lik1);
        TestCRF.assertTrue("Final zero-order likelihood <" + lik1 + "> greater than first-order <" + lik2 + ">", lik1 < lik2);
        TestCRF.assertTrue("Final defaults-only likelihood <" + lik2 + "> greater than full first-order <" + lik3 + ">", lik2 < lik3);
        TestCRF.assertEquals(-167.2234457483949, lik1, 1.0E-4);
        TestCRF.assertEquals(-165.81326484466342, lik2, 1.0E-4);
        TestCRF.assertEquals(-90.37680146432787, lik3, 1.0E-4);
    }

    double getLikelihood(CRF crf, InstanceList data) {
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        CRFOptimizableByLabelLikelihood mcrf = crft.getOptimizableCRF(data);
        double[] params = new double[mcrf.getNumParameters()];
        mcrf.getParameters(params);
        mcrf.setParameters(params);
        return mcrf.getValue();
    }

    public void testFrozenWeights() {
        Pipe p = this.makeSpacePredictionPipe();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        CRF crf1 = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf1.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood(crf1);
        crft1.trainIncremental(instances);
        CRF crf2 = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf2.addFullyConnectedStatesForLabels();
        for (int i = 0; i < crf2.getWeights().length; i += 2) {
            crf2.freezeWeights(i);
        }
        CRFTrainerByLabelLikelihood crft2 = new CRFTrainerByLabelLikelihood(crf2);
        crft2.trainIncremental(instances);
        SparseVector[] w = crf2.getWeights();
        double[] b = crf2.getDefaultWeights();
        for (int i = 0; i < w.length; i += 2) {
            TestCRF.assertEquals(0.0, b[i], 1.0E-10);
            for (int loc = 0; loc < w[i].numLocations(); ++loc) {
                TestCRF.assertEquals(0.0, w[i].valueAtLocation(loc), 1.0E-10);
            }
        }
        CRFOptimizableByLabelLikelihood optable1 = crft1.getOptimizableCRF(instances);
        CRFOptimizableByLabelLikelihood optable2 = crft2.getOptimizableCRF(instances);
        double val1 = optable1.getValue();
        double val2 = optable2.getValue();
        TestCRF.assertTrue("Error: Freezing weights does not harm log-likelihood!  Full " + val1 + ", Frozen " + val2, val1 > val2);
    }

    public void testDenseTrain() {
        this.doTestSpacePrediction(false, false, false);
    }

    public void testTrainStochasticGradient() {
        Pipe p = this.makeSpacePredictionPipe();
        TestCRF2String p2 = new TestCRF2String();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        InstanceList[] lists = instances.split(new double[]{0.5, 0.5});
        CRF crf = new CRF(p, p2);
        crf.addFullyConnectedStatesForLabels();
        crf.setWeightsDimensionAsIn(lists[0], false);
        CRFTrainerByStochasticGradient crft = new CRFTrainerByStochasticGradient(crf, 1.0E-4);
        System.out.println("Training Accuracy before training = " + crf.averageTokenAccuracy(lists[0]));
        System.out.println("Testing  Accuracy before training = " + crf.averageTokenAccuracy(lists[1]));
        System.out.println("Training...");
        crft.setLearningRateByLikelihood(lists[0]);
        crft.train(lists[0], 100);
        crf.print();
        System.out.println("Training Accuracy after training = " + crf.averageTokenAccuracy(lists[0]));
        System.out.println("Testing  Accuracy after training = " + crf.averageTokenAccuracy(lists[1]));
    }

    public void testSumLatticeImplementations() {
        Pipe p = this.makeSpacePredictionPipe();
        TestCRF2String p2 = new TestCRF2String();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        InstanceList[] lists = instances.split(new double[]{0.5, 0.5});
        CRF crf = new CRF(p, p2);
        crf.addFullyConnectedStatesForLabels();
        crf.setWeightsDimensionAsIn(lists[0], false);
        CRFTrainerByStochasticGradient crft = new CRFTrainerByStochasticGradient(crf, 1.0E-4);
        System.out.println("Training Accuracy before training = " + crf.averageTokenAccuracy(lists[0]));
        System.out.println("Testing  Accuracy before training = " + crf.averageTokenAccuracy(lists[1]));
        System.out.println("Training...");
        crft.setLearningRateByLikelihood(lists[0]);
        crft.train(lists[0], 100);
        crf.print();
        System.out.println("Training Accuracy after training = " + crf.averageTokenAccuracy(lists[0]));
        System.out.println("Testing  Accuracy after training = " + crf.averageTokenAccuracy(lists[1]));
        long totalTimeDefault = 0L;
        long totalTimeScaling = 0L;
        for (int iter = 0; iter < 10000; ++iter) {
            for (int ii = 0; ii < lists[1].size(); ++ii) {
                FeatureVectorSequence input = (FeatureVectorSequence)((Instance)lists[1].get(ii)).getData();
                totalTimeDefault -= System.currentTimeMillis();
                SumLatticeDefault defaultLattice = new SumLatticeDefault((Transducer)crf, (Sequence)input, true);
                totalTimeDefault += System.currentTimeMillis();
                totalTimeScaling -= System.currentTimeMillis();
                SumLatticeScaling scalingLattice = new SumLatticeScaling((Transducer)crf, (Sequence)input, true);
                totalTimeScaling += System.currentTimeMillis();
                if (iter != 0) continue;
                TestCRF.assertEquals(defaultLattice.getTotalWeight(), scalingLattice.getTotalWeight(), 1.0E-4);
                double[][] g1 = defaultLattice.getGammas();
                double[][] g2 = scalingLattice.getGammas();
                for (int i = 0; i < g1.length; ++i) {
                    for (int j = 0; j < g1[i].length; ++j) {
                        TestCRF.assertEquals(g1[i][j], g2[i][j], 1.0E-4);
                    }
                }
                double[][][] x1 = defaultLattice.getXis();
                double[][][] x2 = scalingLattice.getXis();
                for (int i = 0; i < x1.length; ++i) {
                    for (int j = 0; j < x1[i].length; ++j) {
                        for (int k = 0; k < x1[i][j].length; ++k) {
                            TestCRF.assertEquals(x1[i][j][k], x2[i][j][k], 1.0E-4);
                        }
                    }
                }
            }
            if ((iter + 1) % 100 == 0) {
                System.out.print(iter + 1 + ". ");
                System.out.flush();
            }
            if ((iter + 1) % 1000 != 0) continue;
            System.out.println();
        }
        System.out.println();
        System.out.println("Time in ms (default) = " + totalTimeDefault);
        System.out.println("Time in ms (scaling) = " + totalTimeScaling);
        if (totalTimeScaling > totalTimeDefault) {
            System.out.println("SumLatticeDefault FTW!! (timeDiff=" + (totalTimeScaling - totalTimeDefault) + " ms)");
        } else {
            System.out.println("SumLatticeScaling FTW!! (timeDiff=" + (totalTimeDefault - totalTimeScaling) + " ms)");
        }
    }

    public void testSerialization() {
        this.doTestSpacePrediction(false, true, true);
    }

    public void testDenseSerialization() {
        this.doTestSpacePrediction(false, true, false);
    }

    public void testTokenAccuracy() {
        Pipe p = this.makeSpacePredictionPipe();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        InstanceList[] lists = instances.split(new Random(777L), new double[]{0.5, 0.5});
        CRF crf = new CRF(p.getDataAlphabet(), p.getTargetAlphabet());
        crf.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        crft.setUseSparseWeights(true);
        crft.trainIncremental(lists[0]);
        TokenAccuracyEvaluator eval = new TokenAccuracyEvaluator(lists, new String[]{"Train", "Test"});
        eval.evaluateInstanceList(crft, lists[1], "Test");
        TestCRF.assertEquals(0.9409, eval.getAccuracy("Test"), 0.001);
    }

    public void testPrint() {
        SerialPipes p = new SerialPipes(new Pipe[]{new CharSequence2TokenSequence("."), new TokenText(), new TestCRFTokenSequenceRemoveSpaces(), new TokenSequence2FeatureVectorSequence(), new PrintInputAndTarget()});
        InstanceList one = new InstanceList(p);
        Object[] data = new String[]{"ABCDE"};
        one.addThruPipe(new ArrayIterator(data));
        CRF crf = new CRF(p, null);
        crf.addFullyConnectedStatesForThreeQuarterLabels(one);
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        crf.setWeightsDimensionAsIn(one, false);
        CRFOptimizableByLabelLikelihood mcrf = crft.getOptimizableCRF(one);
        double[] params = new double[mcrf.getNumParameters()];
        for (int i = 0; i < params.length; ++i) {
            params[i] = i;
        }
        mcrf.setParameters(params);
        crf.print();
    }

    public void testCopyStatesAndWeights() {
        SerialPipes p = new SerialPipes(new Pipe[]{new CharSequence2TokenSequence("."), new TokenText(), new TestCRFTokenSequenceRemoveSpaces(), new TokenSequence2FeatureVectorSequence(), new PrintInputAndTarget()});
        InstanceList one = new InstanceList(p);
        Object[] data = new String[]{"ABCDE"};
        one.addThruPipe(new ArrayIterator(data));
        CRF crf = new CRF(p, null);
        crf.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        crf.setWeightsDimensionAsIn(one, false);
        CRFOptimizableByLabelLikelihood mcrf = crft.getOptimizableCRF(one);
        double[] params = new double[mcrf.getNumParameters()];
        for (int i = 0; i < params.length; ++i) {
            params[i] = i;
        }
        mcrf.setParameters(params);
        StringWriter out2 = new StringWriter();
        crf.print(new PrintWriter((Writer)out2, true));
        System.out.println("------------- CRF1 -------------");
        crf.print();
        CRF crf2 = new CRF(crf);
        StringWriter out22 = new StringWriter();
        crf2.print(new PrintWriter((Writer)out22, true));
        System.out.println("------------- CRF2 -------------");
        crf2.print();
        TestCRF.assertEquals(out2.toString(), out22.toString());
        double val1 = mcrf.getValue();
        CRFTrainerByLabelLikelihood crft2 = new CRFTrainerByLabelLikelihood(crf2);
        double val2 = crft2.getOptimizableCRF(one).getValue();
        TestCRF.assertEquals(val1, val2, 1.0E-5);
    }

    public void testStartState() {
        SerialPipes p = new SerialPipes(new Pipe[]{new LineGroupString2TokenSequence(), new TokenSequenceMatchDataAndTarget(Pattern.compile("^(\\S+) (.*)"), 2, 1), new TokenSequenceParseFeatureString(false), new TokenText(), new TokenSequence2FeatureVectorSequence(true, false), new Target2LabelSequence(), new PrintInputAndTarget()});
        InstanceList data = new InstanceList(p);
        data.addThruPipe(new LineGroupIterator(new StringReader(toy), Pattern.compile("\n"), true));
        CRF crf = new CRF(p, null);
        crf.print();
        crf.addStatesForLabelsConnectedAsIn(data);
        crf.addStartState();
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        CRFOptimizableByLabelLikelihood maxable = crft.getOptimizableCRF(data);
        TestCRF.assertEquals(-1.3862, maxable.getValue(), 1.0E-4);
        crf = new CRF(p, null);
        crf.addOrderNStates(data, new int[]{1}, null, "A", null, null, false);
        crf.print();
        crft = new CRFTrainerByLabelLikelihood(crf);
        maxable = crft.getOptimizableCRF(data);
        TestCRF.assertEquals(-3.09104245335831, maxable.getValue(), 1.0E-4);
    }

    public void testDenseFeatureSelection() {
        Pipe p = this.makeSpacePredictionPipe();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        CRF crf1 = new CRF(p, null);
        crf1.addOrderNStates(instances, new int[]{0}, null, "start", null, null, true);
        CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood(crf1);
        crft1.setUseSparseWeights(false);
        crft1.train(instances, 1);
        int nParams1 = crft1.getOptimizableCRF(instances).getNumParameters();
        CRF crf2 = new CRF(p, null);
        crf2.addOrderNStates(instances, new int[]{0, 1}, new boolean[]{false, true}, "start", null, null, true);
        CRFTrainerByLabelLikelihood crft2 = new CRFTrainerByLabelLikelihood(crf2);
        crft2.setUseSparseWeights(false);
        crft2.train(instances, 1);
        int nParams2 = crft2.getOptimizableCRF(instances).getNumParameters();
        TestCRF.assertEquals(nParams2, nParams1 + 4);
    }

    public void testXis() {
        Pipe p = this.makeSpacePredictionPipe();
        InstanceList instances = new InstanceList(p);
        instances.addThruPipe(new ArrayIterator(data));
        CRF crf1 = new CRF(p, null);
        crf1.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood(crf1);
        crft1.train(instances, 10);
        Instance inst = (Instance)instances.get(0);
        Sequence input = (Sequence)inst.getData();
        SumLatticeDefault lattice = new SumLatticeDefault((Transducer)crf1, input, (Sequence)inst.getTarget(), null, true);
        for (int ip = 0; ip < lattice.length() - 1; ++ip) {
            for (int i = 0; i < crf1.numStates(); ++i) {
                Transducer.State state = crf1.getState(i);
                Transducer.TransitionIterator it = state.transitionIterator(input, ip);
                double gamma = lattice.getGammaProbability(ip, state);
                double xiSum = 0.0;
                while (it.hasNext()) {
                    Transducer.State dest = it.nextState();
                    double xi = lattice.getXiProbability(ip, state, dest);
                    xiSum += xi;
                }
                TestCRF.assertEquals(gamma, xiSum, 1.0E-5);
            }
        }
    }

    public static Test suite() {
        return new TestSuite((Class<? extends TestCase>)TestCRF.class);
    }

    public void testStateAddWeights() {
        Pipe p = this.makeSpacePredictionPipe();
        InstanceList training = new InstanceList(p);
        training.addThruPipe(new ArrayIterator(data));
        CRF crf = new CRF(p, null);
        crf.addFullyConnectedStatesForLabels();
        CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood(crf);
        crft.trainIncremental(training);
        Sequence input = (Sequence)((Instance)training.get(0)).getData();
        Sequence<Object> output = new MaxLatticeDefault(crf, input).bestOutputSequence();
        boolean notstartFound = false;
        for (int i = 0; i < output.size(); ++i) {
            if (!output.get(i).toString().equals("notstart")) continue;
            notstartFound = true;
        }
        System.err.println(output.toString());
        TestCRF.assertTrue(notstartFound);
        CRF.State state = crf.getState("notstart");
        int widx = crf.getWeightsIndex("BadBad");
        int numFeatures = crf.getInputAlphabet().size();
        SparseVector w = new SparseVector(new double[numFeatures]);
        w.setAll(Double.NEGATIVE_INFINITY);
        crf.setWeights(widx, w);
        state.addWeight(0, "BadBad");
        state.addWeight(1, "BadBad");
        output = new MaxLatticeDefault(crf, input).bestOutputSequence();
        notstartFound = false;
        for (int i = 0; i < output.size() - 1; ++i) {
            if (!output.get(i).toString().equals("notstart")) continue;
            notstartFound = true;
        }
        TestCRF.assertTrue(!notstartFound);
    }

    public void skiptestOldCrf() {
        CRF crf = (CRF)FileUtils.readObject(new File(oldCrfFile));
        Instance inst = crf.getInputPipe().instanceFrom(new Instance(testString, null, null, null));
        Sequence output = crf.transduce((Sequence)inst.getData());
        String std = output.toString();
        TestCRF.assertEquals(" B-PER I-PER O O", std);
    }

    public static void main(String[] args) {
        TestSuite theSuite;
        if (args.length > 0) {
            theSuite = new TestSuite();
            for (int i = 0; i < args.length; ++i) {
                theSuite.addTest(new TestCRF(args[i]));
            }
        } else {
            theSuite = (TestSuite)TestCRF.suite();
        }
        TestRunner.run(theSuite);
    }

    public class TestCRF2String
    extends Pipe
    implements Serializable {
        private static final long serialVersionUID = 1L;
        private static final int CURRENT_SERIAL_VERSION = 0;

        @Override
        public Instance pipe(Instance carrier) {
            StringBuffer sb = new StringBuffer();
            String source = (String)carrier.getSource();
            Sequence as = (Sequence)carrier.getTarget();
            for (int i = 0; i < source.length(); ++i) {
                System.out.println("target[" + i + "]=" + as.get(i).toString());
                if (as.get(i).toString().equals("start") && i != 0) {
                    sb.append(' ');
                }
                sb.append(source.charAt(i));
            }
            carrier.setSource(sb.toString());
            System.out.println("carrier.getSource() = " + carrier.getSource());
            return carrier;
        }

        private void writeObject(ObjectOutputStream out2) throws IOException {
            out2.writeInt(0);
        }

        private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
            int version = in.readInt();
        }
    }

    public static class TestCRFTokenSequenceRemoveSpaces
    extends Pipe
    implements Serializable {
        private static final long serialVersionUID = 1L;
        private static final int CURRENT_SERIAL_VERSION = 0;

        public TestCRFTokenSequenceRemoveSpaces() {
            super(null, new Alphabet());
        }

        @Override
        public Instance pipe(Instance carrier) {
            TokenSequence ts = (TokenSequence)carrier.getData();
            TokenSequence newTs = new TokenSequence();
            FeatureSequence labelSeq = new FeatureSequence(this.getTargetAlphabet());
            boolean lastWasSpace = true;
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < ts.size(); ++i) {
                Token t = (Token)ts.get(i);
                if (t.getText().equals(" ")) {
                    lastWasSpace = true;
                    continue;
                }
                sb.append(t.getText());
                newTs.add(t);
                labelSeq.add(lastWasSpace ? "start" : "notstart");
                lastWasSpace = false;
            }
            if (this.isTargetProcessing()) {
                carrier.setTarget(labelSeq);
            }
            carrier.setData(newTs);
            carrier.setSource(sb.toString());
            return carrier;
        }

        private void writeObject(ObjectOutputStream out2) throws IOException {
            out2.writeInt(0);
        }

        private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
            int version = in.readInt();
        }
    }
}

