locked
dynamic bayesian network using shared variables RRS feed

  • Question

  • Is there an example somewhere of how to implement a dynamic bayesian network using shared variables?

    As a simple example, I have the following model:

    // Second-order autoregressive chain: x[i] ~ N(0.5*x[i-1] + 0.5*x[i-2], v_x)
    x = new Variable<double>[n];
    m_x = Variable.GaussianFromMeanAndVariance(1.0,1e+6);
    v_x  = Variable.GammaFromMeanAndVariance(1.0,1e+6);
    x[0] = Variable.GaussianFromMeanAndVariance(m_x, v_x);
    x[1] = Variable.GaussianFromMeanAndVariance(0.5*x[0] + 0.5*m_x, v_x);
    for (int i = 2; i < n; i++) 
    {
        x[i] = Variable.GaussianFromMeanAndVariance(0.5*x[i-1] + 0.5*x[i-2], v_x);
    }
    I would like to make n as large as 1000, which does not work when x is defined as an array of random variables (StackOverflowException). Can this be done using shared variables? And what would that look like? I've looked at the LDAShared example, but haven't figured out how to translate it to my setting. Thanks!

    Wednesday, July 17, 2013 8:27 AM

Answers

  • You can build a start-time model and slice model and hook them up using the shared variable ideas described in the user guide. For chained models like this it is easier to manually flange together the individual models. Below is an example using fixed mean and variance which shows the most difficult aspect of this - namely hooking together the states. I haven't fully checked that this operates as required, but it should give the general idea.

    John

    using MicrosoftResearch.Infer;
    using MicrosoftResearch.Infer.Distributions;
    using MicrosoftResearch.Infer.Models;
    using MicrosoftResearch.Infer.Utils;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace DynamicBayesianNetwork
    {
        /// <summary>
        /// One time slice of a chained (dynamic) Bayesian network. Two instances are
        /// used: a start-slice model (which draws the initial state from X0Prior) and
        /// a generic slice model (which maps the incoming state XIn through the matrix
        /// A and adds Gaussian noise). Consecutive slices are hooked together manually
        /// by passing messages between them; see TrainDBN.
        /// </summary>
        public class DBNSlice
        {
            // Transition matrix (observed) applied to the incoming state.
            public VariableArray<VariableArray<double>, double[][]> A;
            // Dimensionality of the state vector.
            public Variable<int> XDim;
            // Incoming state (only present on non-start slices).
            public VariableArray<double> XIn;
            // Outgoing state.
            public VariableArray<double> XOut;
            // Upstream message acting as the prior on the incoming state.
            public VariableArray<Gaussian> XInPrior;
            // Downstream message constraining the outgoing state.
            public VariableArray<Gaussian> XOutConstraint;
            // Prior on the initial state (only present on the start slice).
            public VariableArray<Gaussian> X0Prior;
            public Variable<double> NoiseVariance;
            public bool IsStartSlice;
            public InferenceEngine Engine;
            // Posteriors recovered by the most recent call to Train.
            public Gaussian[] xInPosterior;
            public Gaussian[] xOutPosterior;

            public DBNSlice(bool isStartSlice = false)
            {
                this.IsStartSlice = isStartSlice;
                this.XDim = Variable.New<int>().Named("XDim");
                var row = new Range(XDim).Named("row");
                var col = row.Clone().Named("col");
                this.A = Variable.Array(Variable.Array<double>(col), row).Named("A");
                this.NoiseVariance = Variable.New<double>().Named("v_x");

                if (this.IsStartSlice)
                {
                    this.X0Prior = Variable.Array<Gaussian>(row).Named("X0Prior");
                }
                else
                {
                    this.XInPrior = Variable.Array<Gaussian>(col).Named("XInPrior");
                    this.XIn = Variable<double>.Array(col).Named("XIn");
                    XIn[col] = Variable<double>.Random(this.XInPrior[col]);
                }

                this.XOut = Variable<double>.Array(row).Named("XOut");

                if (this.IsStartSlice)
                {
                    this.XOut[row].SetTo(Variable<double>.Random(this.X0Prior[row]));
                }
                else
                {
                    using (Variable.ForEach(row))
                    {
                        // XOut[row] ~ N(sum_col A[row][col] * XIn[col], NoiseVariance)
                        var prod = Variable.Array<double>(col);
                        prod[col] = A[row][col] * XIn[col];
                        var dot = Variable.Sum(prod);
                        this.XOut[row] = Variable.GaussianFromMeanAndVariance(dot, this.NoiseVariance);
                    }
                }

                // Constrain to incoming downstream messages
                this.XOutConstraint = Variable.Array<Gaussian>(row).Named("XOutConstraint");
                Variable.ConstrainEqualRandom(this.XOut[row], this.XOutConstraint[row]);
                this.Engine = new InferenceEngine() { NumberOfIterations = 5, ShowProgress = false };
            }

            /// <summary>
            /// Runs inference on this slice given the current upstream and downstream
            /// messages, and records the posteriors over the incoming and outgoing
            /// states in xInPosterior / xOutPosterior.
            /// </summary>
            public void Train(
                Gaussian[] x0Prior,
                Gaussian[] xInPrior,
                Gaussian[] xOutConstraint,
                double[][] a,
                double noiseVariance)
            {
                var xDim = x0Prior.Length;
                this.XDim.ObservedValue = xDim;

                if (this.IsStartSlice)
                {
                    this.X0Prior.ObservedValue = x0Prior;
                }
                else
                {
                    this.XInPrior.ObservedValue = xInPrior;
                }

                this.A.ObservedValue = a;
                this.XOutConstraint.ObservedValue = xOutConstraint;
                this.NoiseVariance.ObservedValue = noiseVariance;

                // The start slice has no XIn; its incoming "posterior" is just the prior.
                this.xInPosterior = this.IsStartSlice ? x0Prior : this.Engine.Infer<Gaussian[]>(this.XIn);
                this.xOutPosterior = this.Engine.Infer<Gaussian[]>(this.XOut);
            }

            /// <summary>
            /// Trains the chained DBN by sweeping backward and forward over the time
            /// slices, passing messages between consecutive slices, then prints the
            /// per-dimension state marginals for every time step.
            /// </summary>
            static public void TrainDBN(
                int nTimeSlices,
                double[][] a,
                double m_x,
                double v_x,
                int numIters)
            {
                var startModel = new DBNSlice(true);
                var sliceModel = new DBNSlice(false);
                int xDim = a.Length;

                Gaussian[] x0Prior = Util.ArrayInit(xDim, d => Gaussian.FromMeanAndVariance(m_x, v_x));

                // We need to maintain the output messages from each copy of the state variables at each time step.
                // There are two such copies - one output message when the state is a model input, and one when it is a model output.
                var xOutput = Util.ArrayInit(nTimeSlices, n => Util.ArrayInit(2, m => Util.ArrayInit(xDim, g => Gaussian.Uniform())));
                var xMarginal = Util.ArrayInit(nTimeSlices, n => Util.ArrayInit(xDim, g => Gaussian.Uniform()));
                var xInInput = Util.ArrayInit(xDim, g => Gaussian.Uniform());
                var xOutConstraint = Util.ArrayInit(xDim, g => Gaussian.Uniform());

                for (int it = 0; it < numIters; it++)
                {
                    // A backward and forward pass over the time slices
                    for (var bckFwd = 0; bckFwd < (2 * nTimeSlices) - 1; bckFwd++)
                    {
                        // Map the sweep counter onto a time index: 0..T-1, then T-2..0.
                        var t = bckFwd < nTimeSlices ? bckFwd : (2 * nTimeSlices) - 2 - bckFwd;

                        // Input message = marginals / output message
                        for (int d = 0; d < xDim; d++)
                        {
                            if (t > 0)
                                xInInput[d].SetToRatio(xMarginal[t - 1][d], xOutput[t - 1][0][d]);
                            xOutConstraint[d].SetToRatio(xMarginal[t][d], xOutput[t][1][d]);
                        }

                        // Inference
                        var model = t == 0 ? startModel : sliceModel;
                        model.Train(x0Prior, xInInput, xOutConstraint, a, v_x);

                        for (int d = 0; d < xDim; d++)
                        {
                            // Recover the marginals
                            if (t > 0)
                                xMarginal[t - 1][d].SetTo(model.xInPosterior[d]);
                            xMarginal[t][d].SetTo(model.xOutPosterior[d]);

                            // Set the output messages
                            if (t > 0)
                                xOutput[t - 1][0][d].SetToRatio(xMarginal[t - 1][d], xInInput[d], false);
                            xOutput[t][1][d].SetToRatio(xMarginal[t][d], xOutConstraint[d], false);
                        }
                    }
                }

                // Report the final per-dimension marginals for each time step.
                for (int t = 0; t < nTimeSlices; t++)
                {
                    for (int d = 0; d < xDim; d++)
                    {
                        Console.WriteLine("{0}\t", xMarginal[t][d]);
                    }
                }
            }
        }
    
        class Program
        {
            static void Main(string[] args)
            {
                // Transition matrix: the state vector holds the two most recent
                // values, so row 0 computes 0.5*x[t-1] + 0.5*x[t-2] and row 1
                // shifts x[t-1] into the second slot.
                double[][] transitionMatrix =
                {
                    new double[] { 0.5, 0.5 },
                    new double[] { 1.0, 0.0 },
                };

                DBNSlice.TrainDBN(1000, transitionMatrix, 0, 1, 5);
            }
        }
    }
    

    Thursday, July 25, 2013 10:43 AM
    Owner

All replies

  • Hi Alex

    You can certainly do this with shared variables, but depending on what you want to do, an easier thing may be to use 'offset indexing'. This is a feature that is not documented as the model compiler only supports it in special circumstances. If you batch your states into an array, then you can treat the first index as a special case as shown below. See how you get on with that. If this does not support your use cases, then let me know and I will show how to do the shared variable version.

    John

    using MicrosoftResearch.Infer;
    using MicrosoftResearch.Infer.Models;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace DynamicBayesianNetwork
    {
        class Program
        {
            static void Main(string[] args)
            {
                // Second-order autoregressive chain of 1000 time steps; the state at
                // each step is a 2-vector holding the two most recent values.
                var n = new Range(1000);
                var row = new Range(2);
                var col = row.Clone();
                var x = Variable.Array(Variable.Array<double>(row), n);
                var A = Variable.Array(Variable.Array<double>(col), row);
                var m_x = Variable.GaussianFromMeanAndVariance(1.0, 1e+6);
                var v_x  = Variable.GammaFromMeanAndVariance(1.0,1e+6);

                using (var fe = Variable.ForEach(n))
                {
                    // First time step is a special case: draw each state component
                    // from the broad prior.
                    using (Variable.If(fe.Index == 0))
                    {
                        x[n][row] = Variable.GaussianFromMeanAndVariance(m_x, v_x).ForEach(row);
                    }
                    // All later steps: 'offset indexing' — x[fe.Index - 1] refers to
                    // the previous chain element inside the ForEach block.
                    using (Variable.If(fe.Index > 0))
                    {
                        using (Variable.ForEach(row))
                        {
                            // x[n][row] ~ N(sum_col A[row][col] * x[n-1][col], v_x)
                            var prod = Variable.Array<double>(col);
                            prod[col] = A[row][col] * x[fe.Index - 1][col];
                            var dot = Variable.Sum(prod);
                            x[n][row] = Variable.GaussianFromMeanAndVariance(dot, v_x);
                        }
                    }
                }

                var engine = new InferenceEngine();
                A.ObservedValue = new double[][] { new double[] { 0.5, 0.5 }, new double[] { 1.0, 0.0 } };
                Console.WriteLine(engine.Infer(x));
            }
        }
    }
    

    Monday, July 22, 2013 8:56 AM
    Owner
  • Hi John,

    Thanks a lot, that's a very useful construction! I will play around with it a bit. If you don't mind, it would also be nice to see the shared variable version of the same model, even though it's overkill on this simple example. I suspect others may find this helpful as well.

    Alex 

    Tuesday, July 23, 2013 4:17 PM
  • You can build a start-time model and slice model and hook them up using the shared variable ideas described in the user guide. For chained models like this it is easier to manually flange together the individual models. Below is an example using fixed mean and variance which shows the most difficult aspect of this - namely hooking together the states. I haven't fully checked that this operates as required, but it should give the general idea.

    John

    using MicrosoftResearch.Infer;
    using MicrosoftResearch.Infer.Distributions;
    using MicrosoftResearch.Infer.Models;
    using MicrosoftResearch.Infer.Utils;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace DynamicBayesianNetwork
    {
        /// <summary>
        /// One time slice of a chained (dynamic) Bayesian network. Two instances are
        /// used: a start-slice model (which draws the initial state from X0Prior) and
        /// a generic slice model (which maps the incoming state XIn through the matrix
        /// A and adds Gaussian noise). Consecutive slices are hooked together manually
        /// by passing messages between them; see TrainDBN.
        /// </summary>
        public class DBNSlice
        {
            // Transition matrix (observed) applied to the incoming state.
            public VariableArray<VariableArray<double>, double[][]> A;
            // Dimensionality of the state vector.
            public Variable<int> XDim;
            // Incoming state (only present on non-start slices).
            public VariableArray<double> XIn;
            // Outgoing state.
            public VariableArray<double> XOut;
            // Upstream message acting as the prior on the incoming state.
            public VariableArray<Gaussian> XInPrior;
            // Downstream message constraining the outgoing state.
            public VariableArray<Gaussian> XOutConstraint;
            // Prior on the initial state (only present on the start slice).
            public VariableArray<Gaussian> X0Prior;
            public Variable<double> NoiseVariance;
            public bool IsStartSlice;
            public InferenceEngine Engine;
            // Posteriors recovered by the most recent call to Train.
            public Gaussian[] xInPosterior;
            public Gaussian[] xOutPosterior;

            public DBNSlice(bool isStartSlice = false)
            {
                this.IsStartSlice = isStartSlice;
                this.XDim = Variable.New<int>().Named("XDim");
                var row = new Range(XDim).Named("row");
                var col = row.Clone().Named("col");
                this.A = Variable.Array(Variable.Array<double>(col), row).Named("A");
                this.NoiseVariance = Variable.New<double>().Named("v_x");

                if (this.IsStartSlice)
                {
                    this.X0Prior = Variable.Array<Gaussian>(row).Named("X0Prior");
                }
                else
                {
                    this.XInPrior = Variable.Array<Gaussian>(col).Named("XInPrior");
                    this.XIn = Variable<double>.Array(col).Named("XIn");
                    XIn[col] = Variable<double>.Random(this.XInPrior[col]);
                }

                this.XOut = Variable<double>.Array(row).Named("XOut");

                if (this.IsStartSlice)
                {
                    this.XOut[row].SetTo(Variable<double>.Random(this.X0Prior[row]));
                }
                else
                {
                    using (Variable.ForEach(row))
                    {
                        // XOut[row] ~ N(sum_col A[row][col] * XIn[col], NoiseVariance)
                        var prod = Variable.Array<double>(col);
                        prod[col] = A[row][col] * XIn[col];
                        var dot = Variable.Sum(prod);
                        this.XOut[row] = Variable.GaussianFromMeanAndVariance(dot, this.NoiseVariance);
                    }
                }

                // Constrain to incoming downstream messages
                this.XOutConstraint = Variable.Array<Gaussian>(row).Named("XOutConstraint");
                Variable.ConstrainEqualRandom(this.XOut[row], this.XOutConstraint[row]);
                this.Engine = new InferenceEngine() { NumberOfIterations = 5, ShowProgress = false };
            }

            /// <summary>
            /// Runs inference on this slice given the current upstream and downstream
            /// messages, and records the posteriors over the incoming and outgoing
            /// states in xInPosterior / xOutPosterior.
            /// </summary>
            public void Train(
                Gaussian[] x0Prior,
                Gaussian[] xInPrior,
                Gaussian[] xOutConstraint,
                double[][] a,
                double noiseVariance)
            {
                var xDim = x0Prior.Length;
                this.XDim.ObservedValue = xDim;

                if (this.IsStartSlice)
                {
                    this.X0Prior.ObservedValue = x0Prior;
                }
                else
                {
                    this.XInPrior.ObservedValue = xInPrior;
                }

                this.A.ObservedValue = a;
                this.XOutConstraint.ObservedValue = xOutConstraint;
                this.NoiseVariance.ObservedValue = noiseVariance;

                // The start slice has no XIn; its incoming "posterior" is just the prior.
                this.xInPosterior = this.IsStartSlice ? x0Prior : this.Engine.Infer<Gaussian[]>(this.XIn);
                this.xOutPosterior = this.Engine.Infer<Gaussian[]>(this.XOut);
            }

            /// <summary>
            /// Trains the chained DBN by sweeping backward and forward over the time
            /// slices, passing messages between consecutive slices, then prints the
            /// per-dimension state marginals for every time step.
            /// </summary>
            static public void TrainDBN(
                int nTimeSlices,
                double[][] a,
                double m_x,
                double v_x,
                int numIters)
            {
                var startModel = new DBNSlice(true);
                var sliceModel = new DBNSlice(false);
                int xDim = a.Length;

                Gaussian[] x0Prior = Util.ArrayInit(xDim, d => Gaussian.FromMeanAndVariance(m_x, v_x));

                // We need to maintain the output messages from each copy of the state variables at each time step.
                // There are two such copies - one output message when the state is a model input, and one when it is a model output.
                var xOutput = Util.ArrayInit(nTimeSlices, n => Util.ArrayInit(2, m => Util.ArrayInit(xDim, g => Gaussian.Uniform())));
                var xMarginal = Util.ArrayInit(nTimeSlices, n => Util.ArrayInit(xDim, g => Gaussian.Uniform()));
                var xInInput = Util.ArrayInit(xDim, g => Gaussian.Uniform());
                var xOutConstraint = Util.ArrayInit(xDim, g => Gaussian.Uniform());

                for (int it = 0; it < numIters; it++)
                {
                    // A backward and forward pass over the time slices
                    for (var bckFwd = 0; bckFwd < (2 * nTimeSlices) - 1; bckFwd++)
                    {
                        // Map the sweep counter onto a time index: 0..T-1, then T-2..0.
                        var t = bckFwd < nTimeSlices ? bckFwd : (2 * nTimeSlices) - 2 - bckFwd;

                        // Input message = marginals / output message
                        for (int d = 0; d < xDim; d++)
                        {
                            if (t > 0)
                                xInInput[d].SetToRatio(xMarginal[t - 1][d], xOutput[t - 1][0][d]);
                            xOutConstraint[d].SetToRatio(xMarginal[t][d], xOutput[t][1][d]);
                        }

                        // Inference
                        var model = t == 0 ? startModel : sliceModel;
                        model.Train(x0Prior, xInInput, xOutConstraint, a, v_x);

                        for (int d = 0; d < xDim; d++)
                        {
                            // Recover the marginals
                            if (t > 0)
                                xMarginal[t - 1][d].SetTo(model.xInPosterior[d]);
                            xMarginal[t][d].SetTo(model.xOutPosterior[d]);

                            // Set the output messages
                            if (t > 0)
                                xOutput[t - 1][0][d].SetToRatio(xMarginal[t - 1][d], xInInput[d], false);
                            xOutput[t][1][d].SetToRatio(xMarginal[t][d], xOutConstraint[d], false);
                        }
                    }
                }

                // Report the final per-dimension marginals for each time step.
                for (int t = 0; t < nTimeSlices; t++)
                {
                    for (int d = 0; d < xDim; d++)
                    {
                        Console.WriteLine("{0}\t", xMarginal[t][d]);
                    }
                }
            }
        }
    
        class Program
        {
            static void Main(string[] args)
            {
                // Transition matrix: row 0 computes 0.5*x[t-1] + 0.5*x[t-2],
                // row 1 shifts the previous value into the second state slot.
                var A = new double[][] { new double[] { 0.5, 0.5 }, new double[] { 1.0, 0.0 } };

                // 1000 time slices, m_x = 0, v_x = 1, 5 outer message-passing iterations.
                DBNSlice.TrainDBN(1000, A, 0, 1, 5);
            }
        }
    }
    

    Thursday, July 25, 2013 10:43 AM
    Owner
  • Hi,

    I just tried to implement another DBN using the offset indexing trick shown above. The DBN is supposed to represent a probabilistic version of a finite state machine which emits discrete symbols in a discrete time series. Thus, it is practically a Bayesian HMM, but with an additional dependency between each state and the symbol from the previous time step.

    It looks as shown in the code block below, but the compiler does not accept it. The error message is "The model has a loop of deterministic edges, which is not allowed by Variational Message Passing.". The model uses VMP, but for Gibbs sampling, the same error message is returned. EP compiles, but the result does not seem to make sense. 

    Is this particular model one of those not supported yet, or is there a trick to make it work? I don't really understand what the error message means.

    // Bayesian HMM variant where each hidden state depends on both the previous
    // state and the previously emitted symbol, via per-(state, symbol) transition
    // CPTs. NOTE(review): the mutual y->x and x->y dependencies across time steps
    // appear to form the "loop of deterministic edges" that VMP rejects — confirm.
    public void TrainDBN()
            {
                // model dimensions
                var numberOfStates = Variable.New<int>().Named("numberOfStates");
                var state = new Range(numberOfStates).Named("stateRange");
                var numberOfSymbols = Variable.New<int>().Named("numberOfSymbols");
                var symbol = new Range(numberOfSymbols).Named("symbolRange");

                // priors and parameters
                var piPrior = Variable.New<Dirichlet>().Named("piPrior");
                var pi = Variable<Vector>.Random<Dirichlet>(piPrior).Named("pi");

                // per-state emission distributions over symbols
                var emissionPrior = Variable.Array<Dirichlet>(state).Named("emitionPrior");
                var emission = Variable.Array<Vector>(state).Named("emition");
                emission[state] = Variable<Vector>.Random<Dirichlet>(emissionPrior[state]);

                // transition CPT indexed by (previous state, previous symbol)
                var transitionPrior = Variable.Array(Variable.Array<Dirichlet>(symbol), state).Named("transitionPrior");
                var transition = Variable.Array(Variable.Array<Vector>(symbol), state).Named("transition");
                transition[state][symbol] = Variable<Vector>.Random(transitionPrior[state][symbol]);

                // chain variables
                var chainLength = Variable.New<int>().Named("chainLength");
                var N = new Range(chainLength).Named("N");
                var y = Variable.Array<int>(N).Named("y"); // hidden state chain
                y.SetValueRange(state);
                var x = Variable.Array<int>(N).Named("x"); // observed symbol chain
                x.SetValueRange(symbol);

                using (ForEachBlock fe = Variable.ForEach(N)) // iterate time steps
                {
                    using (Variable.If(fe.Index == 0))
                    {
                        // first time step: use initial state distribution (pi)
                        y[N].SetTo(Variable.Discrete(pi));
                    }
                    using (Variable.If(fe.Index > 0))
                    {
                        // all other time steps: condition the state on the previous state and the previous symbol (transition CPTs)
                        using (Variable.Switch(y[fe.Index - 1]))
                        {
                            using (Variable.Switch(x[fe.Index - 1]))
                            {
                                y[N].SetTo(Variable.Discrete(transition[y[fe.Index - 1]][x[fe.Index - 1]]));
                            }
                        }
                    }
                    // all time steps: condition symbol on state (emission CPTs)
                    using (Variable.Switch(y[fe.Index]))
                    {
                        x[N].SetTo(Variable.Discrete(emission[y[fe.Index]]));
                    }
                }

                // set observed values (data and model dimensions)
                int[] data = new int[] { 0, 1, 2, 1, 2, 1, 3 };
                chainLength.ObservedValue = data.Length;
                x.ObservedValue = data;
                numberOfSymbols.ObservedValue = data.Distinct().Count();
                numberOfStates.ObservedValue = 5;

                // set symmetric priors
                piPrior.ObservedValue = Dirichlet.Symmetric(numberOfStates.ObservedValue, 1.0);
                emissionPrior.ObservedValue = Utils.SymmetricDirichletArray(numberOfStates.ObservedValue, numberOfSymbols.ObservedValue, 1.0);
                transitionPrior.ObservedValue = Utils.SymmetricDirichletArray2D(numberOfStates.ObservedValue, numberOfSymbols.ObservedValue, numberOfStates.ObservedValue, 1.0);

                // initialize messages (random dirichlets with pseudocounts between 1 and 2)
                // to break the symmetry between the hidden states
                int minPseudoCount = 1;
                int maxPsuedoCount = 2;
                pi.InitialiseTo(Utils.RandomDirichlet(numberOfStates.ObservedValue, minPseudoCount, maxPsuedoCount));
                emission.InitialiseTo(Utils.RandomDirichletArray(numberOfStates.ObservedValue, numberOfSymbols.ObservedValue, minPseudoCount, maxPsuedoCount));
                transition.InitialiseTo(Utils.RandomDirichletArray2D(numberOfStates.ObservedValue, numberOfSymbols.ObservedValue, numberOfStates.ObservedValue, minPseudoCount, maxPsuedoCount));

                // do inference
                InferenceEngine engine = new InferenceEngine(new VariationalMessagePassing());
                var piPosterior = engine.Infer<Dirichlet>(pi);
                var emissionPosterior = engine.Infer<Dirichlet[]>(emission);
                var transitionPosterior = engine.Infer<Dirichlet[][]>(transition);
            }

    p.s. I already have a working model of this kind which is unrolled in time (working = seems to produce reasonable numbers). The problem is that the algorithms it generates are pretty huge (36MB class file for the Gibbs sampling algorithm with N = 50, slightly smaller files for deterministic algorithms).

    I would really love to have a more compact version, ideally avoiding the shared variable solution described above.

    Friday, July 26, 2013 1:07 PM
  • First, I would advise against using VMP in a long chain. To see why, take a look at http://videolectures.net/mlss09uk_minka_ai/ (look at part 2 around the 85th minute - but both lectures are well worth watching).

    Second, can you try EP again, but with the following 2 changes

    (1) When setting the symmetric priors, use a smaller pseudo-count

    (2) Initialise the states rather than the parameters.

    So try this:

                // set symmetric priors (smaller pseudo-count than before)
                int nStates = numberOfStates.ObservedValue;
                int nSymbols = numberOfSymbols.ObservedValue;

                piPrior.ObservedValue = Dirichlet.Symmetric(nStates, .1);
                emissionPrior.ObservedValue = Utils.SymmetricDirichletArray(nStates,nSymbols, .1);
                transitionPrior.ObservedValue = Utils.SymmetricDirichletArray2D(nStates, nSymbols, nStates, .1);

                // previous approach (initialising the parameter messages) kept for reference:
                //int minPseudoCount = 1;
                //int maxPsuedoCount = 2;
                //pi.InitialiseTo(Utils.RandomDirichlet(numberOfStates.ObservedValue, minPseudoCount, maxPsuedoCount));
                //emission.InitialiseTo(Utils.RandomDirichletArray(numberOfStates.ObservedValue, numberOfSymbols.ObservedValue, minPseudoCount, maxPsuedoCount));
                //transition.InitialiseTo(Utils.RandomDirichlet2D(numberOfStates.ObservedValue, numberOfSymbols.ObservedValue, numberOfStates.ObservedValue, minPseudoCount, maxPsuedoCount));

                // initialise the hidden states (rather than the parameters) to break symmetry
                y.InitialiseTo(Utils.RandomDiscreteArray(chainLength.ObservedValue, nStates));

                // do inference (default engine, i.e. EP, instead of VMP)
                InferenceEngine engine = new InferenceEngine();
                var piPosterior = engine.Infer<Dirichlet>(pi);
                var emissionPosterior = engine.Infer<Dirichlet[]>(emission);
                var transitionPosterior = engine.Infer<Dirichlet[][]>(transition);
                var yPosterior = engine.Infer<Discrete[]>(y);

    where

            /// <summary>
            /// Builds a random initialisation for an array of discrete variables:
            /// each element is a point mass on a state sampled uniformly from
            /// [0, dim).
            /// </summary>
            /// <param name="arrayDim">Length of the variable array.</param>
            /// <param name="dim">Number of discrete states per element.</param>
            public static IDistribution<int[]> RandomDiscreteArray(int arrayDim, int dim)
            {
                Discrete d = Discrete.Uniform(dim);
                return Distribution<int>.Array(Util.ArrayInit(arrayDim, i => Discrete.PointMass(d.Sample(), dim)));
            }
    John

    Monday, July 29, 2013 9:50 AM
    Owner
  • Thanks a lot John, for the shared-variable implementation above, this is very helpful. Indeed a bit more involved than the offset indexing approach, but also more general it seems. I'm going to play around with it and try to extend it to more complex models (variable variance etc). Thanks.
    Tuesday, July 30, 2013 8:02 AM