locked
Scalable Gaussian mixture model RRS feed

  • Question

  • Dear all,

    (deleted my previous post by mistake)

    I am new to Infer .NET. I am playing around with univariate Gaussian mixture that can be scaled to a large number of data and large number of mixture components. I am not entirely sure that the way I infer the mixture labels is correct in the following code. Could anyone advise whether I need to make the mixture label distribution variable z a SharedVariable as well? If so how would I go about it?

    Thanks.

        using GaussianArray = DistributionStructArray<Gaussian, double>;
        using GammaArray = DistributionStructArray<Gamma, double>;
    
        class Program
        {
            static void Main(string[] args)
            {
                int numClusters = 2;
                //generate data from two Gaussians
                int N = 100; //per chunk
                int numChunks = 3;
                double[][] data = new double[numChunks][];
                double[] dataLong = new double[numChunks * N];
                Random rnd = new Random();
                for (int j = 0; j < numChunks; j++)
                {
                    double[] temp = new double[N];
                    for (int i = 0; i < N; i++)
                    {
                        double x = rnd.NextDouble();                    
                        if (x < 0.50)
                        {
                            temp[i] = Rand.Normal(150.0, 3.0);
                        }
                        else
                        {
                            temp[i] = Rand.Normal(15.0, 2.0);
                        }                        
                        dataLong[i] = temp[i];
                    }
                    data[j] = temp;
                }
    
                //run_3(dataLong, numClusters);
                run_4(data, numChunks, numClusters, N);                      
            }
    
            static void run_4(double[][] data, int numChunks, int numClusters, int N)
            {
                Model model = new Model(numChunks);
    
                Range K = new Range(numClusters);
    
                VariableArray<Gaussian> meanPrior = Variable.Array<Gaussian>(K);
                VariableArray<Gamma> precisionPrior = Variable.Array<Gamma>(K);
    
                SharedVariableArray<double> mean = SharedVariable<double>.Random<GaussianArray>(K, makeGaussianArray(numClusters));
                SharedVariableArray<double> precision = SharedVariable<double>.Random(K, makeGammaArray(numClusters));
    
                SharedVariable<Vector> mixCoef = SharedVariable<Vector>.Random(Dirichlet.Symmetric(numClusters, 1));
                      
                Variable<int> dataCount = Variable.New<int>();
                Range dataRange = new Range(dataCount);
    
                VariableArray<int> z = Variable.Array<int>(dataRange);            
                Variable<IDistribution<int[]>> zInit = Variable.New<IDistribution<int[]>>();
                z.InitialiseTo(zInit);
    
                VariableArray<double> y = Variable.Array<double>(dataRange);
    
                Variable<Vector> coefModel = mixCoef.GetCopyFor(model);
                coefModel.SetValueRange(K);
                VariableArray<double> meanModel = mean.GetCopyFor(model);
                VariableArray<double> precisionModel = precision.GetCopyFor(model);
    
                using (Variable.ForEach(dataRange))
                {
                    z[dataRange] = Variable.Discrete(coefModel);
                    using (Variable.Switch(z[dataRange]))
                    {
                        y[dataRange].SetTo(Variable.GaussianFromMeanAndPrecision(meanModel[z[dataRange]], precisionModel[z[dataRange]]));
                    }
                }
    
                InferenceEngine e = new InferenceEngine();
                //e.NumberOfIterations = 1;
                //e.ShowProgress = false;            
    
                //make posterior z  for each batch
                Discrete[][] zPost = new Discrete[numChunks][];
                
                for (int pass = 0; pass < 5; pass++)
                {
                    for (int c = 0; c < numChunks; c++)
                    {
                        if (pass == 0)
                        {
                            zInit.ObservedValue = Distribution<int>.Array(Util.ArrayInit(data[c].Length, i => Discrete.PointMass(Rand.Int(numClusters), numClusters)));
                        }                    
                        dataCount.ObservedValue = data[c].Length;
                        y.ObservedValue = data[c];
                        model.InferShared(e, c);
                        zPost[c] = e.Infer<Discrete[]>(z);
                    }
                }            
    
                Gaussian[] meanPost = Distribution.ToArray<Gaussian[]>(mean.Marginal<IDistribution<double[]>>());
                Gamma[] precisionPost = Distribution.ToArray<Gamma[]>(precision.Marginal<IDistribution<double[]>>());
                Dirichlet postCoef = mixCoef.Marginal<Dirichlet>();
    
                for (int i = 0; i < numClusters; i++)
                {
                    double eMean = meanPost[i].GetMean();
                    double eVariance = 1.0 / precisionPost[i].GetMean() + meanPost[i].GetVariance();
                    Console.WriteLine("Mean: " + eMean + "; Var: " + eVariance); 
                }
                Console.WriteLine("Coef: " + postCoef);
    
                for (int c = 0; c < numChunks; c++)
                {
                    for (int i = 0; i < zPost[c].Length; i++)
                    {
                        //Console.WriteLine(zPost[c][i]);
                    }                
                }
            }
    
            private static GaussianArray makeGaussianArray(int length)
            {
                Gaussian[] result = new Gaussian[length];
                for (int i = 0; i < length; i++)
                    result[i] = new Gaussian(0.0, 100); //Gaussian.FromMeanAndPrecision(0.0, 0.10);
                return (GaussianArray) Distribution<double>.Array<Gaussian>(result);
            }
    
            private static GammaArray makeGammaArray(int length)
            {
                Gamma[] result = new Gamma[length];
                for (int i = 0; i < length; i++)
                    result[i] = new Gamma(2.0, 0.5); // Gaussian.FromMeanAndPrecision(0.0, 0.10);
                return (GammaArray)Distribution<double>.Array<Gamma>(result);
            }
    }



    • Edited by ZedWill Thursday, March 26, 2015 4:45 PM
    Thursday, March 26, 2015 4:44 PM