locked
posterior value of all the elements of Community varriablearray is same RRS feed

  • Question

  • I recently started using Infer.net framework and enjoy using it.

    I have included three new variables: CommunityScore, Community, and CommunityProb in DARE model. After running ExpectationPropagation inference engine, I get same posterior value for all the elements in CommunityPosterior array. For example: 

    

    [0]    {Discrete(SparseVector(4, 0.25))}

    [1]    {Discrete(SparseVector(4, 0.25))}

    [2]    {Discrete(SparseVector(4, 0.25))}

    [3]    {Discrete(SparseVector(4, 0.25))}  

    [4]    {Discrete(SparseVector(4, 0.25))}  

    [5]    {Discrete(SparseVector(4, 0.25))}  

    [6]    {Discrete(SparseVector(4, 0.25))} 

    [7]    {Discrete(SparseVector(4, 0.25))}  

    [8]    {Discrete(SparseVector(4, 0.25))} 

    [9]    {Discrete(SparseVector(4, 0.25))}     

    [10]  {Discrete(SparseVector(4, 0.25))}        

    But posterior values supposed to be different. Why am I getting the same value for all of them?

    Here is my entire source code. 

    using MicrosoftResearch.Infer;
    using MicrosoftResearch.Infer.Distributions;
    using MicrosoftResearch.Infer.Factors;
    using MicrosoftResearch.Infer.Maths;
    using MicrosoftResearch.Infer.Models;
    using MicrosoftResearch.Infer.Utils;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace InferLabel
    {
        /// <summary>
        /// Reference: ICML 2012 (DARE) how to grade a test without knowing the answer?
        ///  
        public class CDARE 
        {
            #region Fields
            // const
            //public const double ABILITY_PRIOR_MEAN = 0;
            //public const double ABILITY_PRIOR_VARIANCE = 1; //50
            public const double DIFFICULTY_PRIOR_MEAN = 0;
            public const double DIFFICULTY_PRIOR_VARIANCE = 1; //50
            public const double DISCRIM_PRIOR_SHAPE = 1;//5;
            public const double DISCRIM_PRIOR_SCALE = 0.0001;//D - 0.01; R - 0.0001; CF - 0.002
            //const int NUMBER_OF_ITERATIONS = 35;//D - 15; R - 35; CF - 35
    
            // Ranges - size of the variables
            public static Range worker; 
            public static Range task;
            public static Range workerTask;
            public static Range choice;
            public static Range M;
    
            // Main Variables in the model
            public VariableArray<double> workerAbility;
            public VariableArray<double> taskDifficulty;
            public VariableArray<double> discrimination;
            public VariableArray<int> trueLabel;
            public VariableArray<VariableArray<int>, int[][]> workerResponse;
    
            //community variables
            public VariableArray<int> Community;
            public VariableArray<Discrete> CommunityInit;
            public Variable<Vector> CommunityProb;
            public VariableArray<double> CommunityScore;
    
            // Prior distributions
            public VariableArray<Gaussian> taskDifficultyPrior;
            public VariableArray<Gamma> discriminationPrior; 
            public Variable<Dirichlet> CommunityProbPrior;
            public VariableArray<Gaussian> CommunityScorePrior;
    
            // Variables in model
            public Variable<int> WorkerCount;
            public VariableArray<int> WorkerTaskCount;
            public VariableArray<VariableArray<int>, int[][]> WorkerTaskIndex;
    
            // Posterior distributions
            public Gaussian[] workerAbilityPosterior;
            public Gaussian[] taskDifficultyPosterior;
            public Gamma[] discriminationPosterior;
            public static Discrete[] trueLabelPosterior;
            public Discrete[] CommunityPosterior;
            public Dirichlet CommunityProbPosterior;
            public Gaussian[] CommunityScorePosterior;
    
            // parameters 
            public Variable<int> CommunityCount;
            public int NumbersOfCommunity;
            public double CommunityPseudoCount;
            public double ReliabilityPrecision;
            public int NumberOfIterations;
    
            // Inference engine
            public InferenceEngine Engine;
    
            #endregion
    
            #region Methods
            /// <summary>
            /// Creates a CDARE model instance.
            /// </summary>
            public CDARE()
            {
                NumberOfIterations = 35;
                ReliabilityPrecision = 1;
                NumbersOfCommunity = 4;
                CommunityPseudoCount = 10.0;
            }
    
            /// <summary>
            /// Initializes the CDARE model.
            /// </summary>
            /// <param name="taskCount">The number of tasks.</param>
            /// <param name="workerCount">The number of workers.</param>
            /// <param name="labelCount">The number of labels.</param>
            public void CreateModel(int taskCount, int workerCount, int labelCount)
            {
                DefineVariablesAndRanges(taskCount, workerCount, labelCount);
                DefineGenerativeProcess();
                DefineInferenceEngine();
            }
    
            /// <summary>
            /// Defines the variables and the ranges of CDARE.
            /// </summary>
            /// <param name="taskCount">The number of tasks.</param>
            /// <param name="workerCount">The number of workers.</param>
            /// <param name="labelCount">The number of labels.</param>
            public void DefineVariablesAndRanges(int taskCount, int workerCount, int labelCount)
            {
                CommunityCount = Variable.New<int>().Named("CommunityCount");
                M = new Range(CommunityCount).Named("M");
                task = new Range(taskCount).Named("task");
                choice = new Range(labelCount).Named("choice");
                worker = new Range(workerCount).Named("worker");
    
                // The tasks for each worker
                WorkerTaskCount = Variable.Array<int>(worker).Named("WorkerTaskCount");
                workerTask = new Range(WorkerTaskCount[worker]).Named("workerTask");
                WorkerTaskIndex = Variable.Array(Variable.Array<int>(workerTask), worker).Named("WorkerTaskIndex");
                WorkerTaskIndex.SetValueRange(task);
    
                // Community membership
                CommunityProbPrior = Variable.New<Dirichlet>().Named("CommunityProbPrior");
                CommunityProb = Variable<Vector>.Random(CommunityProbPrior).Named("CommunityProb");
                CommunityProb.SetValueRange(M);
                Community = Variable.Array<int>(worker).Named("Community"); //.Attrib(QueryTypes.Marginal).Attrib(QueryTypes.MarginalDividedByPrior)
                Community[worker] = Variable.Discrete(CommunityProb).ForEach(worker);
                // Initialiser to break symmetry for community membership
                CommunityInit = Variable.Array<Discrete>(worker).Named("CommunityInit");
                Community[worker].InitialiseTo(CommunityInit[worker]);
    
                // Community parameters
                CommunityScorePrior = Variable<Gaussian>.Array(M).Named("CommunityScorePrior");
                CommunityScore = Variable.Array<double>(M).Named("CommunityScore");
                CommunityScore[M] = Variable<double>.Random(CommunityScorePrior[M]);
    
                //worker ability for each worker
                workerAbility = Variable.Array<double>(worker).Named("workerAbility");
    
                //task difficulty for each task
                taskDifficultyPrior = Variable<Gaussian>.Array(task).Named("taskDifficultyPrior"); 
                taskDifficulty = Variable.Array<double>(task).Named("taskDifficulty");
                taskDifficulty[task] = Variable<double>.Random(taskDifficultyPrior[task]);
    
                // discrimination of each task
                discriminationPrior = Variable<Gamma>.Array(task).Named("discriminationPrior"); 
                discrimination = Variable.Array<double>(task).Named("discrimination");
                discrimination[task] = Variable<double>.Random(discriminationPrior[task]);
                
                //unobserved true label for each task
                trueLabel = Variable.Array<int>(task).Named("trueLabel");
                trueLabel[task] = Variable.DiscreteUniform(choice).ForEach(task);
    
                // The labels given by the workers
                workerResponse = Variable.Array(Variable.Array<int>(workerTask), worker).Named("workerResponse");
            }
    
            /// <summary>
            /// Defines the generative process of CDARE.
            /// </summary>
            public void DefineGenerativeProcess()
            {
                // The process that generates the worker's label
                using (Variable.ForEach(worker))
                {
                    using (Variable.Switch(Community[worker]))
                    {
                        workerAbility[worker] = Variable.GaussianFromMeanAndPrecision(CommunityScore[Community[worker]], ReliabilityPrecision);
                    }
    
                    var workerTaskDifficulty = Variable.Subarray(taskDifficulty, WorkerTaskIndex[worker]);
                    var workerTaskDiscrimination = Variable.Subarray(discrimination, WorkerTaskIndex[worker]);
                    var TrueLabel = Variable.Subarray(trueLabel, WorkerTaskIndex[worker]);
    
                    using (Variable.ForEach(workerTask))
                    {
                        var advantage = (workerAbility[worker] - workerTaskDifficulty[workerTask]).Named("advantage");
                        var advantageNoisy = Variable.GaussianFromMeanAndPrecision(advantage, workerTaskDiscrimination[workerTask]).Named("advantageNoisy");
                        var correct = (advantageNoisy > 0).Named("correct");
                        using (Variable.If(correct))
                            workerResponse[worker][workerTask] = TrueLabel[workerTask];
                        using (Variable.IfNot(correct))
                            workerResponse[worker][workerTask] = Variable.DiscreteUniform(choice);
                    }
                }
            }
    
            /// <summary>
            /// Initializes the CBCC inference engine.
            /// </summary>
            public void DefineInferenceEngine()
            {
                Engine = new InferenceEngine(new ExpectationPropagation());
                Engine.ShowProgress = false;
                Engine.Compiler.UseParallelForLoops = true;
                Engine.Compiler.WriteSourceFiles = false;
                Engine.ShowFactorGraph = false;
            }
    
            /// <summary>
            /// Attachs the data to the workers labels.
            /// </summary>
            /// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
            /// <param name="workerLabels">The matrix of the labels (columns) of each worker (rows).</param>
            public void AttachData(int[][] taskIndices, int[][] workerLabels)
            {
                CommunityCount.ObservedValue = NumbersOfCommunity;
                WorkerTaskCount.ObservedValue = taskIndices.Select(tasks => tasks.Length).ToArray();
                WorkerTaskIndex.ObservedValue = taskIndices;
                workerResponse.ObservedValue = workerLabels;
            }
    
            /// <summary>
            /// Sets the priors of CDARE.
            /// </summary>
            /// <param name="taskCount">The number of tasks.</param>
            /// <param name="workerCount">The number of workers.</param>
            public void SetPriors(int taskCount, int workerCount)
            {
                taskDifficultyPrior.ObservedValue = Util.ArrayInit(taskCount, t => Gaussian.FromMeanAndPrecision(DIFFICULTY_PRIOR_MEAN, DIFFICULTY_PRIOR_VARIANCE));
                discriminationPrior.ObservedValue = Util.ArrayInit(taskCount, k => Gamma.FromMeanAndVariance(DISCRIM_PRIOR_SHAPE, DISCRIM_PRIOR_SCALE));
                CommunityProbPrior.ObservedValue = Dirichlet.Symmetric(NumbersOfCommunity, CommunityPseudoCount);
                CommunityInit.ObservedValue = Util.ArrayInit(workerCount, worker => Discrete.PointMass(Rand.Int(NumbersOfCommunity), NumbersOfCommunity));
                CommunityScorePrior.ObservedValue = Util.ArrayInit(NumbersOfCommunity, k => Gaussian.FromMeanAndPrecision(1, 1));
            }
    
            /// <summary>
            /// Infers the posteriors of CDARE using the attached data.
            /// </summary>
            /// <param name="taskIndices">The matrix of the task indices (columns) of each worker (rows).</param>
            /// <param name="workerLabels">The matrix of the labels (columns) of each worker (rows).</param>
            /// <param name="taskCount">The number of tasks.</param>
            /// <returns></returns>
            public void Infer(int[][] taskIndices, int[][] workerLabels, int taskCount)
            {
                SetPriors(taskCount, workerLabels.Length); 
                AttachData(taskIndices, workerLabels);
                Engine.NumberOfIterations = NumberOfIterations;
    
                trueLabelPosterior = Engine.Infer<Discrete[]>(trueLabel);
                workerAbilityPosterior = Engine.Infer<Gaussian[]>(workerAbility);
                taskDifficultyPosterior = Engine.Infer<Gaussian[]>(taskDifficulty);
                discriminationPosterior = Engine.Infer<Gamma[]>(discrimination);
                CommunityScorePosterior = Engine.Infer<Gaussian[]>(CommunityScore);
                CommunityPosterior = Engine.Infer<Discrete[]>(Community);
                CommunityProbPosterior = Engine.Infer<Dirichlet>(CommunityProb);
            }
            #endregion
        }
    }




    • Edited by Dascafe Wednesday, November 18, 2015 5:51 AM
    Wednesday, November 11, 2015 7:31 AM

All replies

  • This is a symmetry breaking issue.  In this model, it is not enough to initialise Community[worker].  You also need to initialize TrueLabel, e.g. by majority vote.
    Wednesday, November 11, 2015 4:41 PM
    Owner