locked
Dirichlets over varying domains (Migrated from community.research.microsoft.com) RRS feed

  • Question

  • laura posted on 07-06-2009 4:29 AM

    I have a question regarding Dirichlets over a varying domain.

    I am using a Dirichlet multinomial model. Since I want to infer parameters for different chunks of data, I used observed variables instead of constants, to avoid model recompilation.

    As the range of the Dirichlet (i.e. the vocabulary size) is different for each data chunk, I used the Dirichlet constructor that takes a range and a double vector, where the range size can be changed (as it wraps an observed int variable).

    The part of the model specification that does this is the following:

    vocabVar = Variable.Observed<int>(vocabSize).Named("vocabVar");
    vocabRange = new Range(vocabVar).Named("vocabRange");
    vocabDistr = Variable.Dirichlet(vocabRange, new double[]{0.1,0.1,0.1,0.1});

    Since the double array is fixed in size but the range size can be arbitrary, this construct may throw an IndexOutOfRangeException.

    Am I missing an important point here?

    Everything works fine for DirichletUniform, but I'd rather like to have a sparse symmetric Dirichlet.

    Is it possible to wrap the double[] in an observed random variable, or to just say "make this symmetric with all dimensions being 0.1"?

    Thanks,

    Laura


    Here is my test code:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using MicrosoftResearch.Infer.Models;
    using MicrosoftResearch.Infer;
    using MicrosoftResearch.Infer.Collections;
    using MicrosoftResearch.Infer.Distributions;
    using MicrosoftResearch.Infer.Factors;
    using MicrosoftResearch.Infer.Maths;

    namespace pfum.dummymodels
    {
        class VariableDirichletRange
        {
            Variable<int> rangeVar;
            Range range;
            VariableArray<int>  dataVar;
            Variable<int> vocabVar;
            Range vocabRange;
            Variable<Vector> vocabDistr;
            InferenceEngine inferenceEngine;

            /// <summary>
            /// Draw vocabDistr ~ Dirichlet(0.1, ...0.1)
            /// For all items within range do:
            ///   Draw dataVar[range] ~ Discrete(vocabDistr),    dataVar[range] in [1..vocabSize].
            /// </summary>
            public VariableDirichletRange(int vocabSize)
            {
                rangeVar = Variable.Observed<int>(1);
                range = new Range(rangeVar).Named("range");

                vocabVar = Variable.Observed<int>(vocabSize).Named("vocabVar");
                vocabRange = new Range(vocabVar).Named("vocabRange");
                vocabDistr = Variable.New<Vector>().Named("vocabDistr");
                vocabDistr.SetValueRange(vocabRange);


                // ==================================================
                // == yields an IndexOutOfRange exception, because vector u is not long enough
                // SymmetricPrior returns double[]{0.1,0.1,0.1,...0.1}
                vocabDistr = Variable.Dirichlet(vocabRange, SymmetricPrior(vocabSize, 0.1));

                // == works fine, but is not sparse
                //vocabDistr = Variable.DirichletUniform(vocabRange);

                // == works ok if we make vector u ridiculously long
                //vocabDistr = Variable.Dirichlet(vocabRange, SymmetricPrior(100, 0.1));


                // ==================================================
                vocabDistr.Named("vocabDistr");


                dataVar = Variable.Array<int>(range).Named("dataVar");
                dataVar.IsReadOnly = false;


                using (Variable.ForEach(range))
                {
                    dataVar[range] = Variable.Discrete(vocabDistr);
                }
                inferenceEngine = new InferenceEngine();
                inferenceEngine.ShowProgress = true;
                inferenceEngine.ShowFactorGraph = false;
                inferenceEngine.BrowserMode = BrowserMode.OnError;
                inferenceEngine.Algorithm = new VariationalMessagePassing();
                inferenceEngine.NumberOfIterations = 50;
                inferenceEngine.Compiler.GenerateInMemory = false;
                inferenceEngine.ModelName = "VariableDirichletRange";

            }

            internal void loadData(int[] data)
            {
                int dim1 = data.Length;
                rangeVar.ObservedValue = dim1;
                dataVar.ObservedValue = data;

                int maxVocab = 1;
                for (int i = 0; i < dim1; i++)
                {
                    if (data[ i ] > maxVocab)
                    {
                        maxVocab = data[ i ];
                    }
                }

                vocabVar.ObservedValue = maxVocab+1;

                Console.WriteLine("infer vocabDistr " + inferenceEngine.Infer<Object>(vocabDistr));
                Console.ReadKey();
            }
            internal double[] SymmetricPrior(int max, double val)
            {
                double[] result = new double[max];
                for (int i = 0; i < max; i++)
                {
                    result[ i ] = val;
                }
                return result;
            }

            static void Main(string[] args)
            {
                // initializes with vocab size = 4
                dummymodels.VariableDirichletRange model = new pfum.dummymodels.VariableDirichletRange(4);
                // load data with vocabs > 4
                int[] data = new int[] { 1, 2, 3, 1, 2, 3, 5, 5, 6, 3, 3, 3, 3 };
                model.loadData(data);
            }
        }

    }

    Friday, June 3, 2011 4:58 PM

Answers

  • John Guiver replied on 07-16-2009 3:01 AM

    Hi Laura

    You could make the Dirichlet itself a variable. For example:

        Variable<Dirichlet> vPrior = Variable.Observed<Dirichlet>(Dirichlet.Uniform(1));
        Variable<Vector> vocabDistr = Variable<Vector>.Random(vPrior);

    Then, when you're ready to infer, set the observed value of vPrior:

        vPrior.ObservedValue = new Dirichlet(SymmetricPrior(maxVocab+1, 0.1));

    John

    Friday, June 3, 2011 4:58 PM