locked
Questions about the Recommender System Example RRS feed

  • Question

  • Dear everyone

    I am working on the RecommenderSystem example, trying to extend it so that it matches the model described in the referenced paper.

    However, I have some issues that I'm not able to solve:

    1_ I'm trying to test it on the MovieLens dataset, including the cold-start phase, but I need to be sure about the values of some variables. In fact, because of the cold start, for each test user I have to set almost all the variables back to the values they had after the first phase (the training over all the training users). The complete code is the following:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    using System.IO;
    using MicrosoftResearch;
    using MicrosoftResearch.Infer;
    using MicrosoftResearch.Infer.Models;
    using MicrosoftResearch.Infer.Utils;
    using MicrosoftResearch.Infer.Distributions;
    using MicrosoftResearch.Infer.Maths;
    using MicrosoftResearch.Infer.Factors;
    using MicrosoftResearch.Infer.Collections;
    using MicrosoftResearch.Infer.Graphs;
    using MicrosoftResearch.Infer.Transforms;
    using MicrosoftResearch.Infer.Views;
    using MicrosoftResearch.Transforms.CodeModel;
    
    
    
    namespace ConsoleApplication4
    {
        public class RSnew
        {
    
            // Model sizes. These are fixed at construction time and used to size the ranges in RS()
            // and the prior arrays in Intialize().
            // NOTE(review): 943 users / 1682 items presumably match the MovieLens 100k dataset - confirm.
    
            private int numUsers = 943;
            private int numItems = 1682;
    
            private int numTraits = 20;
            // Number of rating thresholds per user; GenerateData encodes ratings 1..5 as 4 booleans.
            private int numLevels = 4;
    
            // Observed number of rating observations; reassigned by Train() and updated by Test().
            private Variable<int> numObservations;
    
                    // Ranges (plates) of the model; created in RS(), "observation" is re-created in Train().
            private Range user;
            private Range item;
            private Range trait;
            private Range observation;
            private Range level;
    
            // Latent variables inferred by Train(): per-user/per-item trait vectors, biases,
            // and per-user rating thresholds.
            private VariableArray<VariableArray<double>, double[][]> userTraits;
            private VariableArray<VariableArray<double>, double[][]> itemTraits;
            private VariableArray<double> userBias;
            private VariableArray<double> itemBias;
            private VariableArray<VariableArray<double>, double[][]> userThresholds;
    
            // Priors of the latent variables, held as observed variables so that Train() can
            // overwrite them with the inferred posteriors.
            private VariableArray<VariableArray<Gaussian>, Gaussian[][]> userTraitsPrior;
            private VariableArray<VariableArray<Gaussian>, Gaussian[][]> itemTraitsPrior;
            private VariableArray<Gaussian> userBiasPrior;
            private VariableArray<Gaussian> itemBiasPrior;
            private VariableArray<VariableArray<Gaussian>, Gaussian[][]> userThresholdsPrior;
    
            // Noise variances of the affinity and of the thresholds; created in Intialize().
    
            Variable<double> affinityNoiseVariance;
            Variable<double> thresholdsNoiseVariance;
    
            // Initial (pre-training) priors for a single trait and a single bias; set in Intialize().
            private Gaussian traitPrior;
            private Gaussian biasPrior;
    
            // Inference engine shared by Train() and Test(); created in RS().
            InferenceEngine engine;
    
    
    
            // "BASE" state: assigned by Train() when trainCase == 0 and restored by IntializePost().
            // These hold references to the same objects as the working fields above, not copies.
            private VariableArray<VariableArray<Gaussian>, Gaussian[][]> BASEuserTraitsPrior;
            private VariableArray<VariableArray<Gaussian>, Gaussian[][]> BASEitemTraitsPrior;
            private VariableArray<Gaussian> BASEuserBiasPrior;
            private VariableArray<Gaussian> BASEitemBiasPrior;
            private VariableArray<VariableArray<Gaussian>, Gaussian[][]> BASEuserThresholdsPrior;
    
            private Gaussian BASEtraitPrior;
            private Gaussian BASEbiasPrior;
    
            private VariableArray<VariableArray<double>, double[][]> BASEuserTraits;
            private VariableArray<VariableArray<double>, double[][]> BASEitemTraits;
            private VariableArray<double> BASEuserBias;
            private VariableArray<double> BASEitemBias;
            private VariableArray<VariableArray<double>, double[][]> BASEuserThresholds;
    
    
            Variable<double> BASEaffinityNoiseVariance;
            Variable<double> BASEthresholdsNoiseVariance;
    
            // Observed data arrays, one entry per observation: user index, item index, and the
            // per-level boolean encoding of the rating.
    
            private VariableArray<int> userData;
            private VariableArray<int> itemData;
            private VariableArray<VariableArray<bool>,bool[][]> ratingData;
    
            /// <summary>
            /// Declares the ranges, the latent variables with their prior holders, the data
            /// arrays, and creates the inference engine. No data is attached here.
            /// </summary>
            public void RS()
            {
                // Plates over users, items, traits and rating levels.
                user = new Range(numUsers).Named("user");
                item = new Range(numItems).Named("item");
                trait = new Range(numTraits).Named("trait");
                level = new Range(numLevels).Named("level");

                // Placeholder observation count; Train() replaces this variable and its range.
                numObservations = Variable.Observed(1).Named("numObservations");
                observation = new Range(numObservations).Named("observation");

                // Latent quantities to be inferred.
                userTraits = Variable.Array(Variable.Array<double>(trait), user).Named("userTraits");
                itemTraits = Variable.Array(Variable.Array<double>(trait), item).Named("itemTraits");
                userBias = Variable.Array<double>(user).Named("userBias");
                itemBias = Variable.Array<double>(item).Named("itemBias");
                userThresholds = Variable.Array(Variable.Array<double>(level), user).Named("userThresholds");

                // One Gaussian prior per latent quantity; their values are supplied later
                // through ObservedValue.
                userTraitsPrior = Variable.Array(Variable.Array<Gaussian>(trait), user).Named("userTraitsPrior");
                itemTraitsPrior = Variable.Array(Variable.Array<Gaussian>(trait), item).Named("itemTraitsPrior");
                userBiasPrior = Variable.Array<Gaussian>(user).Named("userBiasPrior");
                itemBiasPrior = Variable.Array<Gaussian>(item).Named("itemBiasPrior");
                userThresholdsPrior = Variable.Array(Variable.Array<Gaussian>(level), user).Named("userThresholdsPrior");

                // Each latent quantity is drawn from its own prior.
                userTraits[user][trait] = Variable<double>.Random(userTraitsPrior[user][trait]);
                itemTraits[item][trait] = Variable<double>.Random(itemTraitsPrior[item][trait]);
                userBias[user] = Variable<double>.Random(userBiasPrior[user]);
                itemBias[item] = Variable<double>.Random(itemBiasPrior[item]);
                userThresholds[user][level] = Variable<double>.Random(userThresholdsPrior[user][level]);

                // Data arrays indexed by observation.
                userData = Variable.Array<int>(observation).Named("userData");
                itemData = Variable.Array<int>(observation).Named("itemData");
                ratingData = Variable.Array(Variable.Array<bool>(level), observation).Named("ratingData");

                // Train() checks that this engine's algorithm is Expectation Propagation.
                engine = new InferenceEngine();
            }
    
            /// <summary>
            /// Assigns the initial prior values to all prior holders, pins the priors of the
            /// first <c>numTraits</c> items to break symmetry, and creates the observed noise
            /// variances.
            /// </summary>
            public void Intialize()
            {
                traitPrior = Gaussian.FromMeanAndVariance(0.0, 1.0);
                biasPrior = Gaussian.FromMeanAndVariance(0.0, 1.0);

                userTraitsPrior.ObservedValue =
                    Util.ArrayInit(numUsers, u => Util.ArrayInit(numTraits, t => traitPrior));
                itemTraitsPrior.ObservedValue =
                    Util.ArrayInit(numItems, i => Util.ArrayInit(numTraits, t => traitPrior));
                userBiasPrior.ObservedValue = Util.ArrayInit(numUsers, u => biasPrior);
                itemBiasPrior.ObservedValue = Util.ArrayInit(numItems, i => biasPrior);

                // Threshold priors have unit variance and means spaced one apart, centred on zero.
                userThresholdsPrior.ObservedValue = Util.ArrayInit(
                    numUsers,
                    u => Util.ArrayInit(
                        numLevels,
                        l => Gaussian.FromMeanAndVariance(l - numLevels / 2.0 + 0.5, 1.0)));

                // Break symmetry and remove ambiguity in the traits: the first numTraits items
                // get point-mass priors forming an identity matrix (assumes numTraits < numItems).
                Gaussian[][] pinnedItemPriors = itemTraitsPrior.ObservedValue;
                for (int row = 0; row < numTraits; row++)
                {
                    for (int col = 0; col < numTraits; col++)
                    {
                        pinnedItemPriors[row][col] = (row == col)
                            ? Gaussian.PointMass(1)
                            : Gaussian.PointMass(0);
                    }
                }

                // Model noises are fixed, observed values.
                affinityNoiseVariance = Variable.Observed(0.1).Named("affinityNoiseVariance");
                thresholdsNoiseVariance = Variable.Observed(0.1).Named("thresholdsNoiseVariance");
            }
    
            /// <summary>
            /// Re-points the working model fields at the references stored in the BASE* fields.
            /// </summary>
            /// <remarks>
            /// Only references are assigned; nothing is copied. After this call each working
            /// field and its BASE* counterpart refer to the same object (or are both null if
            /// the BASE* field was never assigned).
            /// </remarks>
            public void IntializePost()
            {
                // Latent variables.
                userTraits = BASEuserTraits;
                itemTraits = BASEitemTraits;
                userBias = BASEuserBias;
                itemBias = BASEitemBias;
                userThresholds = BASEuserThresholds;

                // Prior holders.
                userTraitsPrior = BASEuserTraitsPrior;
                itemTraitsPrior = BASEitemTraitsPrior;
                userBiasPrior = BASEuserBiasPrior;
                itemBiasPrior = BASEitemBiasPrior;
                userThresholdsPrior = BASEuserThresholdsPrior;

                // Scalar priors and noise variances.
                traitPrior = BASEtraitPrior;
                biasPrior = BASEbiasPrior;
                affinityNoiseVariance = BASEaffinityNoiseVariance;
                thresholdsNoiseVariance = BASEthresholdsNoiseVariance;
            }
    
            /// <summary>
            /// Loads ratings from a file, adds the rating model over those observations, runs
            /// inference and stores the inferred posteriors as the new prior values.
            /// </summary>
            /// <param name="fileName">Path of the ratings file, read by LoadData.</param>
            /// <param name="trainCase">
            /// 0: after inference the current model fields are saved into the BASE* fields.
            /// 1: IntializePost() is called first to restore the fields from the BASE* fields.
            /// The value is also forwarded to GenerateData.
            /// </param>
            /// <remarks>
            /// If the engine is not configured for Expectation Propagation the method prints a
            /// message and returns without training (previously it printed the message and
            /// carried on regardless).
            /// NOTE(review): every call declares a new observation range and a new block of
            /// model statements over the same latent variables - confirm this is intended when
            /// Train is called more than once.
            /// </remarks>
            public void Train(string fileName, int trainCase)
            {
                if (trainCase == 1)
                {
                    IntializePost();
                }

                engine.NumberOfIterations = 1;
                if (!(engine.Algorithm is ExpectationPropagation))
                {
                    Console.WriteLine("This example only runs with Expectation Propagation");
                    return;
                }

                // Read the (user, item, rating) triples.
                int[] tmpUser;
                int[] tmpItem;
                int[] tmpRating;

                LoadData(fileName, out tmpUser, out tmpItem, out tmpRating);

                // Fresh observation plate and data arrays sized for this file.
                numObservations = Variable.Observed(tmpItem.Length);

                observation = new Range(numObservations);

                userData = Variable.Array<int>(observation);
                itemData = Variable.Array<int>(observation);
                ratingData = Variable.Array(Variable.Array<bool>(level), observation);

                // Model: for each observation, the noisy affinity of the user for the item is
                // compared against the user's noisy thresholds, one comparison per level.
                using (Variable.ForEach(observation))
                {
                    VariableArray<double> products = Variable.Array<double>(trait);
                    products[trait] = userTraits[userData[observation]][trait] * itemTraits[itemData[observation]][trait];

                    Variable<double> bias = (userBias[userData[observation]] + itemBias[itemData[observation]]);
                    Variable<double> affinity = (bias + Variable.Sum(products));
                    Variable<double> noisyAffinity = Variable.GaussianFromMeanAndVariance(affinity, affinityNoiseVariance);

                    VariableArray<double> noisyThresholds = Variable.Array<double>(level);
                    noisyThresholds[level] = Variable.GaussianFromMeanAndVariance(userThresholds[userData[observation]][level], thresholdsNoiseVariance);
                    ratingData[observation][level] = noisyAffinity > noisyThresholds[level];
                }

                // Observe training data
                GenerateData(numUsers, numItems, numTraits, numObservations.ObservedValue, numLevels,
                             userData, itemData, ratingData,
                             userTraitsPrior.ObservedValue, itemTraitsPrior.ObservedValue,
                             userBiasPrior.ObservedValue, itemBiasPrior.ObservedValue, userThresholdsPrior.ObservedValue,
                             affinityNoiseVariance.ObservedValue, thresholdsNoiseVariance.ObservedValue, tmpUser, tmpItem,
                             tmpRating, trainCase);

                // Allow EP to process the product factor as if running VMP
                // as in Stern, Herbrich, Graepel paper.
                engine.Compiler.GivePriorityTo(typeof(GaussianProductOp_SHG09));
                engine.Compiler.ShowWarnings = true;

                // Run inference
                var userTraitsPosterior = engine.Infer<Gaussian[][]>(userTraits);
                var itemTraitsPosterior = engine.Infer<Gaussian[][]>(itemTraits);
                var userBiasPosterior = engine.Infer<Gaussian[]>(userBias);
                var itemBiasPosterior = engine.Infer<Gaussian[]>(itemBias);
                var userThresholdsPosterior = engine.Infer<Gaussian[][]>(userThresholds);

                // Feed in the inferred posteriors as the new priors
                userTraitsPrior.ObservedValue = userTraitsPosterior;
                itemTraitsPrior.ObservedValue = itemTraitsPosterior;
                userBiasPrior.ObservedValue = userBiasPosterior;
                itemBiasPrior.ObservedValue = itemBiasPosterior;
                userThresholdsPrior.ObservedValue = userThresholdsPosterior;

                if (trainCase == 0)
                {
                    // Remember the state after the base training pass. These are reference
                    // assignments, not copies: later changes made through the working fields'
                    // objects are visible through the BASE* fields as well.
                    BASEuserTraitsPrior = userTraitsPrior;
                    BASEitemTraitsPrior = itemTraitsPrior;
                    BASEuserBiasPrior = userBiasPrior;
                    BASEitemBiasPrior = itemBiasPrior;
                    BASEuserThresholdsPrior = userThresholdsPrior;

                    BASEtraitPrior = traitPrior;
                    BASEbiasPrior = biasPrior;

                    BASEuserTraits = userTraits;
                    BASEitemTraits = itemTraits;
                    BASEuserBias = userBias;
                    BASEitemBias = itemBias;
                    BASEuserThresholds = userThresholds;

                    BASEaffinityNoiseVariance = affinityNoiseVariance;
                    BASEthresholdsNoiseVariance = thresholdsNoiseVariance;
                }
            }
    
            /// <summary>
            /// Predicts ratings for the (user, item) pairs in a test file and reports the error
            /// against the ratings stored in that file.
            /// </summary>
            /// <param name="filename">Path of the test ratings file, read by LoadData.</param>
            /// <returns>
            /// The <c>Tuple&lt;float, double, string&gt;</c> produced by computeMAE_RMSE
            /// (MAE, RMSE, and the printed summary line), boxed as <c>object</c>.
            /// </returns>
            /// <remarks>
            /// The error metrics are computed, and therefore printed, exactly once; the
            /// original code invoked computeMAE_RMSE twice and discarded the first result.
            /// </remarks>
            public object Test(string filename)
            {
                // Single test-set case: load the pairs to predict and their true ratings.
                int[] userTest;
                int[] itemTest;
                int[] testRating;

                LoadData(filename, out userTest, out itemTest, out testRating);

                // Swap the test pairs into the data arrays and un-observe the ratings so
                // that they can be inferred.
                numObservations.ObservedValue = userTest.Length;

                userData.ObservedValue = userTest;
                itemData.ObservedValue = itemTest;
                ratingData.ClearObservedValue();

                Bernoulli[][] predictedRating = engine.Infer<Bernoulli[][]>(ratingData);

                return computeMAE_RMSE(numObservations.ObservedValue, predictedRating, testRating);
            }
    
            /// <summary>
            /// Converts per-level Bernoulli predictions into integer ratings and measures the
            /// mean absolute error and root mean squared error against the true ratings.
            /// </summary>
            /// <param name="numObservations">Number of leading entries of both arrays to score.</param>
            /// <param name="predictedRating">For each observation, one Bernoulli per rating level.</param>
            /// <param name="testRating">True integer ratings, one per observation.</param>
            /// <returns>MAE, RMSE and the summary line that is also written to the console.</returns>
            private Tuple<float, double, string> computeMAE_RMSE(int numObservations, Bernoulli[][] predictedRating, int[] testRating)
            {
                float absoluteErrorSum = 0;
                double squaredErrorSum = 0;

                for (int obs = 0; obs < numObservations; obs++)
                {
                    // Predicted rating = 1 + number of levels whose mean exceeds 0.5.
                    int levelsPassed = predictedRating[obs].Count(p => (float)p.GetMean() > 0.5);
                    int error = (1 + levelsPassed) - testRating[obs];

                    absoluteErrorSum += Math.Abs(error);
                    squaredErrorSum += Math.Pow(error, 2);
                }

                float meanAbsoluteError = absoluteErrorSum / numObservations;
                double rootMeanSquaredError = Math.Sqrt(squaredErrorSum / numObservations);

                string sol = "MAE -> " + meanAbsoluteError + " RMSE-> " + rootMeanSquaredError;
                Console.WriteLine(sol);

                return Tuple.Create(meanAbsoluteError, rootMeanSquaredError, sol);
            }
    
    
            /// <summary>
            /// Encodes the loaded ratings as per-level booleans and attaches the user, item and
            /// rating observations to the model's data arrays. Despite its name, no data is
            /// generated: the observations come from <paramref name="users"/>,
            /// <paramref name="items"/> and <paramref name="ratings"/>.
            /// </summary>
            /// <param name="numUsers">Unused; kept for signature compatibility.</param>
            /// <param name="numItems">Unused; kept for signature compatibility.</param>
            /// <param name="numTraits">Unused; kept for signature compatibility.</param>
            /// <param name="numObservations">Number of leading entries of <paramref name="ratings"/> to encode.</param>
            /// <param name="numLevels">Number of thresholds; valid ratings are 1..numLevels + 1.</param>
            /// <param name="userData">Receives <paramref name="users"/> as its observed value.</param>
            /// <param name="itemData">Receives <paramref name="items"/> as its observed value.</param>
            /// <param name="ratingData">Receives the encoded ratings as its observed value.</param>
            /// <param name="userTraitsPrior">Unused; kept for signature compatibility.</param>
            /// <param name="itemTraitsPrior">Unused; kept for signature compatibility.</param>
            /// <param name="userBiasPrior">Unused; kept for signature compatibility.</param>
            /// <param name="itemBiasPrior">Unused; kept for signature compatibility.</param>
            /// <param name="userThresholdsPrior">Unused; kept for signature compatibility.</param>
            /// <param name="affinityNoiseVariance">Unused; kept for signature compatibility.</param>
            /// <param name="thresholdsNoiseVariance">Unused; kept for signature compatibility.</param>
            /// <param name="users">Zero-based user index per observation.</param>
            /// <param name="items">Zero-based item index per observation.</param>
            /// <param name="ratings">Integer rating per observation.</param>
            /// <param name="trainCase">When 0, the random number generator is re-seeded.</param>
            /// <exception cref="ArgumentOutOfRangeException">
            /// A rating lies outside 1..numLevels + 1. The original code silently left a null
            /// row in that case.
            /// </exception>
            void GenerateData(int numUsers, int numItems, int numTraits, int numObservations, int numLevels,
                                                VariableArray<int> userData, VariableArray<int> itemData,
                                                VariableArray<VariableArray<bool>, bool[][]> ratingData,
                                                Gaussian[][] userTraitsPrior, Gaussian[][] itemTraitsPrior,
                                                Gaussian[] userBiasPrior, Gaussian[] itemBiasPrior, Gaussian[][] userThresholdsPrior,
                                                double affinityNoiseVariance, double thresholdsNoiseVariance,
                                                int[] users, int[] items, int[] ratings, int trainCase)
            {
                if (trainCase == 0)
                {
                    // The original code re-seeded the generator here and then sampled model
                    // parameters that were never used. The re-seed is kept so the generator
                    // is still reset at this point; the unused sampling has been removed.
                    Rand.Restart(12347);
                }

                bool[][] generatedRatingData = new bool[numObservations][];

                for (int observation = 0; observation < numObservations; observation++)
                {
                    int rating = ratings[observation];
                    if (rating < 1 || rating > numLevels + 1)
                    {
                        throw new ArgumentOutOfRangeException(
                            "ratings",
                            rating,
                            "Rating at observation " + observation + " must be between 1 and " + (numLevels + 1) + ".");
                    }

                    // Entry l is true when the rating exceeds threshold l, i.e. rating > l + 1.
                    // With numLevels == 4: 1 -> {F,F,F,F}, 2 -> {T,F,F,F}, ... 5 -> {T,T,T,T}.
                    bool[] aboveThreshold = new bool[numLevels];
                    for (int l = 0; l < numLevels; l++)
                    {
                        aboveThreshold[l] = rating > l + 1;
                    }

                    generatedRatingData[observation] = aboveThreshold;
                }

                userData.ObservedValue = users;
                itemData.ObservedValue = items;
                ratingData.ObservedValue = generatedRatingData;
            }
    
    
            /// <summary>
            /// Reads (user, item, rating) triples from a text file.
            /// </summary>
            /// <remarks>
            /// Each non-blank line must hold at least three integer fields separated by a tab,
            /// a comma or a space, in the order user, item, rating; any further fields are
            /// ignored. User and item ids are shifted down by one to make them zero-based.
            /// Blank lines are skipped. The file is read in a single pass, the reader is
            /// always disposed, and numbers are parsed with the invariant culture.
            /// </remarks>
            /// <param name="ifn">The file name.</param>
            /// <param name="tmpUser">Zero-based user index of each rating.</param>
            /// <param name="tmpItem">Zero-based item index of each rating.</param>
            /// <param name="tmpRating">Rating values, as stored in the file.</param>
            /// <exception cref="FormatException">
            /// A non-blank line has fewer than three fields or a field is not an integer.
            /// </exception>
            private static void LoadData(
                string ifn,         // The file name
                out int[] tmpUser,   // users
                out int[] tmpItem,   // movies
                out int[] tmpRating)    // ratings
            {
                char[] sep = { '\t', ',', ' ' };
                var invariant = System.Globalization.CultureInfo.InvariantCulture;

                var users = new List<int>();
                var items = new List<int>();
                var ratings = new List<int>();

                using (var reader = new StreamReader(ifn))
                {
                    string line;
                    int lineNumber = 0;
                    while ((line = reader.ReadLine()) != null)
                    {
                        lineNumber++;

                        // Tolerate empty lines (for example a trailing newline at end of file).
                        if (string.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }

                        string[] fields = line.Split(sep);
                        if (fields.Length < 3)
                        {
                            throw new FormatException(
                                "Line " + lineNumber + " of '" + ifn + "' does not contain user, item and rating fields.");
                        }

                        users.Add(int.Parse(fields[0], invariant) - 1);
                        items.Add(int.Parse(fields[1], invariant) - 1);
                        ratings.Add(int.Parse(fields[2], invariant));
                    }
                }

                tmpUser = users.ToArray();
                tmpItem = items.ToArray();
                tmpRating = ratings.ToArray();
            }
        }
    }
    


    The problems in this code are the function IntializePost() and the assignments in

    if (trainCase == 0)
                { ...

    in the function Train, because I don't make a copy of the variables, but just link them with =. I've tried the .Clone() and Variable.Copy() functions, but they don't work (in both cases because of the cast). How can I copy the values of these variables?

    2_ In the function Test I just want to "test" the model, as in the second part (***make a prediction***) of the original example. However, unlike the original example, in this case the engine instance also runs an iteration of the algorithm in the test case. What did I do wrong?

    Thank you

    Marco

    Friday, May 17, 2013 9:22 AM

All replies

  • What is the purpose of InitializePost? 
    Wednesday, May 22, 2013 5:21 PM
    Owner
  • It was used in a particular version of ColdStart. However I have changed the code and this part is not present anymore. 

    Thank you for your Reply.

    Marco

    Tuesday, May 28, 2013 9:52 AM
  • So the problem is fixed now?
    Tuesday, May 28, 2013 10:50 AM
    Owner