Asked by:
Questions about the Recommender System Example

Question
-
Dear everyone
I am working on the RecommenderSystem example, trying to improve it in the form of the one in the referring paper.
However i have some issues that i'm not able to solve:
_ I'm trying to test it in the movielens dataset with also the cold start phase, but however i need to be sure about the value of some variables. Infact, because the cold start, for each test user I have almost all the variables to the results they have after the first phase(the train over all the train users). The complete code is the following:
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using System.IO; using MicrosoftResearch; using MicrosoftResearch.Infer; using MicrosoftResearch.Infer.Models; using MicrosoftResearch.Infer.Utils; using MicrosoftResearch.Infer.Distributions; using MicrosoftResearch.Infer.Maths; using MicrosoftResearch.Infer.Factors; using MicrosoftResearch.Infer.Collections; using MicrosoftResearch.Infer.Graphs; using MicrosoftResearch.Infer.Transforms; using MicrosoftResearch.Infer.Views; using MicrosoftResearch.Transforms.CodeModel; namespace ConsoleApplication4 { public class RSnew { // Define counts private int numUsers = 943; private int numItems = 1682; private int numTraits = 20; private int numLevels = 4; private Variable<int> numObservations; // Define ranges private Range user; private Range item; private Range trait; private Range observation; private Range level; // Define latent variables private VariableArray<VariableArray<double>, double[][]> userTraits; private VariableArray<VariableArray<double>, double[][]> itemTraits; private VariableArray<double> userBias; private VariableArray<double> itemBias; private VariableArray<VariableArray<double>, double[][]> userThresholds; // Define priors private VariableArray<VariableArray<Gaussian>, Gaussian[][]> userTraitsPrior; private VariableArray<VariableArray<Gaussian>, Gaussian[][]> itemTraitsPrior; private VariableArray<Gaussian> userBiasPrior; private VariableArray<Gaussian> itemBiasPrior; private VariableArray<VariableArray<Gaussian>, Gaussian[][]> userThresholdsPrior; // Model noises Variable<double> affinityNoiseVariance; Variable<double> thresholdsNoiseVariance; private Gaussian traitPrior; private Gaussian biasPrior; InferenceEngine engine; // Define priors private VariableArray<VariableArray<Gaussian>, Gaussian[][]> BASEuserTraitsPrior; private VariableArray<VariableArray<Gaussian>, Gaussian[][]> BASEitemTraitsPrior; private VariableArray<Gaussian> BASEuserBiasPrior; private VariableArray<Gaussian> BASEitemBiasPrior; private VariableArray<VariableArray<Gaussian>, Gaussian[][]> BASEuserThresholdsPrior; private Gaussian BASEtraitPrior; private Gaussian BASEbiasPrior; private VariableArray<VariableArray<double>, double[][]> BASEuserTraits; private VariableArray<VariableArray<double>, double[][]> BASEitemTraits; private VariableArray<double> BASEuserBias; private VariableArray<double> BASEitemBias; private VariableArray<VariableArray<double>, double[][]> BASEuserThresholds; Variable<double> BASEaffinityNoiseVariance; Variable<double> BASEthresholdsNoiseVariance; // Declare training data variables private VariableArray<int> userData; private VariableArray<int> itemData; private VariableArray<VariableArray<bool>,bool[][]> ratingData; public void RS(){ user = new Range(numUsers).Named("user"); item = new Range(numItems).Named("item"); trait = new Range(numTraits).Named("trait"); level = new Range(numLevels).Named("level"); numObservations = Variable.Observed(1).Named("numObservations"); observation = new Range(numObservations).Named("observation"); //testObservation = new Range(numObservations).Named("testObservation"); // Define latent variables userTraits = Variable.Array(Variable.Array<double>(trait), user).Named("userTraits"); itemTraits = Variable.Array(Variable.Array<double>(trait), item).Named("itemTraits"); userBias = Variable.Array<double>(user).Named("userBias"); itemBias = Variable.Array<double>(item).Named("itemBias"); userThresholds = Variable.Array(Variable.Array<double>(level), user).Named("userThresholds"); // Define priors userTraitsPrior = Variable.Array(Variable.Array<Gaussian>(trait), user).Named("userTraitsPrior"); itemTraitsPrior = Variable.Array(Variable.Array<Gaussian>(trait), item).Named("itemTraitsPrior"); userBiasPrior = Variable.Array<Gaussian>(user).Named("userBiasPrior"); itemBiasPrior = Variable.Array<Gaussian>(item).Named("itemBiasPrior"); userThresholdsPrior = Variable.Array(Variable.Array<Gaussian>(level), user).Named("userThresholdsPrior"); // Define latent variables statistically userTraits[user][trait] = Variable<double>.Random(userTraitsPrior[user][trait]); itemTraits[item][trait] = Variable<double>.Random(itemTraitsPrior[item][trait]); userBias[user] = Variable<double>.Random(userBiasPrior[user]); itemBias[item] = Variable<double>.Random(itemBiasPrior[item]); userThresholds[user][level] = Variable<double>.Random(userThresholdsPrior[user][level]); userData = Variable.Array<int>(observation).Named("userData"); itemData = Variable.Array<int>(observation).Named("itemData"); ratingData = Variable.Array(Variable.Array<bool>(level), observation).Named("ratingData"); // This example requires EP engine = new InferenceEngine(); } public void Intialize() { traitPrior = Gaussian.FromMeanAndVariance(0.0, 1.0); biasPrior = Gaussian.FromMeanAndVariance(0.0, 1.0); userTraitsPrior.ObservedValue = Util.ArrayInit(numUsers, u => Util.ArrayInit(numTraits, t => traitPrior)); itemTraitsPrior.ObservedValue = Util.ArrayInit(numItems, i => Util.ArrayInit(numTraits, t => traitPrior)); userBiasPrior.ObservedValue = Util.ArrayInit(numUsers, u => biasPrior); itemBiasPrior.ObservedValue = Util.ArrayInit(numItems, i => biasPrior); userThresholdsPrior.ObservedValue = Util.ArrayInit(numUsers, u => Util.ArrayInit(numLevels, l => Gaussian.FromMeanAndVariance(l - numLevels / 2.0 + 0.5, 1.0))); // Break symmetry and remove ambiguity in the traits for (int i = 0; i < numTraits; i++) { // Assume that numTraits < numItems for (int j = 0; j < numTraits; j++) { itemTraitsPrior.ObservedValue[i][j] = Gaussian.PointMass(0); } itemTraitsPrior.ObservedValue[i][i] = Gaussian.PointMass(1); } // Set model noises explicitly affinityNoiseVariance = Variable.Observed(0.1).Named("affinityNoiseVariance"); thresholdsNoiseVariance = Variable.Observed(0.1).Named("thresholdsNoiseVariance"); } public void IntializePost() { userTraitsPrior = BASEuserTraitsPrior; itemTraitsPrior = BASEitemTraitsPrior; userBiasPrior = BASEuserBiasPrior; itemBiasPrior = BASEitemBiasPrior; userThresholdsPrior = BASEuserThresholdsPrior; traitPrior = BASEtraitPrior; biasPrior = BASEbiasPrior; userTraits = BASEuserTraits; itemTraits = BASEitemTraits; userBias = BASEuserBias; itemBias = BASEitemBias; userThresholds = BASEuserThresholds; affinityNoiseVariance = BASEaffinityNoiseVariance; thresholdsNoiseVariance = BASEthresholdsNoiseVariance; } public void Train(string fileName, int trainCase) { if (trainCase == 1) IntializePost(); engine.NumberOfIterations = 1; if (!(engine.Algorithm is ExpectationPropagation)) { Console.WriteLine("This example only runs with Expectation Propagation"); } //*********************** File Reading Start *********************** int[] tmpUser; int[] tmpItem; int[] tmpRating; LoadData(fileName, out tmpUser, out tmpItem, out tmpRating); //*********************** File Reading End ************************ numObservations = Variable.Observed(tmpItem.Length); observation = new Range(numObservations); userData = Variable.Array<int>(observation); itemData = Variable.Array<int>(observation); ratingData = Variable.Array(Variable.Array<bool>(level), observation); // Model using (Variable.ForEach(observation)) { VariableArray<double> products = Variable.Array<double>(trait);//.Named("products"); products[trait] = userTraits[userData[observation]][trait] * itemTraits[itemData[observation]][trait]; Variable<double> bias = (userBias[userData[observation]] + itemBias[itemData[observation]]);//.Named("bias"); Variable<double> affinity = (bias + Variable.Sum(products));//.Named("productSum")).Named("affinity"); Variable<double> noisyAffinity = Variable.GaussianFromMeanAndVariance(affinity, affinityNoiseVariance);//.Named("noisyAffinity"); VariableArray<double> noisyThresholds = Variable.Array<double>(level);//.Named("noisyThresholds"); noisyThresholds[level] = Variable.GaussianFromMeanAndVariance(userThresholds[userData[observation]][level], thresholdsNoiseVariance); ratingData[observation][level] = noisyAffinity > noisyThresholds[level]; } // Observe training data GenerateData(numUsers, numItems, numTraits, numObservations.ObservedValue, numLevels, userData, itemData, ratingData, userTraitsPrior.ObservedValue, itemTraitsPrior.ObservedValue, userBiasPrior.ObservedValue, itemBiasPrior.ObservedValue, userThresholdsPrior.ObservedValue, affinityNoiseVariance.ObservedValue, thresholdsNoiseVariance.ObservedValue, tmpUser, tmpItem, tmpRating, trainCase); // Allow EP to process the product factor as if running VMP // as in Stern, Herbrich, Graepel paper. engine.Compiler.GivePriorityTo(typeof(GaussianProductOp_SHG09)); engine.Compiler.ShowWarnings = true; // Run inference var userTraitsPosterior = engine.Infer<Gaussian[][]>(userTraits); var itemTraitsPosterior = engine.Infer<Gaussian[][]>(itemTraits); var userBiasPosterior = engine.Infer<Gaussian[]>(userBias); var itemBiasPosterior = engine.Infer<Gaussian[]>(itemBias); var userThresholdsPosterior = engine.Infer<Gaussian[][]>(userThresholds); // Feed in the inferred posteriors as the new priors userTraitsPrior.ObservedValue = userTraitsPosterior; itemTraitsPrior.ObservedValue = itemTraitsPosterior; userBiasPrior.ObservedValue = userBiasPosterior; itemBiasPrior.ObservedValue = itemBiasPosterior; userThresholdsPrior.ObservedValue = userThresholdsPosterior; if (trainCase == 0) { BASEuserTraitsPrior = userTraitsPrior; BASEitemTraitsPrior = itemTraitsPrior; BASEuserBiasPrior = userBiasPrior; BASEitemBiasPrior = itemBiasPrior; BASEuserThresholdsPrior = userThresholdsPrior; BASEtraitPrior = traitPrior; BASEbiasPrior = biasPrior; BASEuserTraits = userTraits; BASEitemTraits = itemTraits; BASEuserBias = userBias; BASEitemBias = itemBias; BASEuserThresholds = userThresholds; BASEaffinityNoiseVariance = affinityNoiseVariance; BASEthresholdsNoiseVariance = thresholdsNoiseVariance; } } public object Test(string filename) { //*************!!!!!!!!! Single testset case !!!!!!!!!!************* // Make a prediction int[] userTest; int[] itemTest; int[] testRating; LoadData(filename, out userTest, out itemTest, out testRating); numObservations.ObservedValue = userTest.Length; userData.ObservedValue = userTest; itemData.ObservedValue = itemTest; ratingData.ClearObservedValue(); Bernoulli[][] predictedRating = engine.Infer<Bernoulli[][]>(ratingData); computeMAE_RMSE(numObservations.ObservedValue, predictedRating, testRating); object results = (computeMAE_RMSE(numObservations.ObservedValue, predictedRating, testRating)); return results; } private Tuple<float, double, string> computeMAE_RMSE(int numObservations, Bernoulli[][] predictedRating, int[] testRating) { float MAE = 0; double RMSE = 0; for (int i = 0; i < numObservations; i++) { double value; int prediction = 1; foreach (var rating in predictedRating[i]) { value = rating.GetMean(); if ((float)value > 0.5) prediction += 1; } MAE += Math.Abs(prediction - testRating[i]); RMSE += Math.Pow(prediction - testRating[i], 2); } MAE = MAE / numObservations; RMSE = Math.Sqrt(RMSE / numObservations); string sol = "MAE -> " + MAE + " RMSE-> " + RMSE; Console.WriteLine(sol); return new Tuple<float,double,string>(MAE,RMSE,sol); } // Generates data from the model void GenerateData(int numUsers, int numItems, int numTraits, int numObservations, int numLevels, VariableArray<int> userData, VariableArray<int> itemData, VariableArray<VariableArray<bool>, bool[][]> ratingData, Gaussian[][] userTraitsPrior, Gaussian[][] itemTraitsPrior, Gaussian[] userBiasPrior, Gaussian[] itemBiasPrior, Gaussian[][] userThresholdsPrior, double affinityNoiseVariance, double thresholdsNoiseVariance, int[] users, int[] items, int[] ratings, int trainCase) { int[] generatedUserData = users; int[] generatedItemData = items; bool[][] generatedRatingData = new bool[numObservations][]; // Sample model parameters from the priors if (trainCase == 0) { Rand.Restart(12347); double[][] userTraits = Util.ArrayInit(numUsers, u => Util.ArrayInit(numTraits, t => userTraitsPrior[u][t].Sample())); double[][] itemTraits = Util.ArrayInit(numItems, i => Util.ArrayInit(numTraits, t => itemTraitsPrior[i][t].Sample())); double[] userBias = Util.ArrayInit(numUsers, u => userBiasPrior[u].Sample()); double[] itemBias = Util.ArrayInit(numItems, i => itemBiasPrior[i].Sample()); double[][] userThresholds = Util.ArrayInit(numUsers, u => Util.ArrayInit(numLevels, l => userThresholdsPrior[u][l].Sample())); } // Repeat the model with fixed parameters for (int observation = 0; observation < numObservations; observation++) { switch (ratings[observation]) { case 1: generatedRatingData[observation] = new bool[] { false, false, false, false }; break; case 2: generatedRatingData[observation] = new bool[] { true, false, false, false }; break; case 3: generatedRatingData[observation] = new bool[] { true, true, false, false }; break; case 4: generatedRatingData[observation] = new bool[] { true, true, true, false }; break; case 5: generatedRatingData[observation] = new bool[] { true, true, true, true }; break; } } userData.ObservedValue = generatedUserData; itemData.ObservedValue = generatedItemData; ratingData.ObservedValue = generatedRatingData; } static private void LoadData( string ifn, // The file name out int[] tmpUser, // users out int[] tmpItem, // movies out int[] tmpRating) // ratings { // File is assumed to have tab or comma separated label, clicks, exams tmpUser = null; tmpItem = null; tmpRating = null; int totalDocs = 0; string myStr; StreamReader mySR; char[] sep = { '\t', ',', ' ' }; for (int pass = 0; pass < 2; pass++) { if (1 == pass) { tmpUser = new int[totalDocs]; tmpItem = new int[totalDocs]; tmpRating = new int[totalDocs]; totalDocs = 0; } mySR = new StreamReader(ifn); //mySR.ReadLine(); // Skip over header line while ((myStr = mySR.ReadLine()) != null) { if (1 == pass) { string[] mySplitStr = myStr.Split(sep); int rat = int.Parse(mySplitStr[2]); int urs = int.Parse(mySplitStr[0]); int itm = int.Parse(mySplitStr[1]); tmpUser[totalDocs] = urs - 1; tmpItem[totalDocs] = itm - 1; tmpRating[totalDocs] = rat; } totalDocs++; } mySR.Close(); } } } }
The problem in this code are the function InitializePost() and the assignement in
if (trainCase == 0) { ...
in the function train, because i don't provide a copy of the variables, but just link them with the =. I've tried with the .Clone() and the Variable.copy() functions but it doesn't work(in both the case because of the cast). How i can copy the values for these variables??
2_ In the function test i just want to "test" the model as in the second part(***make a prediction***) for the original software. However, unlike the original software, in this case the engine instance make an iteration of the algorithm also in the test case. What did I make wrong?
Thank you
Marco
Friday, May 17, 2013 9:22 AM
All replies
-
What is the purpose of InitializePost?Wednesday, May 22, 2013 5:21 PMOwner
-
It was used in a particular version of ColdStart. However I have changed the code and this part is not present anymore.
Thank you for your Reply.
Marco
Tuesday, May 28, 2013 9:52 AM -
So the problem is fixed now?Tuesday, May 28, 2013 10:50 AMOwner