none
Question implementing ICML 2012 "How to grade a test without knowing the answer"

    Question

  • I am trying to implement the model described in the ICML 2012 paper "How to grade a test without knowing the answer" by Yoram Bachrach, Tom Minka, et al. The paper says that the model was built with Infer.NET. Since I could not find the source code, I implemented it myself. Note that I have omitted the "discrimination" parameter for each question. (How did you implement the complex probability function, i.e. the Gaussian CDF, that is needed when deriving C_pq from t_pq?)

    When I test the model on a small training set (10 workers, 8 questions, 80 responses), the model works well. But when I use a bigger training set (38 workers, 100 questions, 1000 responses), the same model fails with the following error:

    "Unhandled Exception: MicrosoftResearch.Infer.Factors.ImproperMessageException: Improper distribution during inference (Gaussian(m/v=0.7047, 1/v=-0.04749)).  Can not perform inference on this model."

    I want to ask what is wrong with my model. Why can't the model handle a larger dataset?

    Could I take a look at the reference model code for that paper (ICML 2012, "How to grade a test without knowing the answer")?

    Thank you.

    Kaixiang MO

    This is how I implemented the model.

    /// <summary>
    /// Infer.NET implementation of the model from the ICML 2012 paper
    /// "How to grade a test without knowing the answer".
    /// Each worker has a latent ability, each question a latent difficulty and a
    /// latent true label. A worker who "solves" a question reports the true label;
    /// otherwise the reported label is drawn uniformly at random.
    /// </summary>
    class Test
    {
        // Data set loaded by Read(). The three arrays are parallel: entry i describes
        // the i-th observed <workerID, questionID, response> triple.
        static int nWorkerN;
        static int nQuestionN;
        static int nResponseN;
        static int[] worker;
        static int[] question;
        static int[] responseV;

        /// <summary>
        /// Builds the model, loads the observations from "data.txt", runs
        /// Expectation Propagation, and writes the posteriors to "result.txt"
        /// and to the console.
        /// </summary>
        public static void Main()
        {
            int nLabel = 5;
            double ability_mean = 2;
            double ability_variance = 10;
            double difficulty_mean = 0;
            double difficulty_variance = 10;

            // Sizes are model variables so that they can be observed after construction.
            Variable<int> nQuestion = Variable.New<int>();
            Range QuestionRange = new Range(nQuestion).Named("nQuestion");
            Variable<int> nWorker = Variable.New<int>();
            Range WorkerRange = new Range(nWorker).Named("nWorker");
            Variable<int> nResponse = Variable.New<int>();
            Range ResponseRange = new Range(nResponse).Named("nResponse");

            // Ability of each worker.
            VariableArray<double> ability = Variable.Array<double>(WorkerRange).Named("ability");
            ability[WorkerRange] = Variable.GaussianFromMeanAndVariance(ability_mean, ability_variance).ForEach(WorkerRange);

            // Difficulty of each question.
            VariableArray<double> difficulty = Variable.Array<double>(QuestionRange).Named("difficulty");
            difficulty[QuestionRange] = Variable.GaussianFromMeanAndVariance(difficulty_mean, difficulty_variance).ForEach(QuestionRange);

            // True label of each question.
            VariableArray<int> trueLabel = Variable.Array<int>(QuestionRange).Named("trueLabel");
            trueLabel[QuestionRange] = Variable.DiscreteUniform(nLabel).ForEach(QuestionRange);

            // Observed indices of each <workerID, questionID, response> triple.
            VariableArray<int> workerID = Variable.Array<int>(ResponseRange).Named("workerID");
            VariableArray<int> questionID = Variable.Array<int>(ResponseRange).Named("questionID");

            // Advantage of the worker over the question for each response.
            VariableArray<double> advantage = Variable.Array<double>(ResponseRange).Named("advantages");
            using (Variable.ForEach(ResponseRange))
            {
                advantage[ResponseRange] = ability[workerID[ResponseRange]] - difficulty[questionID[ResponseRange]];
            }

            // Whether the worker answered the question correctly.
            // NOTE(review): this is a hard, noise-free threshold (the per-question
            // "discrimination" is not modelled here) -- possibly related to improper
            // messages under EP on larger data sets; confirm against the paper.
            VariableArray<bool> solved = Variable.Array<bool>(ResponseRange).Named("solved");
            using (Variable.ForEach(ResponseRange))
            {
                solved[ResponseRange] = advantage[ResponseRange] > 0;
            }

            // If the question is not solved, the worker's response is arbitrary.
            VariableArray<int> RandomAnswer = Variable.Array<int>(ResponseRange).Named("random response");
            RandomAnswer[ResponseRange] = Variable.DiscreteUniform(nLabel).ForEach(ResponseRange);

            VariableArray<int> response = Variable.Array<int>(ResponseRange).Named("response");
            using (Variable.ForEach(ResponseRange))
            {
                using (Variable.If(solved[ResponseRange]))
                {
                    response[ResponseRange] = trueLabel[questionID[ResponseRange]];
                }
                using (Variable.IfNot(solved[ResponseRange]))
                {
                    response[ResponseRange] = RandomAnswer[ResponseRange];
                }
            }
            Console.Out.WriteLine("Model Construction Completed!");

            // Small hand-written data set, handy for smoke-testing the model:
            //   nWorker = 3, nQuestion = 3, nResponse = 3
            //   workerID   = { 0, 0, 1 }
            //   questionID = { 0, 1, 1 }
            //   response   = { 2, 3, 4 }
            Read("data.txt");
            nWorker.ObservedValue = nWorkerN;
            nQuestion.ObservedValue = nQuestionN;
            nResponse.ObservedValue = nResponseN;
            workerID.ObservedValue = worker;
            questionID.ObservedValue = question;
            response.ObservedValue = responseV;

            InferenceEngine ie = new InferenceEngine(new ExpectationPropagation());
            // Show the factor graph.
            ie.ShowFactorGraph = true;

            var resultAbility = ie.Infer(ability);
            var resultDifficulty = ie.Infer(difficulty);
            var resultAdvantage = ie.Infer(advantage);
            var resultTrueLabel = ie.Infer(trueLabel);
            var solve = ie.Infer(solved);

            // The using statements flush and close the file even if a write throws.
            string file = "result.txt";
            using (FileStream fs = new FileStream(file, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.WriteLine("ability");
                sw.WriteLine(resultAbility);
                sw.WriteLine("difficulty");
                sw.WriteLine(resultDifficulty);
                sw.WriteLine("advantage");
                sw.WriteLine(resultAdvantage);
                sw.WriteLine("solve");
                sw.WriteLine(solve);
                sw.WriteLine("trueLabel");
                sw.WriteLine(resultTrueLabel);
            }

            Console.Out.WriteLine("ability");
            Console.Out.WriteLine(resultAbility);
            Console.Out.WriteLine("difficulty");
            Console.Out.WriteLine(resultDifficulty);
            Console.Out.WriteLine("trueLabel");
            Console.Out.WriteLine(resultTrueLabel);
        }

        /// <summary>
        /// Loads the observations from a tab-separated text file into the static
        /// data fields. Each non-empty line holds three integers:
        /// worker ID, question ID and response. IDs are zero-based, so the worker
        /// and question counts are taken as the largest ID seen plus one.
        /// </summary>
        /// <param name="path">Path of the data file to read.</param>
        /// <exception cref="FormatException">
        /// A record has fewer than three fields or a field is not an integer.
        /// </exception>
        public static void Read(string path)
        {
            char[] splitChar = new char[] { '\t' };

            // Collect the non-empty lines; the reader is disposed even on failure.
            var lines = new System.Collections.Generic.List<string>();
            using (StreamReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Length > 0)
                    {
                        lines.Add(line);
                    }
                }
            }

            nResponseN = lines.Count;
            nWorkerN = 0;
            nQuestionN = 0;

            worker = new int[nResponseN];
            question = new int[nResponseN];
            responseV = new int[nResponseN];

            // The file is machine-readable, so parse independently of the user's locale.
            var culture = System.Globalization.CultureInfo.InvariantCulture;
            for (int i = 0; i < nResponseN; i++)
            {
                string[] fields = lines[i].Split(splitChar, StringSplitOptions.RemoveEmptyEntries);
                if (fields.Length < 3)
                {
                    throw new FormatException(
                        "Record " + (i + 1) + " (blank lines not counted) has " + fields.Length +
                        " tab-separated field(s); expected worker, question and response.");
                }

                worker[i] = int.Parse(fields[0], culture);
                question[i] = int.Parse(fields[1], culture);
                responseV[i] = int.Parse(fields[2], culture);

                nWorkerN = Math.Max(nWorkerN, worker[i]);
                nQuestionN = Math.Max(nQuestionN, question[i]);

                // Echo the first 3 records as a quick sanity check.
                if (i < 3)
                {
                    Console.Out.WriteLine(worker[i] + " " + question[i] + " " + responseV[i]);
                }
            }

            // IDs start from 0, so the count is the largest ID plus one.
            nWorkerN = nWorkerN + 1;
            nQuestionN = nQuestionN + 1;
            Console.Out.WriteLine("Read Completed! Total " + nResponseN + " observation");
        }
    }



    • Edited by Kaixiang MO Tuesday, June 26, 2012 3:25 AM unclear
    Monday, June 25, 2012 5:05 PM

All replies