locked
SAPI: differing events returned with different voices RRS feed

  • Question

  • Hello everyone,

    I am writing a program that uses SAPI 5.1 to synthesize speech and picks up on the phoneme events to lip-sync a character with it. When I use a Microsoft-provided voice (such as Mary or Sam), everything works fine, but with other voices I run into problems. I have tried these voices so far:

    Cepstral Voices :http://www.cepstral.com/

    Neither the Diane nor the Callie voice speaks at all, and it appears that they fire no events.

    Neospeech: http://www.neospeech.com/default.aspx

    VW Kate's voice speaks and fires appropriate viseme & end stream events but no information on emphasis is contained in the HIWORD(lparam) of the event, so the visemes look funny

    Do only Microsoft voices fire the complete set of events, or have I just had bad luck with these two? Does anyone have any recommendations on where I can get high quality voices that will fire these events?

    Thanks so much for your time. In case it is something strange I am doing programmatically, my code is below.

    Lauren

    /**
     * Thread entry point: speaks one message through SAPI and updates the
     * character's viseme from the viseme events raised while it is spoken.
     *
     * @param lpParam  Pointer to a ThreadParam. Its message is the text to speak;
     *                 its sceneObject supplies the character name, gender and age
     *                 used to pick a voice.
     * @return Non-zero (true) once the end of the input stream has been reported.
     *         Zero (false) if the message is empty, COM could not be initialized,
     *         the voice object could not be created, Speak() failed, or an
     *         exception was caught.
     */
    DWORD WINAPI DIGuy::sayMessage(LPVOID lpParam){
    	bool comInitialized = false;	//CoInitialize succeeded here and must be balanced
    	bool voiceCreated = false;	//pVoice was created by this call, so this call releases it
    	bool succeeded = false;
    	try{
    		ThreadParam * param = (ThreadParam *)lpParam;
    		wstring s = param->message;
    		wstring characterName = param->sceneObject.name;
    
    		//first check the string for null
    		if (s == L""){
    			//Error::recordError("mailbox read error in tts_client", MAILBOX_READ_ERROR, GetLastError());
    			return false;
    		}
    
    		//http://msdn.microsoft.com/en-us/library/ms720163(VS.85,classic).asp is my source for this
    		//set up text to speech
    
    		if (FAILED(::CoInitialize(NULL)))
    			return false;
    		comInitialized = true;
    
    		HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void **)&pVoice);
    		if( SUCCEEDED( hr ) )
    		{
    			voiceCreated = true;
    
    			//Pick the attribute strings for the voice lookup.
    			//Swap voices here for testing, e.g. an optional attribute of L"Name=VW Kate".
    			const wchar_t* requiredAttrs;
    			const wchar_t* optionalAttrs;
    			if (param->sceneObject.gender == FEMALE){
    				requiredAttrs = L"Gender=Female";
    				if (param->sceneObject.age == ADULT){
    					optionalAttrs = L"Gender=Female";
    				}
    				else{
    					optionalAttrs = L"";
    					//non-adult female: wrap the text in a pitch tag
    					s = L"<pitch middle=\"+10\">"+s+L"</pitch>";
    				}
    			}
    			else{
    				requiredAttrs = L"Gender=Male";
    				optionalAttrs = (param->sceneObject.age == ADULT) ? L"Age=Adult" : L"Age=Child";
    			}
    
    			//Start from NULL so a failed lookup can never hand SetVoice a garbage pointer.
    			ISpObjectToken* pToken = NULL;
    			hr = SpFindBestToken(SPCAT_VOICES, requiredAttrs, optionalAttrs, &pToken);
    			if (FAILED(hr)){
    				//retry with only the required attributes
    				pToken = NULL;
    				hr = SpFindBestToken(SPCAT_VOICES, requiredAttrs, L"", &pToken);
    			}
    			if (SUCCEEDED(hr) && pToken != NULL){
    				pVoice->SetVoice(pToken);
    			}
    			//If no token was found, SetVoice is skipped and the voice object keeps
    			//whatever voice it was created with.
    			if (pToken != NULL){
    				//drop our reference to the token; it is not used again below
    				pToken->Release();
    				pToken = NULL;
    			}
    
    			pVoice->SetInterest(SPFEI(SPEI_VISEME)|SPFEI(SPEI_END_INPUT_STREAM),SPFEI(SPEI_VISEME)|SPFEI(SPEI_END_INPUT_STREAM));
    			pVoice->SetNotifyCallbackFunction(&eventFunction,0,0);
    			//NOTE(review): this wait happens before anything has been queued to speak.
    			//Kept in place to preserve the existing call sequence - confirm it is needed.
    			pVoice->WaitForNotifyEvent(INFINITE);
    
    			//Only poll for events if the speak request was accepted. Otherwise the
    			//end-of-stream event never arrives and the loop below would never exit.
    			hr = pVoice->Speak(s.c_str(),SPF_ASYNC,NULL);
    			if (SUCCEEDED(hr)){
    				SPEVENT event;
    				ULONG ul;
    				bool isDone = false;
    				//NOTE(review): this polls GetEvents without blocking between polls.
    				while(!isDone){
    					while(!isDone && pVoice->GetEvents(1,&event, &ul) == S_OK){
    						if(event.eEventId==SPEI_VISEME){
    							//convert the viseme to the right string
    							int vis = LOWORD(event.lParam);
    							int stress = HIWORD(event.lParam);
    							//any non-zero high word forces the neutral viseme
    							if (stress != 0) vis = SP_VISEME_0;
    
    							//append the viseme id that is about to be applied to the log file
    							ofstream outfile;
    							outfile.open("visemes.txt",ios::app);
    							outfile<<vis<<endl;
    							outfile.close();
    
    							switch(vis){
    								//ae ax ah / aa / ao
    								case SP_VISEME_1:
    								case SP_VISEME_2:
    								case SP_VISEME_3:
    									DIGuy::setViseme(characterName, "aa");
    									break;
    
    								//ey eh uh / oy / ay
    								case SP_VISEME_4:
    								case SP_VISEME_10:
    								case SP_VISEME_11:
    									DIGuy::setViseme(characterName, "ey");
    									break;
    
    								//er
    								case SP_VISEME_5:
    									DIGuy::setViseme(characterName, "eh");
    									break;
    
    								//y iy ih ix
    								case SP_VISEME_6:
    									DIGuy::setViseme(characterName, "iy");
    									break;
    
    								//w uw
    								case SP_VISEME_7:
    									DIGuy::setViseme(characterName, "uw");
    									break;
    
    								//ow / aw
    								case SP_VISEME_8:
    								case SP_VISEME_9:
    									DIGuy::setViseme(characterName, "ow");
    									break;
    
    								//h
    								case SP_VISEME_12:
    									DIGuy::setViseme(characterName, "ih");
    									break;
    
    								//r
    								case SP_VISEME_13:
    									DIGuy::setViseme(characterName, "r");
    									break;
    
    								//l
    								case SP_VISEME_14:
    									DIGuy::setViseme(characterName, "el");
    									break;
    
    								// s z
    								case SP_VISEME_15:
    									DIGuy::setViseme(characterName, "s");
    									break;
    
    								//sh ch jh zh
    								case SP_VISEME_16:
    									DIGuy::setViseme(characterName, "ch");
    									break;
    
    								//th dh
    								case SP_VISEME_17:
    									DIGuy::setViseme(characterName, "th");
    									break;
    
    								//f v
    								case SP_VISEME_18:
    									DIGuy::setViseme(characterName, "f");
    									break;
    
    								//d t n
    								case SP_VISEME_19:
    									DIGuy::setViseme(characterName, "d");
    									break;
    
    								//k g ng
    								case SP_VISEME_20:
    									DIGuy::setViseme(characterName, "g");
    									break;
    
    								//p b m
    								case SP_VISEME_21:
    									DIGuy::setViseme(characterName, "b");
    									break;
    
    								//neutral (SP_VISEME_0) and anything unrecognised
    								case SP_VISEME_0:
    								default:
    									DIGuy::setViseme(characterName, "all");
    									break;
    							}
    							visemeChanged = true;
    						}
    						else if(event.eEventId== SPEI_END_INPUT_STREAM){
    							isDone = true;
    						}
    					}
    				}
    				succeeded = true;
    			}
    		}
    	}
    	catch(...){
    		succeeded = false;
    	}
    
    	//Single cleanup path for success, failure and exceptions.
    	//The voice is released before COM is uninitialized on this thread.
    	if (voiceCreated && pVoice != NULL){
    		pVoice->Release();
    		pVoice = NULL;
    	}
    	if (comInitialized){
    		::CoUninitialize();
    	}
    	return succeeded;
    }
    

    • Moved by SamAgain Wednesday, October 20, 2010 10:14 AM not bcl question (From:.NET Base Class Library)
    Friday, October 8, 2010 5:45 PM

Answers