Last active
May 23, 2018 20:24
-
-
Save akeller/281864b1ab3d0a592642158a1f809b2a to your computer and use it in GitHub Desktop.
Watson Unity SDK Getting Started - Speech to Text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections; | |
using System.Collections.Generic; | |
using UnityEngine; | |
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1; | |
using IBM.Watson.DeveloperCloud.Widgets; | |
using IBM.Watson.DeveloperCloud.DataTypes; | |
using IBM.Watson.DeveloperCloud.Utilities; | |
using IBM.Watson.DeveloperCloud.Logging; | |
using IBM.Watson.DeveloperCloud.Connection; | |
using System.IO; | |
using FullSerializer; | |
/// <summary>
/// Streams microphone audio to the Watson Speech to Text service and logs
/// every final transcript to the Unity console.
/// </summary>
/// <remarks>
/// The microphone records into a looping clip that is treated as two halves:
/// as soon as one half has been filled it is copied out and handed to the
/// service while the other half is being recorded.
/// </remarks>
public class PlayerController : MonoBehaviour
{
    private const string ServiceUrl = "https://stream.watsonplatform.net/speech-to-text/api";

    // Service credentials are assigned in the Inspector so that they never
    // have to be written into source code.
    [SerializeField]
    private string _username = null;
    [SerializeField]
    private string _password = null;

    // ID returned by Runnable.Run; 0 means no recording routine is running.
    private int _recordingRoutine = 0;
    // null selects the default microphone device (Unity Microphone API).
    private string _microphoneID = null;
    private AudioClip _recording = null;
    // Length of the looping microphone clip, in seconds.
    private int _recordingBufferSize = 2;
    // Sample rate requested from the microphone, in Hz.
    private int _recordingHZ = 22050;
    private SpeechToText _speechToText;

    /// <summary>
    /// Creates the Speech to Text service and starts listening and recording.
    /// </summary>
    void Start()
    {
        if (string.IsNullOrEmpty(_username) || string.IsNullOrEmpty(_password))
        {
            Debug.LogError("PlayerController: Speech to Text username/password are not set.");
            return;
        }

        Credentials credentials = new Credentials(_username, _password, ServiceUrl);
        _speechToText = new SpeechToText(credentials);
        OnListen();
    }

    /// <summary>
    /// Releases the microphone and stops listening when this component is destroyed.
    /// </summary>
    private void OnDestroy()
    {
        // NOTE(review): Runnable appears to run routines independently of this
        // component, so they are stopped explicitly here - confirm against the SDK.
        StopRecording();
        Active = false;
    }

    /// <summary>
    /// Starts the service listening and begins capturing microphone audio.
    /// </summary>
    private void OnListen()
    {
        Active = true;
        StartRecording();
    }

    /// <summary>
    /// Gets or sets whether the Speech to Text service is listening.
    /// Setting true configures the service and starts listening; setting
    /// false stops it. Assigning the current state is a no-op, as is any
    /// access before the service has been created.
    /// </summary>
    public bool Active
    {
        get { return _speechToText != null && _speechToText.IsListening; }
        set
        {
            if (_speechToText == null)
            {
                return;
            }

            if (value && !_speechToText.IsListening)
            {
                _speechToText.DetectSilence = true;
                _speechToText.EnableWordConfidence = false;
                _speechToText.EnableTimestamps = false;
                _speechToText.SilenceThreshold = 0.03f;
                _speechToText.MaxAlternatives = 1;
                _speechToText.EnableInterimResults = true;
                _speechToText.OnError = OnError;
                _speechToText.StartListening(OnSpeechInput);
            }
            else if (!value && _speechToText.IsListening)
            {
                _speechToText.StopListening();
            }
        }
    }

    /// <summary>
    /// Recognition callback: logs each alternative of every final result
    /// whose confidence is greater than zero.
    /// </summary>
    /// <param name="result">Recognition event from the service; may be null.</param>
    private void OnSpeechInput(SpeechRecognitionEvent result)
    {
        if (result == null || result.results.Length == 0)
        {
            return;
        }

        foreach (var res in result.results)
        {
            // Interim (non-final) results are ignored.
            if (!res.final)
            {
                continue;
            }

            foreach (var alt in res.alternatives)
            {
                if (alt.confidence > 0)
                {
                    Debug.Log("Result: " + alt.transcript + " Confidence: " + alt.confidence);
                }
            }
        }
    }

    /// <summary>
    /// Starts the recording routine unless one is already running.
    /// </summary>
    private void StartRecording()
    {
        if (_recordingRoutine == 0)
        {
            UnityObjectUtil.StartDestroyQueue();
            _recordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    /// <summary>
    /// Ends microphone capture and stops the recording routine, if one is running.
    /// </summary>
    private void StopRecording()
    {
        if (_recordingRoutine != 0)
        {
            Microphone.End(_microphoneID);
            Runnable.Stop(_recordingRoutine);
            _recordingRoutine = 0;
        }
    }

    /// <summary>
    /// Error callback for the service: stops listening and logs the error.
    /// </summary>
    /// <param name="error">Error text supplied by the service.</param>
    private void OnError(string error)
    {
        Active = false;
        Log.Debug("ExampleStreaming", "Error! {0}", error);
    }

    /// <summary>
    /// Coroutine that records from the microphone into a looping clip and
    /// forwards each completed half of that clip to the service.
    /// </summary>
    /// <returns>Enumerator driven by <c>Runnable</c>.</returns>
    private IEnumerator RecordingHandler()
    {
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return null;

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        // true while waiting for the first half of the clip to fill,
        // false while waiting for the second half.
        bool bFirstBlock = true;
        int midPoint = _recording.samples / 2;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");
                StopRecording();
                yield break;
            }

            bool blockReady = bFirstBlock ? writePos >= midPoint : writePos < midPoint;
            if (blockReady)
            {
                // The awaited half is complete: copy it into its own clip and
                // pass it on to the service. A fresh buffer is used per block.
                float[] samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);
                _speechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // Work out how many samples are still missing from the awaited
                // half and wait for as long as recording them will take.
                int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
BREAKING CHANGES MADE TO SDK: This is on my to-do list to fix, but this example currently does not work with the most recent version of the SDK in the Asset Store.