Created
July 16, 2018 18:16
-
-
Save mediumTaj/88fdcc4b2f9da1d5c32c9c9a083c00eb to your computer and use it in GitHub Desktop.
IBM Watson SDK for Unity: an example that streams microphone audio to the Speech to Text service and pipes each transcript through the Language Translator service.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Copyright 2015 IBM Corp. All Rights Reserved. | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
* | |
*/ | |
using UnityEngine; | |
using System.Collections; | |
using IBM.Watson.DeveloperCloud.Logging; | |
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1; | |
using IBM.Watson.DeveloperCloud.Services.LanguageTranslator.v3; | |
using IBM.Watson.DeveloperCloud.Utilities; | |
using IBM.Watson.DeveloperCloud.DataTypes; | |
using System.Collections.Generic; | |
using UnityEngine.UI; | |
using System; | |
using IBM.Watson.DeveloperCloud.Connection; | |
/// <summary>
/// Unity example that streams microphone audio to the IBM Watson Speech to Text
/// service and forwards every recognized transcript to the Language Translator
/// service (English -> German), displaying the translation in a UI Text field.
/// Attach to a GameObject and configure credentials in the Inspector.
/// </summary>
public class ExampleStreaming : MonoBehaviour
{
    #region PLEASE SET THESE VARIABLES IN THE INSPECTOR
    [Space(10)]
    [Tooltip("The service url for the Speech to Text service (optional). This defaults to \"https://stream.watsonplatform.net/speech-to-text/api\"")]
    [SerializeField]
    private string _STTServiceUrl;
    [Tooltip("Text field to display the results of streaming.")]
    public Text ResultsField;
    [Header("CF Authentication")]
    [Tooltip("The CF speech to text service username.")]
    [SerializeField]
    private string _STTServiceUsername;
    [Tooltip("The CF speech to text service password.")]
    [SerializeField]
    private string _STTServicePassword;
    [Header("IAM Authentication")]
    [Tooltip("The IAM apikey.")]
    [SerializeField]
    private string _STTServiceIamApikey;
    [Tooltip("The IAM url used to authenticate the apikey (optional). This defaults to \"https://iam.bluemix.net/identity/token\".")]
    [SerializeField]
    private string _STTServiceIamUrl;
    [Header("Language Translator")]
    [Space(10)]
    [Tooltip("The IAM apikey for the Language Translator service.")]
    [SerializeField]
    private string _LTServiceIamApikey;
    [Tooltip("The service url for the Language Translator service.")]
    [SerializeField]
    private string _LTServiceUrl;
    #endregion

    // Runnable handle of the coroutine driving RecordingHandler(); 0 means "not recording".
    private int _recordingRoutine = 0;
    // null selects the platform's default microphone device.
    private string _microphoneID = null;
    // Looping AudioClip that Microphone.Start() writes into.
    private AudioClip _recording = null;
    // Length of the looping microphone buffer, in seconds.
    private int _recordingBufferSize = 1;
    // Microphone capture sample rate, in Hz.
    private int _recordingHZ = 22050;
    private SpeechToText _service;
    private LanguageTranslator _languageTranslatorService;

    void Start()
    {
        LogSystem.InstallDefaultReactors();
        // Service creation is a coroutine because IAM token acquisition is asynchronous.
        Runnable.Run(CreateService());
    }

    /// <summary>
    /// Creates credentials for both services (CF username/password or IAM apikey for
    /// Speech to Text; IAM apikey for Language Translator), waits for IAM token data
    /// where needed, then activates listening and starts microphone capture.
    /// </summary>
    /// <exception cref="WatsonException">Thrown when no Speech to Text credentials are provided.</exception>
    private IEnumerator CreateService()
    {
        // Create credential and instantiate service
        Credentials credentials = null;
        if (!string.IsNullOrEmpty(_STTServiceUsername) && !string.IsNullOrEmpty(_STTServicePassword))
        {
            // Authenticate using username and password
            credentials = new Credentials(_STTServiceUsername, _STTServicePassword, _STTServiceUrl);
        }
        else if (!string.IsNullOrEmpty(_STTServiceIamApikey))
        {
            // Authenticate using iamApikey
            TokenOptions tokenOptions = new TokenOptions()
            {
                IamApiKey = _STTServiceIamApikey,
                IamUrl = _STTServiceIamUrl
            };
            credentials = new Credentials(tokenOptions, _STTServiceUrl);
            // Wait for tokendata
            while (!credentials.HasIamTokenData())
                yield return null;
        }
        else
        {
            throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
        }
        _service = new SpeechToText(credentials);
        _service.StreamMultipart = true;
        //_service.RecognizeModel = "";

        // Language Translator only supports IAM authentication here.
        TokenOptions languageTranslatorTokenOptions = new TokenOptions()
        {
            IamApiKey = _LTServiceIamApikey
        };
        Credentials languageTranslatorCredentials = new Credentials(languageTranslatorTokenOptions, _LTServiceUrl);
        // Wait for tokendata
        while (!languageTranslatorCredentials.HasIamTokenData())
            yield return null;
        _languageTranslatorService = new LanguageTranslator("2018-07-17", languageTranslatorCredentials);

        Active = true;
        StartRecording();
    }

    /// <summary>
    /// Gets or sets whether the Speech to Text service is actively listening.
    /// Setting true configures the recognizer and opens the websocket;
    /// setting false stops listening. Redundant sets are no-ops.
    /// </summary>
    public bool Active
    {
        get { return _service.IsListening; }
        set
        {
            if (value && !_service.IsListening)
            {
                _service.DetectSilence = true;
                _service.EnableWordConfidence = true;
                _service.EnableTimestamps = true;
                _service.SilenceThreshold = 0.01f;
                _service.MaxAlternatives = 0;
                _service.EnableInterimResults = true;
                _service.OnError = OnError;
                // -1 disables the service-side inactivity timeout.
                _service.InactivityTimeout = -1;
                _service.ProfanityFilter = false;
                _service.SmartFormatting = true;
                _service.SpeakerLabels = false;
                _service.WordAlternativesThreshold = null;
                _service.StartListening(OnRecognize, OnRecognizeSpeaker);
            }
            else if (!value && _service.IsListening)
            {
                _service.StopListening();
            }
        }
    }

    /// <summary>Starts the microphone-capture coroutine if it is not already running.</summary>
    private void StartRecording()
    {
        if (_recordingRoutine == 0)
        {
            UnityObjectUtil.StartDestroyQueue();
            _recordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    /// <summary>Stops the microphone and the capture coroutine, if running.</summary>
    private void StopRecording()
    {
        if (_recordingRoutine != 0)
        {
            Microphone.End(_microphoneID);
            Runnable.Stop(_recordingRoutine);
            _recordingRoutine = 0;
        }
    }

    /// <summary>Error callback from the Speech to Text websocket; stops listening and logs the error.</summary>
    private void OnError(string error)
    {
        Active = false;
        Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
    }

    /// <summary>
    /// Coroutine that records into a looping AudioClip and ships it to the
    /// Speech to Text service in half-buffer chunks (double buffering): while
    /// the microphone writes one half, the other half is copied out and sent.
    /// </summary>
    private IEnumerator RecordingHandler()
    {
        // Fix: log the device names, not the array object (which would print only its type).
        Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", string.Join(", ", Microphone.devices));
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return null; // let _recordingRoutine get set..

        if (_recording == null)
        {
            // Microphone failed to start (no device, or permission denied).
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int midPoint = _recording.samples / 2;
        float[] samples = null;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");
                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
                || (!bFirstBlock && writePos < midPoint))
            {
                // front block is recorded, make a RecordClip and pass it onto our callback.
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);
                _service.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we ready for a block of audio,
                // and wait that amount of time it will take to record.
                int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }
        yield break;
    }

    /// <summary>
    /// Recognition callback: logs each alternative (interim and final), forwards the
    /// transcript text to the Language Translator (English -> German), and logs any
    /// keyword and word-alternative results.
    /// </summary>
    private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
                    // Fix: pass the transcript as a format ARGUMENT, not as the format string —
                    // a brace character in the spoken text would otherwise throw a FormatException.
                    Log.Debug("ExampleStreaming.OnRecognize()", "{0}", text);
                    //ResultsField.text = text;
                    // NOTE(review): this also translates interim results and the " (Final, 0.95)"
                    // annotation itself — presumably intentional for the demo; confirm if reused.
                    _languageTranslatorService.GetTranslation(OnTranslateSuccess, OnTranslateFail, text, "en-de");
                }
                if (res.keywords_result != null && res.keywords_result.keyword != null)
                {
                    foreach (var keyword in res.keywords_result.keyword)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
                    }
                }
                if (res.word_alternatives != null)
                {
                    foreach (var wordAlternative in res.word_alternatives)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
                        foreach (var alternative in wordAlternative.alternatives)
                            Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
                    }
                }
            }
        }
    }

    /// <summary>Translation failure callback; logs the service error.</summary>
    private void OnTranslateFail(RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Debug("ExampleStreaming", "Translation failed: {0}", error.ErrorMessage);
    }

    /// <summary>
    /// Translation success callback; shows the first translation in the UI.
    /// </summary>
    private void OnTranslateSuccess(Translations response, Dictionary<string, object> customData)
    {
        // Fix: the original indexed translations[0] unguarded — an empty or null
        // translation list (or an unassigned ResultsField) threw at runtime.
        if (response == null || response.translations == null || ResultsField == null)
            return;
        foreach (var translation in response.translations)
        {
            ResultsField.text = translation.translation;
            break; // display only the first translation, as before
        }
    }

    /// <summary>Speaker-label callback; logs each diarization segment.</summary>
    private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null)
        {
            foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
            {
                Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment