Skip to content

Instantly share code, notes, and snippets.

@mediumTaj
Created July 16, 2018 18:16
Show Gist options
  • Save mediumTaj/88fdcc4b2f9da1d5c32c9c9a083c00eb to your computer and use it in GitHub Desktop.
Save mediumTaj/88fdcc4b2f9da1d5c32c9c9a083c00eb to your computer and use it in GitHub Desktop.
IBM Watson SDK for Unity example streaming with language translator
/**
* Copyright 2015 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Services.LanguageTranslator.v3;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;
using System.Collections.Generic;
using UnityEngine.UI;
using System;
using IBM.Watson.DeveloperCloud.Connection;
public class ExampleStreaming : MonoBehaviour
{
#region PLEASE SET THESE VARIABLES IN THE INSPECTOR
[Space(10)]
[Tooltip("The service url for the Speech to Text service (optional). This defaults to \"https://stream.watsonplatform.net/speech-to-text/api\"")]
[SerializeField]
private string _STTServiceUrl;
[Tooltip("Text field to display the results of streaming.")]
public Text ResultsField;
[Header("CF Authentication")]
[Tooltip("The CF speech to text service username.")]
[SerializeField]
private string _STTServiceUsername;
[Tooltip("The CF speech to text service password.")]
[SerializeField]
private string _STTServicePassword;
[Header("IAM Authentication")]
[Tooltip("The IAM apikey.")]
[SerializeField]
private string _STTServiceIamApikey;
[Tooltip("The IAM url used to authenticate the apikey (optional). This defaults to \"https://iam.bluemix.net/identity/token\".")]
[SerializeField]
private string _STTServiceIamUrl;
[Header("Language Translator")]
[Space(10)]
[Tooltip("The IAM apikey for the Language Translator service.")]
[SerializeField]
private string _LTServiceIamApikey;
[Tooltip("The service url for the Language Translator service.")]
[SerializeField]
private string _LTServiceUrl;
#endregion
private int _recordingRoutine = 0;
private string _microphoneID = null;
private AudioClip _recording = null;
private int _recordingBufferSize = 1;
private int _recordingHZ = 22050;
private SpeechToText _service;
private LanguageTranslator _languageTranslatorService;
void Start()
{
LogSystem.InstallDefaultReactors();
Runnable.Run(CreateService());
}
private IEnumerator CreateService()
{
// Create credential and instantiate service
Credentials credentials = null;
if (!string.IsNullOrEmpty(_STTServiceUsername) && !string.IsNullOrEmpty(_STTServicePassword))
{
// Authenticate using username and password
credentials = new Credentials(_STTServiceUsername, _STTServicePassword, _STTServiceUrl);
}
else if (!string.IsNullOrEmpty(_STTServiceIamApikey))
{
// Authenticate using iamApikey
TokenOptions tokenOptions = new TokenOptions()
{
IamApiKey = _STTServiceIamApikey,
IamUrl = _STTServiceIamUrl
};
credentials = new Credentials(tokenOptions, _STTServiceUrl);
// Wait for tokendata
while (!credentials.HasIamTokenData())
yield return null;
}
else
{
throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
}
_service = new SpeechToText(credentials);
_service.StreamMultipart = true;
//_service.RecognizeModel = "";
TokenOptions languageTranslatorTokenOptions = new TokenOptions()
{
IamApiKey = _LTServiceIamApikey
};
Credentials languageTranslatorCredentials = new Credentials(languageTranslatorTokenOptions, _LTServiceUrl);
// Wait for tokendata
while (!languageTranslatorCredentials.HasIamTokenData())
yield return null;
_languageTranslatorService = new LanguageTranslator("2018-07-17", languageTranslatorCredentials);
Active = true;
StartRecording();
}
public bool Active
{
get { return _service.IsListening; }
set
{
if (value && !_service.IsListening)
{
_service.DetectSilence = true;
_service.EnableWordConfidence = true;
_service.EnableTimestamps = true;
_service.SilenceThreshold = 0.01f;
_service.MaxAlternatives = 0;
_service.EnableInterimResults = true;
_service.OnError = OnError;
_service.InactivityTimeout = -1;
_service.ProfanityFilter = false;
_service.SmartFormatting = true;
_service.SpeakerLabels = false;
_service.WordAlternativesThreshold = null;
_service.StartListening(OnRecognize, OnRecognizeSpeaker);
}
else if (!value && _service.IsListening)
{
_service.StopListening();
}
}
}
private void StartRecording()
{
if (_recordingRoutine == 0)
{
UnityObjectUtil.StartDestroyQueue();
_recordingRoutine = Runnable.Run(RecordingHandler());
}
}
private void StopRecording()
{
if (_recordingRoutine != 0)
{
Microphone.End(_microphoneID);
Runnable.Stop(_recordingRoutine);
_recordingRoutine = 0;
}
}
private void OnError(string error)
{
Active = false;
Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
}
private IEnumerator RecordingHandler()
{
Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", Microphone.devices);
_recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
yield return null; // let _recordingRoutine get set..
if (_recording == null)
{
StopRecording();
yield break;
}
bool bFirstBlock = true;
int midPoint = _recording.samples / 2;
float[] samples = null;
while (_recordingRoutine != 0 && _recording != null)
{
int writePos = Microphone.GetPosition(_microphoneID);
if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
{
Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");
StopRecording();
yield break;
}
if ((bFirstBlock && writePos >= midPoint)
|| (!bFirstBlock && writePos < midPoint))
{
// front block is recorded, make a RecordClip and pass it onto our callback.
samples = new float[midPoint];
_recording.GetData(samples, bFirstBlock ? 0 : midPoint);
AudioData record = new AudioData();
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
record.Clip.SetData(samples, 0);
_service.OnListen(record);
bFirstBlock = !bFirstBlock;
}
else
{
// calculate the number of samples remaining until we ready for a block of audio,
// and wait that amount of time it will take to record.
int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
float timeRemaining = (float)remaining / (float)_recordingHZ;
yield return new WaitForSeconds(timeRemaining);
}
}
yield break;
}
private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
{
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
Log.Debug("ExampleStreaming.OnRecognize()", text);
//ResultsField.text = text;
_languageTranslatorService.GetTranslation(OnTranslateSuccess, OnTranslateFail, text, "en-de");
}
if (res.keywords_result != null && res.keywords_result.keyword != null)
{
foreach (var keyword in res.keywords_result.keyword)
{
Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
}
}
if (res.word_alternatives != null)
{
foreach (var wordAlternative in res.word_alternatives)
{
Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
foreach(var alternative in wordAlternative.alternatives)
Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
}
}
}
}
}
private void OnTranslateFail(RESTConnector.Error error, Dictionary<string, object> customData)
{
Log.Debug("ExampleStreaming", "Translation failed: {0}", error.ErrorMessage);
}
private void OnTranslateSuccess(Translations response, Dictionary<string, object> customData)
{
ResultsField.text = response.translations[0].translation;
}
private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
{
if (result != null)
{
foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
{
Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment