/**
* Copyright 2015 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;
using System.Collections.Generic;
using UnityEngine.UI;
using IBM.Watson.DeveloperCloud.Services.NaturalLanguageUnderstanding.v1;
using IBM.Watson.DeveloperCloud.Services.LanguageTranslator.v3;
using System;
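/// <summary>
/// Streams microphone audio to Watson Speech to Text and, on each final
/// transcript, sends the text to Natural Language Understanding for analysis
/// and to Language Translator for an English-to-Spanish translation.
/// Attach this script to a GameObject and set the credentials in the Inspector.
/// </summary>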
public class ExampleStreaming : MonoBehaviour
{
#region PLEASE SET THESE VARIABLES IN THE INSPECTOR
[Space(10)]
[Tooltip("The service URL (optional). This defaults to \"https://stream.watsonplatform.net/speech-to-text/api\"")]
[SerializeField]
private string _serviceUrl;
[Tooltip("Text field to display the results of streaming.")]
public Text ResultsField;
[Header("CF Authentication")]
[Tooltip("The authentication username.")]
[SerializeField]
private string _username;
[Tooltip("The authentication password.")]
[SerializeField]
private string _password;
[Header("IAM Authentication")]
[Tooltip("The IAM apikey.")]
[SerializeField]
private string _iamApikey;
[Tooltip("The IAM url used to authenticate the apikey (optional). This defaults to \"https://iam.bluemix.net/identity/token\".")]
[SerializeField]
private string _iamUrl;
[SerializeField]
private Text NaturalLanguageUnderstandingText;
[SerializeField]
private Text LanguageTranslatorText;
#endregion
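// Recording state: the ID of the running recording coroutine (0 when idle),
// the microphone device to capture from (null selects the default device),
// the looping AudioClip used as a capture buffer, its length in seconds, and
// the sample rate used for both capture and the clips sent to the service.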
private int _recordingRoutine = 0;
private string _microphoneID = null;
private AudioClip _recording = null;
private int _recordingBufferSize = 1;
private int _recordingHZ = 22050;
private SpeechToText _speechToTextService;
private NaturalLanguageUnderstanding _naturalLanguageUnderstandingService;
private LanguageTranslator _languageTranslatorService;
void Start()
{
LogSystem.InstallDefaultReactors();
Runnable.Run(CreateService());
}
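// Builds credentials for each service, waits for IAM token exchange where
// needed, then instantiates the services and starts streaming.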
private IEnumerator CreateService()
{
// Create credential and instantiate service
Credentials speechToTextCredentials = null;
if (!string.IsNullOrEmpty(_username) && !string.IsNullOrEmpty(_password))
{
// Authenticate using username and password
speechToTextCredentials = new Credentials(_username, _password, _serviceUrl);
}
else if (!string.IsNullOrEmpty(_iamApikey))
{
// Authenticate using iamApikey
TokenOptions tokenOptions = new TokenOptions()
{
IamApiKey = _iamApikey,
IamUrl = _iamUrl
};
speechToTextCredentials = new Credentials(tokenOptions, _serviceUrl);
// Wait for the IAM token data before using the credentials
while (!speechToTextCredentials.HasIamTokenData())
yield return null;
}
else
{
throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
}
_speechToTextService = new SpeechToText(speechToTextCredentials);
_speechToTextService.StreamMultipart = true;
Credentials naturalLanguageUnderstandingCredentials = new Credentials()
{
#region Credentials
Username = "",
Password = ""
#endregion
};
_naturalLanguageUnderstandingService = new NaturalLanguageUnderstanding(naturalLanguageUnderstandingCredentials);
_naturalLanguageUnderstandingService.VersionDate = "2018-10-22";
TokenOptions languageTranslatorTokenOptions = new TokenOptions()
{
#region Credentials
IamApiKey = ""
#endregion
};
Credentials languageTranslatorCredentials = new Credentials(languageTranslatorTokenOptions, "https://gateway.watsonplatform.net/language-translator/api");
// Wait for the IAM token data before using the credentials
while (!languageTranslatorCredentials.HasIamTokenData())
yield return null;
_languageTranslatorService = new LanguageTranslator("2018-07-17", languageTranslatorCredentials);
Active = true;
StartRecording();
}
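// Starts or stops the Speech to Text WebSocket session. Setting Active to
// true configures the recognition options and begins listening; setting it
// to false closes the session.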
public bool Active
{
get { return _speechToTextService.IsListening; }
set
{
if (value && !_speechToTextService.IsListening)
{
_speechToTextService.DetectSilence = true;
_speechToTextService.EnableWordConfidence = true;
_speechToTextService.EnableTimestamps = true;
_speechToTextService.SilenceThreshold = 0.01f;
_speechToTextService.MaxAlternatives = 0;
_speechToTextService.EnableInterimResults = true;
_speechToTextService.OnError = OnError;
_speechToTextService.InactivityTimeout = -1;
_speechToTextService.ProfanityFilter = false;
_speechToTextService.SmartFormatting = true;
_speechToTextService.SpeakerLabels = false;
_speechToTextService.WordAlternativesThreshold = null;
_speechToTextService.StartListening(OnRecognize, OnRecognizeSpeaker);
}
else if (!value && _speechToTextService.IsListening)
{
_speechToTextService.StopListening();
}
}
}
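// Kicks off the recording coroutine; RecordingHandler streams audio until
// StopRecording is called or the microphone disconnects.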
private void StartRecording()
{
if (_recordingRoutine == 0)
{
UnityObjectUtil.StartDestroyQueue();
_recordingRoutine = Runnable.Run(RecordingHandler());
}
}
private void StopRecording()
{
if (_recordingRoutine != 0)
{
Microphone.End(_microphoneID);
Runnable.Stop(_recordingRoutine);
_recordingRoutine = 0;
}
}
private void OnError(string error)
{
Active = false;
Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
}
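// Captures microphone audio into a looping one-second clip and streams it in
// half-buffer chunks: while the microphone writes into one half, the other
// half is copied into a new AudioClip and handed to Speech to Text. Waiting
// on the write position keeps the reader from overtaking the writer.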
private IEnumerator RecordingHandler()
{
Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", Microphone.devices);
_recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
yield return null; // wait one frame so _recordingRoutine is set before we check it
if (_recording == null)
{
StopRecording();
yield break;
}
bool bFirstBlock = true;
int midPoint = _recording.samples / 2;
float[] samples = null;
while (_recordingRoutine != 0 && _recording != null)
{
int writePos = Microphone.GetPosition(_microphoneID);
if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
{
Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");
StopRecording();
yield break;
}
if ((bFirstBlock && writePos >= midPoint)
|| (!bFirstBlock && writePos < midPoint))
{
// One half of the buffer has finished recording; copy it into a new AudioClip and pass it on to the service callback.
samples = new float[midPoint];
_recording.GetData(samples, bFirstBlock ? 0 : midPoint);
AudioData record = new AudioData();
record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
record.Clip.SetData(samples, 0);
_speechToTextService.OnListen(record);
bFirstBlock = !bFirstBlock;
}
else
{
// Calculate the number of samples remaining until the next half-buffer is
// ready, and wait for the time it will take to record them.
int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
float timeRemaining = (float)remaining / (float)_recordingHZ;
yield return new WaitForSeconds(timeRemaining);
}
}
yield break;
}
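// Called with interim and final recognition results. Interim hypotheses are
// shown in ResultsField; final transcripts are additionally sent to Natural
// Language Understanding and to Language Translator (en-es model).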
private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
{
if (result != null && result.results.Length > 0)
{
foreach (var res in result.results)
{
foreach (var alt in res.alternatives)
{
string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
ResultsField.text = text;
if (res.final)
{
Parameters naturalLanguageUnderstandingParameters = new Parameters()
{
text = alt.transcript,
features = new Features()
{
entities = new EntitiesOptions()
{
limit = 10
},
keywords = new KeywordsOptions()
{
limit = 10
},
emotion = new EmotionOptions()
{
document = true
},
sentiment = new SentimentOptions()
{
document = true
},
categories = new CategoriesOptions()
{
}
},
language = "en"
};
_naturalLanguageUnderstandingService.Analyze(HandleAnalyze, HandleFail, naturalLanguageUnderstandingParameters);
_languageTranslatorService.GetTranslation(HandleTranslate, HandleFail, alt.transcript, "en-es");
}
}
if (res.keywords_result != null && res.keywords_result.keyword != null)
{
foreach (var keyword in res.keywords_result.keyword)
{
Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
}
}
if (res.word_alternatives != null)
{
foreach (var wordAlternative in res.word_alternatives)
{
Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
foreach(var alternative in wordAlternative.alternatives)
Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
}
}
}
}
}
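// Displays the first translation returned by Language Translator.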
private void HandleTranslate(Translations response, Dictionary<string, object> customData)
{
LanguageTranslatorText.text = response.translations[0].translation;
}
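// Called when speaker labels are enabled; logs which speaker spoke during
// each time range. SpeakerLabels is false above, so this is a no-op by default.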
private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
{
if (result != null)
{
foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
{
Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence));
}
}
}
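// Renders the NLU analysis: entities, keywords, document sentiment, emotion
// scores, and the top category are prepended to the on-screen text.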
void HandleAnalyze(AnalysisResults response, Dictionary<string, object> customData)
{
if (response.entities != null && response.entities.Length > 0)
{
foreach (EntitiesResult entity in response.entities)
{
string currentText = NaturalLanguageUnderstandingText.text;
NaturalLanguageUnderstandingText.text = string.Format("entity: <b>{0}</b>, type: {1}", entity.text, entity.type, entity) + "\n" + currentText;
}
}
if (response.keywords != null && response.keywords.Length > 0)
{
foreach (KeywordsResult keyword in response.keywords)
{
string currentText = NaturalLanguageUnderstandingText.text;
NaturalLanguageUnderstandingText.text = string.Format("keyword: <b>{0}</b>, relevance: {1}", keyword.text, keyword.relevance) + "\n" + currentText;
}
}
if (response.sentiment != null && response.sentiment.document != null)
{
string currentText = NaturalLanguageUnderstandingText.text;
NaturalLanguageUnderstandingText.text = string.Format("sentiment score: <b>{0}</b>", response.sentiment.document.score) + "\n" + currentText;
}
if (response.emotion != null && response.emotion.document != null)
{
string currentText = NaturalLanguageUnderstandingText.text;
NaturalLanguageUnderstandingText.text = string.Format("<b>anger:</b> {0} <b>disgust:</b> {1}, <b>fear:</b> {2}, <b>joy:</b> {3}, <b>sadness:</b> {4}",
System.Math.Round(response.emotion.document.emotion.anger, 2),
System.Math.Round(response.emotion.document.emotion.disgust, 2),
System.Math.Round(response.emotion.document.emotion.fear, 2),
System.Math.Round(response.emotion.document.emotion.joy, 2),
System.Math.Round(response.emotion.document.emotion.sadness, 2)) + "\n" + currentText;
}
if (response.categories != null && response.categories.Length > 0)
{
string currentText = NaturalLanguageUnderstandingText.text;
NaturalLanguageUnderstandingText.text = string.Format("categories: <b>{0}</b>, score: {1}", response.categories[0].label, response.categories[0].score) + "\n" + currentText;
}
}
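// Shared failure callback for the NLU and Language Translator requests.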
void HandleFail(IBM.Watson.DeveloperCloud.Connection.RESTConnector.Error error, Dictionary<string, object> customData)
{
Log.Debug("ExampleStreaming, HandleFail()", "Failed to send: {0}", error.ErrorMessage);
}
}