Last active
October 29, 2018 16:18
-
-
Save mediumTaj/949f393c2e8693256460cf07a9d30cd3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
* Copyright 2015 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;
using IBM.Watson.DeveloperCloud.DataTypes;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.LanguageTranslator.v3;
using IBM.Watson.DeveloperCloud.Services.NaturalLanguageUnderstanding.v1;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Utilities;
/// <summary>
/// Streams microphone audio to IBM Watson Speech to Text and, for each final
/// transcript, runs Natural Language Understanding analysis and an en-es
/// translation, displaying all results in the assigned UI Text fields.
/// </summary>
public class ExampleStreaming : MonoBehaviour
{
    #region PLEASE SET THESE VARIABLES IN THE INSPECTOR
    [Space(10)]
    [Tooltip("The service URL (optional). This defaults to \"https://stream.watsonplatform.net/speech-to-text/api\"")]
    [SerializeField]
    private string _serviceUrl;
    [Tooltip("Text field to display the results of streaming.")]
    public Text ResultsField;
    [Header("CF Authentication")]
    [Tooltip("The authentication username.")]
    [SerializeField]
    private string _username;
    [Tooltip("The authentication password.")]
    [SerializeField]
    private string _password;
    [Header("IAM Authentication")]
    [Tooltip("The IAM apikey.")]
    [SerializeField]
    private string _iamApikey;
    [Tooltip("The IAM url used to authenticate the apikey (optional). This defaults to \"https://iam.bluemix.net/identity/token\".")]
    [SerializeField]
    private string _iamUrl;
    [Tooltip("Text field to display Natural Language Understanding results.")]
    [SerializeField]
    private Text NaturalLanguageUnderstandingText;
    [Tooltip("Text field to display Language Translator results.")]
    [SerializeField]
    private Text LanguageTranslatorText;
    #endregion

    // Runnable id of the active recording coroutine; 0 means "not recording".
    private int _recordingRoutine = 0;
    // null selects the platform's default microphone.
    private string _microphoneID = null;
    // Looping capture clip that the microphone writes into.
    private AudioClip _recording = null;
    // Length of the looping capture clip, in seconds.
    private int _recordingBufferSize = 1;
    // Capture sample rate in Hz.
    private int _recordingHZ = 22050;
    private SpeechToText _speechToTextService;
    private NaturalLanguageUnderstanding _naturalLanguageUnderstandingService;
    private LanguageTranslator _languageTranslatorService;

    void Start()
    {
        LogSystem.InstallDefaultReactors();
        // Service construction may need to wait on IAM token exchange, so it
        // runs as a coroutine rather than inline in Start().
        Runnable.Run(CreateService());
    }

    /// <summary>
    /// Creates the three Watson service clients, waiting for IAM token data
    /// where required, then starts listening and recording.
    /// Throws WatsonException if no Speech to Text credentials are configured.
    /// </summary>
    private IEnumerator CreateService()
    {
        // Create credential and instantiate the Speech to Text service.
        Credentials speechToTextCredentials = null;
        if (!string.IsNullOrEmpty(_username) && !string.IsNullOrEmpty(_password))
        {
            // Authenticate using username and password (Cloud Foundry style).
            speechToTextCredentials = new Credentials(_username, _password, _serviceUrl);
        }
        else if (!string.IsNullOrEmpty(_iamApikey))
        {
            // Authenticate using an IAM apikey.
            TokenOptions tokenOptions = new TokenOptions()
            {
                IamApiKey = _iamApikey,
                IamUrl = _iamUrl
            };
            speechToTextCredentials = new Credentials(tokenOptions, _serviceUrl);
            // Wait until the token exchange has produced IAM token data.
            while (!speechToTextCredentials.HasIamTokenData())
                yield return null;
        }
        else
        {
            throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
        }
        _speechToTextService = new SpeechToText(speechToTextCredentials);
        _speechToTextService.StreamMultipart = true;

        // NOTE(review): credentials are embedded as empty placeholders below;
        // fill them in locally but do not commit real secrets to source.
        Credentials naturalLanguageUnderstandingCredentials = new Credentials()
        {
            #region Credentials
            Username = "",
            Password = ""
            #endregion
        };
        _naturalLanguageUnderstandingService = new NaturalLanguageUnderstanding(naturalLanguageUnderstandingCredentials);
        _naturalLanguageUnderstandingService.VersionDate = "2018-10-22";

        TokenOptions languageTranslatorTokenOptions = new TokenOptions()
        {
            #region Credentials
            IamApiKey = ""
            #endregion
        };
        Credentials languageTranslatorCredentials = new Credentials(languageTranslatorTokenOptions, "https://gateway.watsonplatform.net/language-translator/api");
        // Wait for the translator's IAM token data as well.
        while (!languageTranslatorCredentials.HasIamTokenData())
            yield return null;
        _languageTranslatorService = new LanguageTranslator("2018-07-17", languageTranslatorCredentials);

        Active = true;
        StartRecording();
    }

    /// <summary>
    /// Gets or sets whether the Speech to Text service is actively listening.
    /// Setting true configures recognition options and starts the websocket;
    /// setting false stops listening.
    /// </summary>
    public bool Active
    {
        get { return _speechToTextService.IsListening; }
        set
        {
            if (value && !_speechToTextService.IsListening)
            {
                _speechToTextService.DetectSilence = true;
                _speechToTextService.EnableWordConfidence = true;
                _speechToTextService.EnableTimestamps = true;
                _speechToTextService.SilenceThreshold = 0.01f;
                _speechToTextService.MaxAlternatives = 0;
                _speechToTextService.EnableInterimResults = true;
                _speechToTextService.OnError = OnError;
                // -1 disables the service-side inactivity timeout.
                _speechToTextService.InactivityTimeout = -1;
                _speechToTextService.ProfanityFilter = false;
                _speechToTextService.SmartFormatting = true;
                _speechToTextService.SpeakerLabels = false;
                _speechToTextService.WordAlternativesThreshold = null;
                _speechToTextService.StartListening(OnRecognize, OnRecognizeSpeaker);
            }
            else if (!value && _speechToTextService.IsListening)
            {
                _speechToTextService.StopListening();
            }
        }
    }

    /// <summary>Starts the microphone recording coroutine if not already running.</summary>
    private void StartRecording()
    {
        if (_recordingRoutine == 0)
        {
            UnityObjectUtil.StartDestroyQueue();
            _recordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    /// <summary>Stops the microphone and the recording coroutine if running.</summary>
    private void StopRecording()
    {
        if (_recordingRoutine != 0)
        {
            Microphone.End(_microphoneID);
            Runnable.Stop(_recordingRoutine);
            _recordingRoutine = 0;
        }
    }

    /// <summary>Error callback from Speech to Text: stop listening and log.</summary>
    private void OnError(string error)
    {
        Active = false;
        Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
    }

    /// <summary>
    /// Coroutine that captures microphone audio into a looping clip and sends
    /// it to Speech to Text in half-buffer chunks (double buffering: while the
    /// microphone fills one half, the other half is shipped to the service).
    /// </summary>
    private IEnumerator RecordingHandler()
    {
        // string.Join so the device names are actually listed; formatting the
        // array directly would only print its type name.
        Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", string.Join(", ", Microphone.devices));
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return null;      // let _recordingRoutine get set..
        if (_recording == null)
        {
            StopRecording();
            yield break;
        }
        bool bFirstBlock = true;
        int midPoint = _recording.samples / 2;
        float[] samples = null;
        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");
                StopRecording();
                yield break;
            }
            if ((bFirstBlock && writePos >= midPoint)
                || (!bFirstBlock && writePos < midPoint))
            {
                // Front block is recorded; make a RecordClip and pass it on to our callback.
                samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);
                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);
                _speechToTextService.OnListen(record);
                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // Calculate the number of samples remaining until a block of
                // audio is ready, and wait the time it will take to record it.
                int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }
        yield break;
    }

    /// <summary>
    /// Recognition callback: shows interim/final transcripts and, on a final
    /// result, kicks off NLU analysis and en-es translation of the transcript.
    /// </summary>
    private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
                    ResultsField.text = text;
                    if (res.final)
                    {
                        Parameters naturalLanguageUnderstandingParameters = new Parameters()
                        {
                            text = alt.transcript,
                            features = new Features()
                            {
                                entities = new EntitiesOptions()
                                {
                                    limit = 10,
                                    model = "alchemy"
                                },
                                keywords = new KeywordsOptions()
                                {
                                    limit = 10
                                },
                                emotion = new EmotionOptions()
                                {
                                    document = true
                                },
                                sentiment = new SentimentOptions()
                                {
                                    document = true
                                },
                                categories = new CategoriesOptions()
                                {
                                }
                            },
                            language = "en"
                        };
                        _naturalLanguageUnderstandingService.Analyze(HandleAnalyze, HandleFail, naturalLanguageUnderstandingParameters);
                        _languageTranslatorService.GetTranslation(HandleTranslate, HandleFail, alt.transcript, "en-es");
                    }
                }
                if (res.keywords_result != null && res.keywords_result.keyword != null)
                {
                    foreach (var keyword in res.keywords_result.keyword)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
                    }
                }
                if (res.word_alternatives != null)
                {
                    foreach (var wordAlternative in res.word_alternatives)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
                        foreach (var alternative in wordAlternative.alternatives)
                            Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
                    }
                }
            }
        }
    }

    /// <summary>Translation callback: shows the first translation in the UI.</summary>
    private void HandleTranslate(Translations response, Dictionary<string, object> customData)
    {
        LanguageTranslatorText.text = response.translations[0].translation;
    }

    /// <summary>Speaker-label callback: logs each speaker segment.</summary>
    private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result != null)
        {
            foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
            {
                Log.Debug("ExampleStreaming.OnRecognize()", string.Format("speaker result: {0} | confidence: {1} | from: {2} | to: {3}", labelResult.speaker, labelResult.confidence, labelResult.from, labelResult.to));
            }
        }
    }

    /// <summary>
    /// NLU callback: prepends entity, keyword, sentiment, emotion and category
    /// results (newest first) to the NLU text field.
    /// </summary>
    void HandleAnalyze(AnalysisResults response, Dictionary<string, object> customData)
    {
        if (response.entities != null && response.entities.Length > 0)
        {
            foreach (EntitiesResult entity in response.entities)
            {
                PrependToNluText(string.Format("entity: <b>{0}</b>, type: {1}", entity.text, entity.type));
            }
        }
        if (response.keywords != null && response.keywords.Length > 0)
        {
            foreach (KeywordsResult keyword in response.keywords)
            {
                PrependToNluText(string.Format("keyword: <b>{0}</b>, relevance: {1}", keyword.text, keyword.relevance));
            }
        }
        // Document-level sentiment was requested (SentimentOptions.document = true),
        // so guard on .document — not .targets — and print its label/score rather
        // than formatting the sentiment object itself (which only printed the type name).
        if (response.sentiment != null && response.sentiment.document != null)
        {
            PrependToNluText(string.Format("sentiment: <b>{0}</b>, score: {1}", response.sentiment.document.label, response.sentiment.document.score));
        }
        if (response.emotion != null && response.emotion.document != null)
        {
            PrependToNluText(string.Format("<b>anger:</b> {0} <b>disgust:</b> {1}, <b>fear:</b> {2}, <b>joy:</b> {3}, <b>sadness:</b> {4}",
                System.Math.Round(response.emotion.document.emotion.anger, 2),
                System.Math.Round(response.emotion.document.emotion.disgust, 2),
                System.Math.Round(response.emotion.document.emotion.fear, 2),
                System.Math.Round(response.emotion.document.emotion.joy, 2),
                System.Math.Round(response.emotion.document.emotion.sadness, 2)));
        }
        if (response.categories != null && response.categories.Length > 0)
        {
            PrependToNluText(string.Format("categories: <b>{0}</b>, score: {1}", response.categories[0].label, response.categories[0].score));
        }
    }

    /// <summary>Prepends a line to the NLU text field so newest output is on top.</summary>
    private void PrependToNluText(string line)
    {
        NaturalLanguageUnderstandingText.text = line + "\n" + NaturalLanguageUnderstandingText.text;
    }

    /// <summary>Generic failure callback for NLU and translation requests.</summary>
    void HandleFail(IBM.Watson.DeveloperCloud.Connection.RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Debug("ExampleStreaming, HandleFail()", "Failed to send: {0}", error.ErrorMessage);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment