mediumTaj/ExampleStreamingLanguageTranslator.cs

## ExampleStreamingLanguageTranslator.cs
/**
* Copyright 2015 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Logging;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
using IBM.Watson.DeveloperCloud.Services.LanguageTranslator.v3;
using IBM.Watson.DeveloperCloud.Utilities;
using IBM.Watson.DeveloperCloud.DataTypes;
using System.Collections.Generic;
using UnityEngine.UI;
using System;
using IBM.Watson.DeveloperCloud.Connection;

/// <summary>
/// Records audio from the microphone, streams it to the Watson Speech to Text service and
/// sends every recognized transcript to the Watson Language Translator service. The first
/// translation returned is written to <see cref="ResultsField"/>.
/// </summary>
/// <remarks>
/// NOTE(review): Unity generally expects a MonoBehaviour class name to match its script file
/// name; this script appears to live in ExampleStreamingLanguageTranslator.cs - confirm.
/// </remarks>
public class ExampleStreaming : MonoBehaviour
{
    #region PLEASE SET THESE VARIABLES IN THE INSPECTOR
    [Space(10)]
    [Tooltip("The service url for the Speech to Text service (optional). This defaults to \"https://stream.watsonplatform.net/speech-to-text/api\"")]
    [SerializeField]
    private string _STTServiceUrl;
    [Tooltip("Text field to display the results of streaming.")]
    public Text ResultsField;
    [Header("CF Authentication")]
    [Tooltip("The CF speech to text service username.")]
    [SerializeField]
    private string _STTServiceUsername;
    [Tooltip("The CF speech to text service password.")]
    [SerializeField]
    private string _STTServicePassword;
    [Header("IAM Authentication")]
    [Tooltip("The IAM apikey.")]
    [SerializeField]
    private string _STTServiceIamApikey;
    [Tooltip("The IAM url used to authenticate the apikey (optional). This defaults to \"https://iam.bluemix.net/identity/token\".")]
    [SerializeField]
    private string _STTServiceIamUrl;
    [Header("Language Translator")]
    [Space(10)]
    [Tooltip("The IAM apikey for the Language Translator service.")]
    [SerializeField]
    private string _LTServiceIamApikey;
    [Tooltip("The service url for the Language Translator service.")]
    [SerializeField]
    private string _LTServiceUrl;
    #endregion

    //  Version date handed to the Language Translator constructor.
    private const string LanguageTranslatorVersionDate = "2018-07-17";
    //  Translation model id handed to GetTranslation.
    private const string TranslationModelId = "en-de";

    //  Id returned by Runnable.Run for the recording coroutine; 0 means "not recording".
    private int _recordingRoutine = 0;
    //  Microphone device name; null is passed through to the Unity Microphone API as-is.
    private string _microphoneID = null;
    //  Looping clip the microphone records into.
    private AudioClip _recording = null;
    //  Length of the looping clip in seconds.
    private int _recordingBufferSize = 1;
    //  Sample rate requested from the microphone.
    private int _recordingHZ = 22050;

    private SpeechToText _service;
    private LanguageTranslator _languageTranslatorService;

    /// <summary>
    /// Installs the default log reactors and starts the service creation coroutine.
    /// </summary>
    void Start()
    {
        LogSystem.InstallDefaultReactors();
        Runnable.Run(CreateService());
    }

    /// <summary>
    /// Builds credentials for both services, waits for IAM token data where IAM is used,
    /// creates the service instances, then starts listening and recording.
    /// </summary>
    /// <exception cref="WatsonException">
    /// Thrown when no Speech to Text credentials or no Language Translator apikey is set.
    /// </exception>
    private IEnumerator CreateService()
    {
        //  Create credential and instantiate service
        Credentials credentials = null;
        if (!string.IsNullOrEmpty(_STTServiceUsername) && !string.IsNullOrEmpty(_STTServicePassword))
        {
            //  Authenticate using username and password
            credentials = new Credentials(_STTServiceUsername, _STTServicePassword, _STTServiceUrl);
        }
        else if (!string.IsNullOrEmpty(_STTServiceIamApikey))
        {
            //  Authenticate using iamApikey
            TokenOptions tokenOptions = new TokenOptions()
            {
                IamApiKey = _STTServiceIamApikey,
                IamUrl = _STTServiceIamUrl
            };

            credentials = new Credentials(tokenOptions, _STTServiceUrl);

            //  Wait for tokendata
            while (!credentials.HasIamTokenData())
            {
                yield return null;
            }
        }
        else
        {
            throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
        }

        //  Fail the same way as the Speech to Text branch instead of entering the token
        //  wait below with nothing to authenticate with.
        if (string.IsNullOrEmpty(_LTServiceIamApikey))
        {
            throw new WatsonException("Please provide an IAM apikey to authenticate the Language Translator service.");
        }

        _service = new SpeechToText(credentials);
        _service.StreamMultipart = true;
        //_service.RecognizeModel = "";

        TokenOptions languageTranslatorTokenOptions = new TokenOptions()
        {
            IamApiKey = _LTServiceIamApikey
        };
        Credentials languageTranslatorCredentials = new Credentials(languageTranslatorTokenOptions, _LTServiceUrl);

        //  Wait for tokendata
        while (!languageTranslatorCredentials.HasIamTokenData())
        {
            yield return null;
        }

        _languageTranslatorService = new LanguageTranslator(LanguageTranslatorVersionDate, languageTranslatorCredentials);

        Active = true;
        StartRecording();
    }

    /// <summary>
    /// Gets whether the Speech to Text service is listening; setting it configures and starts
    /// listening (true) or stops listening (false). Reads as false and ignores writes while
    /// the service has not been created yet.
    /// </summary>
    public bool Active
    {
        get { return _service != null && _service.IsListening; }
        set
        {
            if (_service == null)
            {
                //  CreateService has not finished (or failed); nothing to start or stop.
                Log.Debug("ExampleStreaming.Active", "Speech to Text service is not created yet; ignoring Active = {0}.", value);
                return;
            }

            if (value && !_service.IsListening)
            {
                _service.DetectSilence = true;
                _service.EnableWordConfidence = true;
                _service.EnableTimestamps = true;
                _service.SilenceThreshold = 0.01f;
                _service.MaxAlternatives = 0;
                _service.EnableInterimResults = true;
                _service.OnError = OnError;
                _service.InactivityTimeout = -1;
                _service.ProfanityFilter = false;
                _service.SmartFormatting = true;
                _service.SpeakerLabels = false;
                _service.WordAlternativesThreshold = null;
                _service.StartListening(OnRecognize, OnRecognizeSpeaker);
            }
            else if (!value && _service.IsListening)
            {
                _service.StopListening();
            }
        }
    }

    /// <summary>
    /// Starts the recording coroutine unless one is already running.
    /// </summary>
    private void StartRecording()
    {
        if (_recordingRoutine == 0)
        {
            UnityObjectUtil.StartDestroyQueue();
            _recordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    /// <summary>
    /// Ends microphone capture and stops the recording coroutine if one is running.
    /// </summary>
    private void StopRecording()
    {
        if (_recordingRoutine != 0)
        {
            Microphone.End(_microphoneID);
            Runnable.Stop(_recordingRoutine);
            _recordingRoutine = 0;
        }
    }

    /// <summary>
    /// Error callback registered on the Speech to Text service: stops listening and logs.
    /// </summary>
    /// <param name="error">Error text supplied by the service.</param>
    private void OnError(string error)
    {
        Active = false;

        Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
    }

    /// <summary>
    /// Coroutine that records into a looping clip and hands each completed half of the clip
    /// to the Speech to Text service as an <see cref="AudioData"/> block.
    /// </summary>
    private IEnumerator RecordingHandler()
    {
        //  Join the names explicitly: handing the string[] over directly would presumably
        //  bind it to the variadic format arguments, printing only the first device and
        //  failing to format when no device is present.
        Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", string.Join(", ", Microphone.devices));
        _recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
        yield return null;      // let _recordingRoutine get set..

        if (_recording == null)
        {
            StopRecording();
            yield break;
        }

        bool bFirstBlock = true;
        int midPoint = _recording.samples / 2;

        while (_recordingRoutine != 0 && _recording != null)
        {
            int writePos = Microphone.GetPosition(_microphoneID);
            if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
            {
                Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");

                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
              || (!bFirstBlock && writePos < midPoint))
            {
                // The half that was just completed is copied out and passed to the service.
                float[] samples = new float[midPoint];
                _recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
                record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
                record.Clip.SetData(samples, 0);

                _service.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // calculate the number of samples remaining until we are ready for a block of audio,
                // and wait the amount of time it will take to record them.
                int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)_recordingHZ;

                yield return new WaitForSeconds(timeRemaining);
            }
        }
    }

    /// <summary>
    /// Recognition callback: logs every alternative, requests a translation of its transcript
    /// and logs keyword results and word alternatives when present.
    /// </summary>
    /// <param name="result">Recognition event from the Speech to Text service; may be null.</param>
    /// <param name="customData">Custom data passed along by the service (unused).</param>
    private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result == null || result.results == null || result.results.Length == 0)
        {
            return;
        }

        foreach (var res in result.results)
        {
            if (res.alternatives != null)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
                    //  Log through "{0}" so braces inside a transcript are never treated as format items.
                    Log.Debug("ExampleStreaming.OnRecognize()", "{0}", text);
                    //ResultsField.text = text;

                    //  Translate the spoken words only, not the "(Final, 0.93)" debug suffix.
                    if (_languageTranslatorService != null && !string.IsNullOrEmpty(alt.transcript))
                    {
                        _languageTranslatorService.GetTranslation(OnTranslateSuccess, OnTranslateFail, alt.transcript, TranslationModelId);
                    }
                }
            }

            if (res.keywords_result != null && res.keywords_result.keyword != null)
            {
                foreach (var keyword in res.keywords_result.keyword)
                {
                    Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
                }
            }

            if (res.word_alternatives != null)
            {
                foreach (var wordAlternative in res.word_alternatives)
                {
                    Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
                    if (wordAlternative.alternatives == null)
                    {
                        continue;
                    }

                    foreach (var alternative in wordAlternative.alternatives)
                    {
                        Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
                    }
                }
            }
        }
    }

    /// <summary>
    /// Failure callback for translation requests: logs the error message.
    /// </summary>
    /// <param name="error">Error reported by the REST connector; may be null.</param>
    /// <param name="customData">Custom data passed along by the service (unused).</param>
    private void OnTranslateFail(RESTConnector.Error error, Dictionary<string, object> customData)
    {
        Log.Debug("ExampleStreaming", "Translation failed: {0}", error != null ? error.ErrorMessage : "unknown error");
    }

    /// <summary>
    /// Success callback for translation requests: shows the first translation in
    /// <see cref="ResultsField"/>. Does nothing except log when there is no translation or
    /// no text field to write to.
    /// </summary>
    /// <param name="response">Translation response; may be null or empty.</param>
    /// <param name="customData">Custom data passed along by the service (unused).</param>
    private void OnTranslateSuccess(Translations response, Dictionary<string, object> customData)
    {
        //  Take the first translation, if any, without assuming the collection is non-empty.
        string translated = null;
        if (response != null && response.translations != null)
        {
            foreach (var item in response.translations)
            {
                if (item != null)
                {
                    translated = item.translation;
                }
                break;
            }
        }

        if (translated == null)
        {
            Log.Debug("ExampleStreaming.OnTranslateSuccess()", "Translation response contained no translations.");
            return;
        }

        if (ResultsField == null)
        {
            Log.Debug("ExampleStreaming.OnTranslateSuccess()", "ResultsField is not assigned; translation: {0}", translated);
            return;
        }

        ResultsField.text = translated;
    }

    /// <summary>
    /// Speaker-label callback: logs every speaker label in the event.
    /// </summary>
    /// <param name="result">Speaker recognition event; may be null.</param>
    /// <param name="customData">Custom data passed along by the service (unused).</param>
    private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
    {
        if (result == null || result.speaker_labels == null)
        {
            return;
        }

        foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
        {
            Log.Debug("ExampleStreaming.OnRecognizeSpeaker()", "speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence);
        }
    }
}
	/**
	* Copyright 2015 IBM Corp. All Rights Reserved.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	*/

	using UnityEngine;
	using System.Collections;
	using IBM.Watson.DeveloperCloud.Logging;
	using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;
	using IBM.Watson.DeveloperCloud.Services.LanguageTranslator.v3;
	using IBM.Watson.DeveloperCloud.Utilities;
	using IBM.Watson.DeveloperCloud.DataTypes;
	using System.Collections.Generic;
	using UnityEngine.UI;
	using System;
	using IBM.Watson.DeveloperCloud.Connection;

	/// <summary>
	/// Records audio from the microphone, streams it to the Watson Speech to Text service and
	/// sends every recognized transcript to the Watson Language Translator service. The first
	/// translation returned is written to <see cref="ResultsField"/>.
	/// </summary>
	/// <remarks>
	/// NOTE(review): Unity generally expects a MonoBehaviour class name to match its script file
	/// name; this script appears to live in ExampleStreamingLanguageTranslator.cs - confirm.
	/// </remarks>
	public class ExampleStreaming : MonoBehaviour
	{
		#region PLEASE SET THESE VARIABLES IN THE INSPECTOR
		[Space(10)]
		[Tooltip("The service url for the Speech to Text service (optional). This defaults to \"https://stream.watsonplatform.net/speech-to-text/api\"")]
		[SerializeField]
		private string _STTServiceUrl;
		[Tooltip("Text field to display the results of streaming.")]
		public Text ResultsField;
		[Header("CF Authentication")]
		[Tooltip("The CF speech to text service username.")]
		[SerializeField]
		private string _STTServiceUsername;
		[Tooltip("The CF speech to text service password.")]
		[SerializeField]
		private string _STTServicePassword;
		[Header("IAM Authentication")]
		[Tooltip("The IAM apikey.")]
		[SerializeField]
		private string _STTServiceIamApikey;
		[Tooltip("The IAM url used to authenticate the apikey (optional). This defaults to \"https://iam.bluemix.net/identity/token\".")]
		[SerializeField]
		private string _STTServiceIamUrl;
		[Header("Language Translator")]
		[Space(10)]
		[Tooltip("The IAM apikey for the Language Translator service.")]
		[SerializeField]
		private string _LTServiceIamApikey;
		[Tooltip("The service url for the Language Translator service.")]
		[SerializeField]
		private string _LTServiceUrl;
		#endregion

		//  Version date handed to the Language Translator constructor.
		private const string LanguageTranslatorVersionDate = "2018-07-17";
		//  Translation model id handed to GetTranslation.
		private const string TranslationModelId = "en-de";

		//  Id returned by Runnable.Run for the recording coroutine; 0 means "not recording".
		private int _recordingRoutine = 0;
		//  Microphone device name; null is passed through to the Unity Microphone API as-is.
		private string _microphoneID = null;
		//  Looping clip the microphone records into.
		private AudioClip _recording = null;
		//  Length of the looping clip in seconds.
		private int _recordingBufferSize = 1;
		//  Sample rate requested from the microphone.
		private int _recordingHZ = 22050;

		private SpeechToText _service;
		private LanguageTranslator _languageTranslatorService;

		/// <summary>
		/// Installs the default log reactors and starts the service creation coroutine.
		/// </summary>
		void Start()
		{
			LogSystem.InstallDefaultReactors();
			Runnable.Run(CreateService());
		}

		/// <summary>
		/// Builds credentials for both services, waits for IAM token data where IAM is used,
		/// creates the service instances, then starts listening and recording.
		/// </summary>
		/// <exception cref="WatsonException">
		/// Thrown when no Speech to Text credentials or no Language Translator apikey is set.
		/// </exception>
		private IEnumerator CreateService()
		{
			//  Create credential and instantiate service
			Credentials credentials = null;
			if (!string.IsNullOrEmpty(_STTServiceUsername) && !string.IsNullOrEmpty(_STTServicePassword))
			{
				//  Authenticate using username and password
				credentials = new Credentials(_STTServiceUsername, _STTServicePassword, _STTServiceUrl);
			}
			else if (!string.IsNullOrEmpty(_STTServiceIamApikey))
			{
				//  Authenticate using iamApikey
				TokenOptions tokenOptions = new TokenOptions()
				{
					IamApiKey = _STTServiceIamApikey,
					IamUrl = _STTServiceIamUrl
				};

				credentials = new Credentials(tokenOptions, _STTServiceUrl);

				//  Wait for tokendata
				while (!credentials.HasIamTokenData())
				{
					yield return null;
				}
			}
			else
			{
				throw new WatsonException("Please provide either username and password or IAM apikey to authenticate the service.");
			}

			//  Fail the same way as the Speech to Text branch instead of entering the token
			//  wait below with nothing to authenticate with.
			if (string.IsNullOrEmpty(_LTServiceIamApikey))
			{
				throw new WatsonException("Please provide an IAM apikey to authenticate the Language Translator service.");
			}

			_service = new SpeechToText(credentials);
			_service.StreamMultipart = true;
			//_service.RecognizeModel = "";

			TokenOptions languageTranslatorTokenOptions = new TokenOptions()
			{
				IamApiKey = _LTServiceIamApikey
			};
			Credentials languageTranslatorCredentials = new Credentials(languageTranslatorTokenOptions, _LTServiceUrl);

			//  Wait for tokendata
			while (!languageTranslatorCredentials.HasIamTokenData())
			{
				yield return null;
			}

			_languageTranslatorService = new LanguageTranslator(LanguageTranslatorVersionDate, languageTranslatorCredentials);

			Active = true;
			StartRecording();
		}

		/// <summary>
		/// Gets whether the Speech to Text service is listening; setting it configures and starts
		/// listening (true) or stops listening (false). Reads as false and ignores writes while
		/// the service has not been created yet.
		/// </summary>
		public bool Active
		{
			get { return _service != null && _service.IsListening; }
			set
			{
				if (_service == null)
				{
					//  CreateService has not finished (or failed); nothing to start or stop.
					Log.Debug("ExampleStreaming.Active", "Speech to Text service is not created yet; ignoring Active = {0}.", value);
					return;
				}

				if (value && !_service.IsListening)
				{
					_service.DetectSilence = true;
					_service.EnableWordConfidence = true;
					_service.EnableTimestamps = true;
					_service.SilenceThreshold = 0.01f;
					_service.MaxAlternatives = 0;
					_service.EnableInterimResults = true;
					_service.OnError = OnError;
					_service.InactivityTimeout = -1;
					_service.ProfanityFilter = false;
					_service.SmartFormatting = true;
					_service.SpeakerLabels = false;
					_service.WordAlternativesThreshold = null;
					_service.StartListening(OnRecognize, OnRecognizeSpeaker);
				}
				else if (!value && _service.IsListening)
				{
					_service.StopListening();
				}
			}
		}

		/// <summary>
		/// Starts the recording coroutine unless one is already running.
		/// </summary>
		private void StartRecording()
		{
			if (_recordingRoutine == 0)
			{
				UnityObjectUtil.StartDestroyQueue();
				_recordingRoutine = Runnable.Run(RecordingHandler());
			}
		}

		/// <summary>
		/// Ends microphone capture and stops the recording coroutine if one is running.
		/// </summary>
		private void StopRecording()
		{
			if (_recordingRoutine != 0)
			{
				Microphone.End(_microphoneID);
				Runnable.Stop(_recordingRoutine);
				_recordingRoutine = 0;
			}
		}

		/// <summary>
		/// Error callback registered on the Speech to Text service: stops listening and logs.
		/// </summary>
		/// <param name="error">Error text supplied by the service.</param>
		private void OnError(string error)
		{
			Active = false;

			Log.Debug("ExampleStreaming.OnError()", "Error! {0}", error);
		}

		/// <summary>
		/// Coroutine that records into a looping clip and hands each completed half of the clip
		/// to the Speech to Text service as an <see cref="AudioData"/> block.
		/// </summary>
		private IEnumerator RecordingHandler()
		{
			//  Join the names explicitly: handing the string[] over directly would presumably
			//  bind it to the variadic format arguments, printing only the first device and
			//  failing to format when no device is present.
			Log.Debug("ExampleStreaming.RecordingHandler()", "devices: {0}", string.Join(", ", Microphone.devices));
			_recording = Microphone.Start(_microphoneID, true, _recordingBufferSize, _recordingHZ);
			yield return null;      // let _recordingRoutine get set..

			if (_recording == null)
			{
				StopRecording();
				yield break;
			}

			bool bFirstBlock = true;
			int midPoint = _recording.samples / 2;

			while (_recordingRoutine != 0 && _recording != null)
			{
				int writePos = Microphone.GetPosition(_microphoneID);
				if (writePos > _recording.samples || !Microphone.IsRecording(_microphoneID))
				{
					Log.Error("ExampleStreaming.RecordingHandler()", "Microphone disconnected.");

					StopRecording();
					yield break;
				}

				if ((bFirstBlock && writePos >= midPoint)
					|| (!bFirstBlock && writePos < midPoint))
				{
					// The half that was just completed is copied out and passed to the service.
					float[] samples = new float[midPoint];
					_recording.GetData(samples, bFirstBlock ? 0 : midPoint);

					AudioData record = new AudioData();
					record.MaxLevel = Mathf.Max(Mathf.Abs(Mathf.Min(samples)), Mathf.Max(samples));
					record.Clip = AudioClip.Create("Recording", midPoint, _recording.channels, _recordingHZ, false);
					record.Clip.SetData(samples, 0);

					_service.OnListen(record);

					bFirstBlock = !bFirstBlock;
				}
				else
				{
					// calculate the number of samples remaining until we are ready for a block of audio,
					// and wait the amount of time it will take to record them.
					int remaining = bFirstBlock ? (midPoint - writePos) : (_recording.samples - writePos);
					float timeRemaining = (float)remaining / (float)_recordingHZ;

					yield return new WaitForSeconds(timeRemaining);
				}
			}
		}

		/// <summary>
		/// Recognition callback: logs every alternative, requests a translation of its transcript
		/// and logs keyword results and word alternatives when present.
		/// </summary>
		/// <param name="result">Recognition event from the Speech to Text service; may be null.</param>
		/// <param name="customData">Custom data passed along by the service (unused).</param>
		private void OnRecognize(SpeechRecognitionEvent result, Dictionary<string, object> customData)
		{
			if (result == null || result.results == null || result.results.Length == 0)
			{
				return;
			}

			foreach (var res in result.results)
			{
				if (res.alternatives != null)
				{
					foreach (var alt in res.alternatives)
					{
						string text = string.Format("{0} ({1}, {2:0.00})\n", alt.transcript, res.final ? "Final" : "Interim", alt.confidence);
						//  Log through "{0}" so braces inside a transcript are never treated as format items.
						Log.Debug("ExampleStreaming.OnRecognize()", "{0}", text);
						//ResultsField.text = text;

						//  Translate the spoken words only, not the "(Final, 0.93)" debug suffix.
						if (_languageTranslatorService != null && !string.IsNullOrEmpty(alt.transcript))
						{
							_languageTranslatorService.GetTranslation(OnTranslateSuccess, OnTranslateFail, alt.transcript, TranslationModelId);
						}
					}
				}

				if (res.keywords_result != null && res.keywords_result.keyword != null)
				{
					foreach (var keyword in res.keywords_result.keyword)
					{
						Log.Debug("ExampleStreaming.OnRecognize()", "keyword: {0}, confidence: {1}, start time: {2}, end time: {3}", keyword.normalized_text, keyword.confidence, keyword.start_time, keyword.end_time);
					}
				}

				if (res.word_alternatives != null)
				{
					foreach (var wordAlternative in res.word_alternatives)
					{
						Log.Debug("ExampleStreaming.OnRecognize()", "Word alternatives found. Start time: {0} | EndTime: {1}", wordAlternative.start_time, wordAlternative.end_time);
						if (wordAlternative.alternatives == null)
						{
							continue;
						}

						foreach (var alternative in wordAlternative.alternatives)
						{
							Log.Debug("ExampleStreaming.OnRecognize()", "\t word: {0} | confidence: {1}", alternative.word, alternative.confidence);
						}
					}
				}
			}
		}

		/// <summary>
		/// Failure callback for translation requests: logs the error message.
		/// </summary>
		/// <param name="error">Error reported by the REST connector; may be null.</param>
		/// <param name="customData">Custom data passed along by the service (unused).</param>
		private void OnTranslateFail(RESTConnector.Error error, Dictionary<string, object> customData)
		{
			Log.Debug("ExampleStreaming", "Translation failed: {0}", error != null ? error.ErrorMessage : "unknown error");
		}

		/// <summary>
		/// Success callback for translation requests: shows the first translation in
		/// <see cref="ResultsField"/>. Does nothing except log when there is no translation or
		/// no text field to write to.
		/// </summary>
		/// <param name="response">Translation response; may be null or empty.</param>
		/// <param name="customData">Custom data passed along by the service (unused).</param>
		private void OnTranslateSuccess(Translations response, Dictionary<string, object> customData)
		{
			//  Take the first translation, if any, without assuming the collection is non-empty.
			string translated = null;
			if (response != null && response.translations != null)
			{
				foreach (var item in response.translations)
				{
					if (item != null)
					{
						translated = item.translation;
					}
					break;
				}
			}

			if (translated == null)
			{
				Log.Debug("ExampleStreaming.OnTranslateSuccess()", "Translation response contained no translations.");
				return;
			}

			if (ResultsField == null)
			{
				Log.Debug("ExampleStreaming.OnTranslateSuccess()", "ResultsField is not assigned; translation: {0}", translated);
				return;
			}

			ResultsField.text = translated;
		}

		/// <summary>
		/// Speaker-label callback: logs every speaker label in the event.
		/// </summary>
		/// <param name="result">Speaker recognition event; may be null.</param>
		/// <param name="customData">Custom data passed along by the service (unused).</param>
		private void OnRecognizeSpeaker(SpeakerRecognitionEvent result, Dictionary<string, object> customData)
		{
			if (result == null || result.speaker_labels == null)
			{
				return;
			}

			foreach (SpeakerLabelsResult labelResult in result.speaker_labels)
			{
				Log.Debug("ExampleStreaming.OnRecognizeSpeaker()", "speaker result: {0} | confidence: {3} | from: {1} | to: {2}", labelResult.speaker, labelResult.from, labelResult.to, labelResult.confidence);
			}
		}
	}