RichLogan/SparkSpeech.cs

## SparkSpeech.cs
#if UNITY_WSA
using Cisco.Spark;
using HoloToolkit.Unity;
using System.Collections;
using System.Text;
using UnityEngine;
using UnityEngine.Windows.Speech;

/// <summary>
/// Listens for dictated speech on the default microphone and posts each recognized
/// phrase to a Spark room as a Markdown message. Keyword recognition is shut down
/// while dictation runs and restarted once the dictation recognizer has stopped.
/// </summary>
public class SparkSpeechRecognition : MonoBehaviour
{
    [Tooltip("The parsed speech")]
    public string Message;

    [Tooltip("ID of the Spark room that recognized speech is posted to")]
    public string RoomId = "Y2lzY29zcGFyazovL3VzL1JPT00vMjgyOTIyYTAtNTNmYi0xMWU2LThmZmMtYWJmMDM5MDkyZDM5";

    private DictationRecognizer dictationRecognizer;
    private StringBuilder textSoFar;

    // Using an empty string specifies the default microphone.
    private static string deviceName = string.Empty;
    private int samplingRate;

    // Length, in seconds, of the audio clip recorded by StartRecording.
    private const int messageLength = 10;

    // Sampling rate used when the microphone reports no frequency limits (0/0).
    private const int fallbackSamplingRate = 44100;

    private string lastMessage = "";

    /// <summary>
    /// Creates the dictation recognizer, subscribes to its events and queries the
    /// sampling rate to record with.
    /// </summary>
    void Awake()
    {
        dictationRecognizer = new DictationRecognizer();

        // Fired while the user is talking, with the text heard so far.
        dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;

        // Fired after the user pauses, typically at the end of a sentence, with the full recognized string.
        dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;

        // Fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
        dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;

        // Fired when an error occurs.
        dictationRecognizer.DictationError += DictationRecognizer_DictationError;

        // Query the frequency range of the default microphone; the maximum is used for recording.
        int minFrequency;
        Microphone.GetDeviceCaps(deviceName, out minFrequency, out samplingRate);

        // A 0/0 range means the device accepts any frequency, so 0 must not be
        // handed to Microphone.Start; pick a concrete rate instead.
        if (minFrequency == 0 && samplingRate == 0)
        {
            samplingRate = fallbackSamplingRate;
        }

        // Caches the most recently recognized text.
        textSoFar = new StringBuilder();
    }

    /// <summary>
    /// Unsubscribes from the dictation recognizer and releases it when this component is destroyed.
    /// </summary>
    void OnDestroy()
    {
        if (dictationRecognizer == null)
        {
            return;
        }

        dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
        dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
        dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
        dictationRecognizer.DictationError -= DictationRecognizer_DictationError;
        dictationRecognizer.Dispose();
        dictationRecognizer = null;
    }

    /// <summary>
    /// Turns on the dictation recognizer and begins recording audio from the default microphone.
    /// </summary>
    /// <returns>The audio clip recorded from the microphone.</returns>
    public AudioClip StartRecording()
    {
        // Stop keyword recognition and start speech recognition
        PhraseRecognitionSystem.Shutdown();
        dictationRecognizer.Start();

        // Record a single, non-looping clip of messageLength seconds
        return Microphone.Start(deviceName, false, messageLength, samplingRate);
    }

    /// <summary>
    /// Ends the recording session.
    /// </summary>
    public void StopRecording()
    {
        // Only stop the recognizer if it is actually running
        if (dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            dictationRecognizer.Stop();
        }
        Microphone.End(deviceName);
    }

    /// <summary>
    /// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
    /// </summary>
    /// <param name="text">The currently hypothesized recognition.</param>
    private void DictationRecognizer_DictationHypothesis(string text)
    {
        // We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event
        Message = textSoFar.ToString() + " " + text;
    }

    /// <summary>
    /// This event is fired after the user pauses, typically at the end of a sentence. The full recognized string is returned here.
    /// The recognized text is stored in the Message field, posted to the Spark room identified by RoomId,
    /// and then recording is stopped and keyword recognition is restarted.
    /// </summary>
    /// <param name="text">The text that was heard by the recognizer.</param>
    /// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
    private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
    {
        Debug.Log("Old message is: " + lastMessage);

        // Start from an empty buffer so the previous message never leaks into this one.
        // (Stripping the old text with Replace would also delete matching substrings
        // of the new phrase.) Length = 0 is used so this works on older runtimes too.
        textSoFar.Length = 0;
        textSoFar.Append(text);
        lastMessage = textSoFar.ToString();

        // Save final message
        this.Message = lastMessage;

        // Build the Spark message for the configured room
        Message speechRecognizedMessage = new Message();
        speechRecognizedMessage.RoomId = RoomId;

        // Note that the message was recognised via Speech Recognition (and show off some Markdown!)
        speechRecognizedMessage.Markdown = this.Message + "\n\n *(Via speech (Confidence: " + confidence.ToString() + "))*";

        // Send to Spark
        Debug.Log("Sending to Spark");
        StartCoroutine(speechRecognizedMessage.Commit(message => { }));

        // Stop Recording
        StopRecording();

        // Restart Keyword Listener
        StartCoroutine(RestartSpeechSystem());
    }

    /// <summary>
    /// This event is fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
    /// Typically, this will simply return "Complete". In this case, we check to see if the recognizer timed out.
    /// </summary>
    /// <param name="cause">An enumerated reason for the session completing.</param>
    private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
    {
        // If Timeout occurs, the user has been silent for too long.
        // With dictation, the default timeout after a recognition is 20 seconds.
        // The default timeout with initial silence is 5 seconds.
        if (cause == DictationCompletionCause.TimeoutExceeded)
        {
            Microphone.End(deviceName);

            // Notify other components on this GameObject that dictation timed out.
            SendMessage("ResetAfterTimeout");
        }
    }

    /// <summary>
    /// This event is fired when an error occurs.
    /// </summary>
    /// <param name="error">The string representation of the error reason.</param>
    /// <param name="hresult">The int representation of the hresult.</param>
    private void DictationRecognizer_DictationError(string error, int hresult)
    {
        Debug.LogError(error + "\nHRESULT: " + hresult);
    }

    /// <summary>
    /// Waits until the dictation recognizer is no longer running, then starts the given keyword manager.
    /// </summary>
    /// <param name="keywordToStart">The keyword manager whose recognizer should be started.</param>
    private IEnumerator RestartSpeechSystem(KeywordManager keywordToStart)
    {
        while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            yield return null;
        }

        keywordToStart.StartKeywordRecognizer();
    }

    /// <summary>
    /// Waits until the dictation recognizer is no longer running, then starts the keyword
    /// recognizer of the SpeechManager found in the scene.
    /// </summary>
    private IEnumerator RestartSpeechSystem()
    {
        while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            Debug.Log("Still running...");
            yield return null;
        }

        Debug.Log("Ready to restart keywords");

        // Guard the lookup: an exception here would otherwise kill the coroutine
        // with a bare NullReferenceException and no hint about the cause.
        SpeechManager speechManager = FindObjectOfType<SpeechManager>();
        if (speechManager == null || speechManager.keywordRecognizer == null)
        {
            Debug.LogError("Cannot restart keywords: no SpeechManager with a keyword recognizer was found.");
            yield break;
        }

        speechManager.keywordRecognizer.Start();
        Debug.Log("Successfully restarted keywords");
    }
}
#endif
	#if UNITY_WSA
	using Cisco.Spark;
	using HoloToolkit.Unity;
	using System.Collections;
	using System.Text;
	using UnityEngine;
	using UnityEngine.Windows.Speech;

	// NOTE(review): a class with this same name is already defined earlier in this file under
	// the same UNITY_WSA guard; two definitions cannot compile together, so one should be removed.
	/// <summary>
	/// Listens for dictated speech on the default microphone and posts each recognized
	/// phrase to a Spark room as a Markdown message. Keyword recognition is shut down
	/// while dictation runs and restarted once the dictation recognizer has stopped.
	/// </summary>
	public class SparkSpeechRecognition : MonoBehaviour
	{
		[Tooltip("The parsed speech")]
		public string Message;

		[Tooltip("ID of the Spark room that recognized speech is posted to")]
		public string RoomId = "Y2lzY29zcGFyazovL3VzL1JPT00vMjgyOTIyYTAtNTNmYi0xMWU2LThmZmMtYWJmMDM5MDkyZDM5";

		private DictationRecognizer dictationRecognizer;
		private StringBuilder textSoFar;

		// Using an empty string specifies the default microphone.
		private static string deviceName = string.Empty;
		private int samplingRate;

		// Length, in seconds, of the audio clip recorded by StartRecording.
		private const int messageLength = 10;

		// Sampling rate used when the microphone reports no frequency limits (0/0).
		private const int fallbackSamplingRate = 44100;

		private string lastMessage = "";

		/// <summary>
		/// Creates the dictation recognizer, subscribes to its events and queries the
		/// sampling rate to record with.
		/// </summary>
		void Awake()
		{
			dictationRecognizer = new DictationRecognizer();

			// Fired while the user is talking, with the text heard so far.
			dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;

			// Fired after the user pauses, typically at the end of a sentence, with the full recognized string.
			dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;

			// Fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
			dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;

			// Fired when an error occurs.
			dictationRecognizer.DictationError += DictationRecognizer_DictationError;

			// Query the frequency range of the default microphone; the maximum is used for recording.
			int minFrequency;
			Microphone.GetDeviceCaps(deviceName, out minFrequency, out samplingRate);

			// A 0/0 range means the device accepts any frequency, so 0 must not be
			// handed to Microphone.Start; pick a concrete rate instead.
			if (minFrequency == 0 && samplingRate == 0)
			{
				samplingRate = fallbackSamplingRate;
			}

			// Caches the most recently recognized text.
			textSoFar = new StringBuilder();
		}

		/// <summary>
		/// Unsubscribes from the dictation recognizer and releases it when this component is destroyed.
		/// </summary>
		void OnDestroy()
		{
			if (dictationRecognizer == null)
			{
				return;
			}

			dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
			dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
			dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
			dictationRecognizer.DictationError -= DictationRecognizer_DictationError;
			dictationRecognizer.Dispose();
			dictationRecognizer = null;
		}

		/// <summary>
		/// Turns on the dictation recognizer and begins recording audio from the default microphone.
		/// </summary>
		/// <returns>The audio clip recorded from the microphone.</returns>
		public AudioClip StartRecording()
		{
			// Stop keyword recognition and start speech recognition
			PhraseRecognitionSystem.Shutdown();
			dictationRecognizer.Start();

			// Record a single, non-looping clip of messageLength seconds
			return Microphone.Start(deviceName, false, messageLength, samplingRate);
		}

		/// <summary>
		/// Ends the recording session.
		/// </summary>
		public void StopRecording()
		{
			// Only stop the recognizer if it is actually running
			if (dictationRecognizer.Status == SpeechSystemStatus.Running)
			{
				dictationRecognizer.Stop();
			}
			Microphone.End(deviceName);
		}

		/// <summary>
		/// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
		/// </summary>
		/// <param name="text">The currently hypothesized recognition.</param>
		private void DictationRecognizer_DictationHypothesis(string text)
		{
			// We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event
			Message = textSoFar.ToString() + " " + text;
		}

		/// <summary>
		/// This event is fired after the user pauses, typically at the end of a sentence. The full recognized string is returned here.
		/// The recognized text is stored in the Message field, posted to the Spark room identified by RoomId,
		/// and then recording is stopped and keyword recognition is restarted.
		/// </summary>
		/// <param name="text">The text that was heard by the recognizer.</param>
		/// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
		private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
		{
			Debug.Log("Old message is: " + lastMessage);

			// Start from an empty buffer so the previous message never leaks into this one.
			// (Stripping the old text with Replace would also delete matching substrings
			// of the new phrase.) Length = 0 is used so this works on older runtimes too.
			textSoFar.Length = 0;
			textSoFar.Append(text);
			lastMessage = textSoFar.ToString();

			// Save final message
			this.Message = lastMessage;

			// Build the Spark message for the configured room
			Message speechRecognizedMessage = new Message();
			speechRecognizedMessage.RoomId = RoomId;

			// Note that the message was recognised via Speech Recognition; the surrounding
			// asterisks render the footer in italics (and show off some Markdown!)
			speechRecognizedMessage.Markdown = this.Message + "\n\n *(Via speech (Confidence: " + confidence.ToString() + "))*";

			// Send to Spark
			Debug.Log("Sending to Spark");
			StartCoroutine(speechRecognizedMessage.Commit(message => { }));

			// Stop Recording
			StopRecording();

			// Restart Keyword Listener
			StartCoroutine(RestartSpeechSystem());
		}

		/// <summary>
		/// This event is fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
		/// Typically, this will simply return "Complete". In this case, we check to see if the recognizer timed out.
		/// </summary>
		/// <param name="cause">An enumerated reason for the session completing.</param>
		private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
		{
			// If Timeout occurs, the user has been silent for too long.
			// With dictation, the default timeout after a recognition is 20 seconds.
			// The default timeout with initial silence is 5 seconds.
			if (cause == DictationCompletionCause.TimeoutExceeded)
			{
				Microphone.End(deviceName);

				// Notify other components on this GameObject that dictation timed out.
				SendMessage("ResetAfterTimeout");
			}
		}

		/// <summary>
		/// This event is fired when an error occurs.
		/// </summary>
		/// <param name="error">The string representation of the error reason.</param>
		/// <param name="hresult">The int representation of the hresult.</param>
		private void DictationRecognizer_DictationError(string error, int hresult)
		{
			Debug.LogError(error + "\nHRESULT: " + hresult);
		}

		/// <summary>
		/// Waits until the dictation recognizer is no longer running, then starts the given keyword manager.
		/// </summary>
		/// <param name="keywordToStart">The keyword manager whose recognizer should be started.</param>
		private IEnumerator RestartSpeechSystem(KeywordManager keywordToStart)
		{
			while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
			{
				yield return null;
			}

			keywordToStart.StartKeywordRecognizer();
		}

		/// <summary>
		/// Waits until the dictation recognizer is no longer running, then starts the keyword
		/// recognizer of the SpeechManager found in the scene.
		/// </summary>
		private IEnumerator RestartSpeechSystem()
		{
			while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
			{
				Debug.Log("Still running...");
				yield return null;
			}

			Debug.Log("Ready to restart keywords");

			// Guard the lookup: an exception here would otherwise kill the coroutine
			// with a bare NullReferenceException and no hint about the cause.
			SpeechManager speechManager = FindObjectOfType<SpeechManager>();
			if (speechManager == null || speechManager.keywordRecognizer == null)
			{
				Debug.LogError("Cannot restart keywords: no SpeechManager with a keyword recognizer was found.");
				yield break;
			}

			speechManager.keywordRecognizer.Start();
			Debug.Log("Successfully restarted keywords");
		}
	}
	#endif