Skip to content

Instantly share code, notes, and snippets.

@RichLogan
Created August 30, 2016 09:42
Show Gist options
  • Save RichLogan/60c7bbfda833387dadea5b6a80870ae6 to your computer and use it in GitHub Desktop.
Save RichLogan/60c7bbfda833387dadea5b6a80870ae6 to your computer and use it in GitHub Desktop.
HoloLens Spark -> Speech
#if UNITY_WSA
using Cisco.Spark;
using HoloToolkit.Unity;
using System.Collections;
using System.Text;
using UnityEngine;
using UnityEngine.Windows.Speech;
/// <summary>
/// Records speech from the default microphone, runs it through Unity's
/// <see cref="DictationRecognizer"/> and posts every recognised phrase to a Spark room.
/// Keyword recognition is shut down while dictating and restarted once dictation has stopped.
/// </summary>
public class SparkSpeechRecognition : MonoBehaviour
{
    [Tooltip("The parsed speech")]
    public string Message;

    [SerializeField]
    [Tooltip("Id of the Spark room that recognised speech is posted to")]
    private string roomId = "Y2lzY29zcGFyazovL3VzL1JPT00vMjgyOTIyYTAtNTNmYi0xMWU2LThmZmMtYWJmMDM5MDkyZDM5";

    private DictationRecognizer dictationRecognizer;

    // Holds the most recently recognised phrase; hypotheses are displayed after it.
    private StringBuilder textSoFar;

    // Using an empty string specifies the default microphone.
    private static readonly string deviceName = string.Empty;

    private int samplingRate;

    // Length of a single recording, in seconds.
    private const int messageLength = 10;

    // Used when the microphone reports no specific maximum frequency.
    private const int fallbackSamplingRate = 44100;

    /// <summary>
    /// Creates the dictation recognizer, hooks up its events and queries the microphone's sampling rate.
    /// </summary>
    void Awake()
    {
        dictationRecognizer = new DictationRecognizer();

        // Fired while the user is talking, with the text heard so far.
        dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;

        // Fired after the user pauses, typically at the end of a sentence, with the full recognised string.
        dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;

        // Fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
        dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;

        // Fired when an error occurs.
        dictationRecognizer.DictationError += DictationRecognizer_DictationError;

        // Query the maximum frequency of the default microphone. Use 'unused' to ignore the minimum frequency.
        int unused;
        Microphone.GetDeviceCaps(deviceName, out unused, out samplingRate);

        // GetDeviceCaps reports zero when the device accepts any frequency; zero is not a usable
        // recording frequency, so fall back to a common default.
        if (samplingRate <= 0)
        {
            samplingRate = fallbackSamplingRate;
        }

        textSoFar = new StringBuilder();
    }

    /// <summary>
    /// Detects the moment the microphone has stopped while dictation is still running.
    /// Automatic stopping is currently disabled, so nothing happens in that case.
    /// </summary>
    void Update()
    {
        if (!Microphone.IsRecording(deviceName) && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            // StopRecording();
        }
    }

    /// <summary>
    /// Releases the dictation recognizer when this component is destroyed.
    /// </summary>
    void OnDestroy()
    {
        if (dictationRecognizer == null)
        {
            return;
        }

        // Unsubscribe first so that stopping below cannot call back into a component being destroyed.
        dictationRecognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
        dictationRecognizer.DictationResult -= DictationRecognizer_DictationResult;
        dictationRecognizer.DictationComplete -= DictationRecognizer_DictationComplete;
        dictationRecognizer.DictationError -= DictationRecognizer_DictationError;

        if (dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            dictationRecognizer.Stop();
        }

        dictationRecognizer.Dispose();
        dictationRecognizer = null;
    }

    /// <summary>
    /// Turns on the dictation recognizer and begins recording audio from the default microphone.
    /// </summary>
    /// <returns>The audio clip recorded from the microphone.</returns>
    public AudioClip StartRecording()
    {
        // Stop keyword recognition and start speech recognition
        PhraseRecognitionSystem.Shutdown();
        dictationRecognizer.Start();

        // Record a single, non-looping clip of messageLength seconds
        return Microphone.Start(deviceName, false, messageLength, samplingRate);
    }

    /// <summary>
    /// Ends the recording session: stops the dictation recognizer if it is running and ends microphone capture.
    /// </summary>
    public void StopRecording()
    {
        if (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            dictationRecognizer.Stop();
        }

        Microphone.End(deviceName);
    }

    /// <summary>
    /// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
    /// </summary>
    /// <param name="text">The currently hypothesized recognition.</param>
    private void DictationRecognizer_DictationHypothesis(string text)
    {
        // We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event
        Message = textSoFar.ToString() + " " + text;
    }

    /// <summary>
    /// This event is fired after the user pauses, typically at the end of a sentence.
    /// The recognised phrase is stored in <see cref="Message"/>, posted to the Spark room,
    /// and then recording is stopped and keyword recognition restarted.
    /// </summary>
    /// <param name="text">The text that was heard by the recognizer.</param>
    /// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
    private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
    {
        // At this point textSoFar still holds the previously sent phrase.
        Debug.Log("Old message is: " + textSoFar);

        // Keep only the new phrase. Resetting the builder (rather than appending and then removing
        // the old phrase with a search-and-replace) works regardless of letter case and does not
        // strip the new phrase when the user repeats the previous one.
        textSoFar.Length = 0;
        textSoFar.Append(text);

        // Save final message
        Message = textSoFar.ToString();

        // Now we should send to Spark
        Message speechRecognizedMessage = new Message();
        speechRecognizedMessage.RoomId = roomId;

        // Note that the message was recognised via Speech Recognition (and show off some Markdown!)
        speechRecognizedMessage.Markdown = Message + "\n\n *(Via speech (Confidence: " + confidence.ToString() + "))*";

        // Send to Spark; the result of the commit is deliberately ignored
        Debug.Log("Sending to Spark");
        StartCoroutine(speechRecognizedMessage.Commit(message => { }));

        // Stop Recording
        StopRecording();

        // Restart Keyword Listener
        StartCoroutine(RestartSpeechSystem());
    }

    /// <summary>
    /// This event is fired when the recognizer stops, whether from Stop() being called, a timeout occurring, or some other error.
    /// Only the timeout case is handled here.
    /// </summary>
    /// <param name="cause">An enumerated reason for the session completing.</param>
    private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
    {
        // If Timeout occurs, the user has been silent for too long.
        // With dictation, the default timeout after a recognition is 20 seconds.
        // The default timeout with initial silence is 5 seconds.
        if (cause == DictationCompletionCause.TimeoutExceeded)
        {
            Microphone.End(deviceName);

            // NOTE(review): no ResetAfterTimeout method is defined in this class; presumably another
            // component on the same GameObject handles it - confirm.
            SendMessage("ResetAfterTimeout");
        }
    }

    /// <summary>
    /// This event is fired when an error occurs. The error is written to the Unity log.
    /// </summary>
    /// <param name="error">The string representation of the error reason.</param>
    /// <param name="hresult">The int representation of the hresult.</param>
    private void DictationRecognizer_DictationError(string error, int hresult)
    {
        Debug.LogError(error + "\nHRESULT: " + hresult);
    }

    /// <summary>
    /// Waits until the dictation recognizer is no longer running, then starts the given keyword manager.
    /// </summary>
    /// <param name="keywordToStart">The keyword manager whose recognizer should be started.</param>
    private IEnumerator RestartSpeechSystem(KeywordManager keywordToStart)
    {
        while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            yield return null;
        }

        keywordToStart.StartKeywordRecognizer();
    }

    /// <summary>
    /// Waits until the dictation recognizer is no longer running, then starts the keyword
    /// recognizer of the SpeechManager found in the scene.
    /// </summary>
    private IEnumerator RestartSpeechSystem()
    {
        while (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
        {
            Debug.Log("Still running...");
            yield return null;
        }

        Debug.Log("Ready to restart keywords");

        SpeechManager speechManager = FindObjectOfType<SpeechManager>();
        if (speechManager == null || speechManager.keywordRecognizer == null)
        {
            // Without a SpeechManager there is nothing to restart; report it instead of throwing.
            Debug.LogWarning("No SpeechManager with a keyword recognizer was found; keywords were not restarted");
            yield break;
        }

        speechManager.keywordRecognizer.Start();
        Debug.Log("Successfully restarted keywords");
    }
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment