Skip to content

Instantly share code, notes, and snippets.

@ceee
Created July 17, 2014 15:42
Show Gist options
  • Save ceee/447f567c8467c7d9b3f4 to your computer and use it in GitHub Desktop.
Save ceee/447f567c8467c7d9b3f4 to your computer and use it in GitHub Desktop.
Memory efficient TTS background audio in WinRT
using Newtonsoft.Json;
using Poki.Utilities;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using Windows.ApplicationModel.Background;
using Windows.Foundation.Collections;
using Windows.Media;
using Windows.Media.Playback;
using Windows.Media.SpeechSynthesis;
using System.Linq;
using Windows.Foundation;
namespace Poki.AudioTask
{
/// <summary>
/// Lifecycle state of the foreground app as tracked by the background audio task.
/// The member names are persisted/restored as strings, so they must not be renamed.
/// </summary>
enum ForegroundAppStatus
{
    /// <summary>The foreground app is running (set when it reports being resumed).</summary>
    Active = 0,

    /// <summary>The foreground app reported that it was suspended.</summary>
    Suspended = 1,

    /// <summary>No state has been reported or restored yet.</summary>
    Unknown = 2
}
public sealed class AudioBackgroundTask : IBackgroundTask
{
#region Private fields, properties
// System media transport controls (UVC); obtained in Run and used to report playback status and receive button presses.
private SystemMediaTransportControls systemmediatransportcontrol;
// Deferral requested at the end of Run; completed in Taskcompleted and OnCanceled.
private BackgroundTaskDeferral deferral;
// Last known foreground app state; restored from settings in Run and updated by suspend/resume messages.
private ForegroundAppStatus foregroundAppState = ForegroundAppStatus.Unknown;
// Signalled at the end of Run; the UVC play handler waits on it (up to 2 s) when the task is not yet running.
private AutoResetEvent BackgroundTaskStarted = new AutoResetEvent(false);
// True once Run has finished its setup, reset to false in OnCanceled.
private bool backgroundtaskrunning = false;
// Current text-to-speech source; replaced whenever a new content message arrives, null until then.
private AudioStream stream;
#endregion
#region IBackgroundTask and IBackgroundTaskInstance Interface Members and handlers
/// <summary>
/// Entry point of the background task: wires up the system media transport controls,
/// the background media player events and the foreground message channel, then takes
/// a deferral so the task stays alive after this method returns.
/// </summary>
/// <param name="taskInstance">The task instance supplied by the system.</param>
public void Run(IBackgroundTaskInstance taskInstance)
{
    Debug.WriteLine("audio: starting");

    // Initialize SMTC object to talk with UVC.
    // Note that this is intended to run after the app is paused, hence all the
    // logic must be written to run in the background process.
    systemmediatransportcontrol = SystemMediaTransportControls.GetForCurrentView();
    systemmediatransportcontrol.ButtonPressed += systemmediatransportcontrol_ButtonPressed;
    systemmediatransportcontrol.IsEnabled = true;
    systemmediatransportcontrol.IsPauseEnabled = true;
    systemmediatransportcontrol.IsPlayEnabled = true;
    systemmediatransportcontrol.IsNextEnabled = false;
    systemmediatransportcontrol.IsPreviousEnabled = false;

    // Associate cancellation and completed handlers with the background task.
    taskInstance.Canceled += new BackgroundTaskCanceledEventHandler(OnCanceled);
    taskInstance.Task.Completed += Taskcompleted;

    // Restore the persisted foreground state. A missing, unparsable or undefined
    // value falls back to Unknown instead of throwing and taking the task down.
    var value = SettingsHelper.ReadResetSettingsValue(AudioConstants.AppState);
    ForegroundAppStatus restoredState;
    if (value == null
        || !Enum.TryParse(value.ToString(), out restoredState)
        || !Enum.IsDefined(typeof(ForegroundAppStatus), restoredState))
    {
        restoredState = ForegroundAppStatus.Unknown;
    }
    foregroundAppState = restoredState;

    // Add handlers for MediaPlayer
    BackgroundMediaPlayer.Current.CurrentStateChanged += Current_CurrentStateChanged;
    BackgroundMediaPlayer.Current.MediaEnded += Current_MediaEnded;

    // Initialize message channel
    BackgroundMediaPlayer.MessageReceivedFromForeground += BackgroundMediaPlayer_MessageReceivedFromForeground;

    // Tell the foreground that the background task has started, unless the app is known to be suspended.
    if (foregroundAppState != ForegroundAppStatus.Suspended)
    {
        ValueSet message = new ValueSet();
        message.Add(AudioConstants.BackgroundTaskStarted, "");
        BackgroundMediaPlayer.SendMessageToForeground(message);
    }

    BackgroundTaskStarted.Set();
    backgroundtaskrunning = true;
    SettingsHelper.SaveSettingsValue(AudioConstants.BackgroundTaskState, AudioConstants.BackgroundTaskRunning);

    // Keep the task alive until Taskcompleted/OnCanceled complete the deferral.
    deferral = taskInstance.GetDeferral();
}
/// <summary>
/// Called when the background task registration reports completion; releases the deferral taken in Run.
/// </summary>
/// <param name="sender">The registration of the completed task.</param>
/// <param name="args">Completion details (unused).</param>
void Taskcompleted(BackgroundTaskRegistration sender, BackgroundTaskCompletedEventArgs args)
{
    Debug.WriteLine("MyBackgroundAudioTask " + sender.TaskId + " Completed...");

    // The deferral is only assigned at the very end of Run, so guard against
    // a completion notification arriving before that (same guard as OnCanceled).
    if (deferral != null)
    {
        deferral.Complete();
    }
}
/// <summary>
/// Handles background task cancellation. Task cancellation happens due to:
/// 1. Another media app comes into the foreground and starts playing music.
/// 2. Resource pressure: the task is consuming more CPU and memory than allowed.
/// In either case the state is saved so that a resuming foreground app knows where to start,
/// the event handlers registered in Run are removed and the media pipeline is shut down.
/// </summary>
/// <param name="sender">The task instance being cancelled.</param>
/// <param name="reason">Why the task is being cancelled (unused).</param>
private void OnCanceled(IBackgroundTaskInstance sender, BackgroundTaskCancellationReason reason)
{
    // You get some time here to save your state before process and resources are reclaimed
    Debug.WriteLine("MyBackgroundAudioTask " + sender.Task.TaskId + " Cancel Requested...");
    backgroundtaskrunning = false;

    // Persist state first. A failure here must not prevent the teardown below.
    try
    {
        SettingsHelper.SaveSettingsValue(AudioConstants.Position, BackgroundMediaPlayer.Current.Position.ToString());
        SettingsHelper.SaveSettingsValue(AudioConstants.BackgroundTaskState, AudioConstants.BackgroundTaskCancelled);
        SettingsHelper.SaveSettingsValue(AudioConstants.AppState, Enum.GetName(typeof(ForegroundAppStatus), foregroundAppState));
    }
    catch (Exception ex)
    {
        Debug.WriteLine(ex.ToString());
    }

    // Unsubscribe every handler registered in Run, then shut down the media pipeline.
    try
    {
        if (systemmediatransportcontrol != null)
        {
            systemmediatransportcontrol.ButtonPressed -= systemmediatransportcontrol_ButtonPressed;
        }
        BackgroundMediaPlayer.MessageReceivedFromForeground -= BackgroundMediaPlayer_MessageReceivedFromForeground;
        BackgroundMediaPlayer.Current.CurrentStateChanged -= Current_CurrentStateChanged;
        BackgroundMediaPlayer.Current.MediaEnded -= Current_MediaEnded;
        BackgroundMediaPlayer.Shutdown(); // shutdown media pipeline
    }
    catch (Exception ex)
    {
        Debug.WriteLine(ex.ToString());
    }

    if (deferral != null)
    {
        deferral.Complete(); // signals task completion.
    }
    Debug.WriteLine("MyBackgroundAudioTask Cancel complete...");
}
#endregion
#region SysteMediaTransportControls related functions and handlers
/// <summary>
/// Pushes the "now playing" information for the current stream to the UVC:
/// status Playing, media type Music, the article title as track title and "Poki" as artist.
/// </summary>
private void UpdateUVCOnNewTrack()
{
    var controls = systemmediatransportcontrol;
    controls.PlaybackStatus = MediaPlaybackStatus.Playing;

    var display = controls.DisplayUpdater;
    display.Type = MediaPlaybackType.Music;

    var music = display.MusicProperties;
    music.Title = stream.Data.Title;
    music.Artist = "Poki";

    display.Update();
}
/// <summary>
/// Handles the play and pause buttons of the UVC.
/// This code must run in the background process, otherwise button presses cannot be handled while the app is suspended.
/// </summary>
/// <param name="sender">The transport controls that raised the event.</param>
/// <param name="args">Identifies the pressed button.</param>
private void systemmediatransportcontrol_ButtonPressed(SystemMediaTransportControls sender, SystemMediaTransportControlsButtonPressedEventArgs args)
{
    switch (args.Button)
    {
        case SystemMediaTransportControlsButton.Play:
            Debug.WriteLine("audio: UVC play button pressed");
            bool success = false;
            try
            {
                BackgroundMediaPlayer.Current.Play();
                success = true;
            }
            catch (Exception ex)
            {
                // Logged (like the pause branch) instead of being dropped silently; the fallback below handles it.
                Debug.WriteLine(ex.ToString());
            }

            // If music is in paused state for a period of more than 5 minutes,
            // the app will get task cancellation and it cannot run code.
            // However, the user can still play music by pressing play via UVC unless a new app comes in and clears UVC.
            // When this happens, the task gets re-initialized and that is asynchronous, hence the wait.
            if (!success)
            {
                if (!backgroundtaskrunning)
                {
                    bool result = BackgroundTaskStarted.WaitOne(2000);
                    if (!result)
                    {
                        throw new Exception("audio: Background Task didnt initialize in time");
                    }
                }
                StartPlayback();
            }
            break;

        case SystemMediaTransportControlsButton.Pause:
            Debug.WriteLine("audio: UVC pause button pressed");
            try
            {
                BackgroundMediaPlayer.Current.Pause();
            }
            catch (Exception ex)
            {
                Debug.WriteLine(ex.ToString());
            }
            break;
    }
}
#endregion
#region Playlist management functions and handlers
/// <summary>
/// Synthesizes the next chunk of the current stream and starts playing it.
/// Does nothing when no stream has been set or when the stream has no more content.
/// </summary>
private async void StartPlayback()
{
    // Work on a snapshot: the message handler can assign a new stream at any time.
    AudioStream current = stream;
    if (current == null)
    {
        return;
    }

    try
    {
        // StreamNext() may hand back a null operation when all content has been read;
        // awaiting that would throw, so treat it explicitly as "nothing left to play".
        IAsyncOperation<SpeechSynthesisStream> pending = current.StreamNext();
        if (pending == null)
        {
            return;
        }

        SpeechSynthesisStream synthStream = await pending;
        if (synthStream != null)
        {
            BackgroundMediaPlayer.Current.SetStreamSource(synthStream);
            BackgroundMediaPlayer.Current.Play();
            UpdateUVCOnNewTrack();
        }
    }
    catch (Exception ex)
    {
        // async void: an escaping exception would tear down the process, so log and continue.
        Debug.WriteLine("audio: playback failed - " + ex.ToString());
    }
}
/// <summary>
/// Fires when the currently played chunk has ended and continues
/// with the next text part (or stops when no part is available).
/// </summary>
/// <param name="sender">The media player that finished.</param>
/// <param name="args">Unused.</param>
private void Current_MediaEnded(MediaPlayer sender, object args)
{
    StartPlayback();
}
#endregion
#region Background Media Player Handlers
/// <summary>
/// Mirrors the media player's Playing/Paused state onto the UVC playback status.
/// Other player states leave the UVC status untouched.
/// </summary>
void Current_CurrentStateChanged(MediaPlayer sender, object args)
{
    switch (sender.CurrentState)
    {
        case MediaPlayerState.Playing:
            systemmediatransportcontrol.PlaybackStatus = MediaPlaybackStatus.Playing;
            break;

        case MediaPlayerState.Paused:
            systemmediatransportcontrol.PlaybackStatus = MediaPlaybackStatus.Paused;
            break;
    }
}
/// <summary>
/// Fires when a message is received from the foreground app and dispatches on each key of the message.
/// Keys are compared case-insensitively against the AudioConstants values.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="e">Carries the message payload as a key/value set.</param>
void BackgroundMediaPlayer_MessageReceivedFromForeground(object sender, MediaPlayerDataReceivedEventArgs e)
{
    foreach (string key in e.Data.Keys)
    {
        Debug.WriteLine("audio: message received - " + key);

        // Invariant lower-casing: these are protocol identifiers, not user-visible text.
        switch (key.ToLowerInvariant())
        {
            // set new content/article
            case AudioConstants.Content:
                // Look the payload up by the key as it was actually sent; it may differ in casing from the constant.
                AudioData data = JsonConvert.DeserializeObject<AudioData>((string)e.Data[key]);
                if (data != null)
                {
                    stream = new AudioStream(data);
                }
                else
                {
                    Debug.WriteLine("audio: content message carried no data");
                }
                break;

            // in case the app was suspended
            case AudioConstants.AppSuspended:
                foregroundAppState = ForegroundAppStatus.Suspended;
                break;

            // restart playback from the beginning (only while something is playing)
            case AudioConstants.RestartPlayback:
                if (stream != null)
                {
                    stream.ResetPosition();
                    if (BackgroundMediaPlayer.Current.CurrentState == MediaPlayerState.Playing)
                    {
                        BackgroundMediaPlayer.Current.Pause();
                        BackgroundMediaPlayer.Current.Position = TimeSpan.Zero;
                        StartPlayback();
                    }
                }
                break;

            // stop playback and reset position
            case AudioConstants.StopPlayback:
                if (stream != null)
                {
                    stream.ResetPosition();
                }
                if (BackgroundMediaPlayer.Current.CanPause)
                {
                    BackgroundMediaPlayer.Current.Pause();
                    BackgroundMediaPlayer.Current.Position = TimeSpan.Zero;
                }
                break;

            // in case the app was resumed
            case AudioConstants.AppResumed:
                foregroundAppState = ForegroundAppStatus.Active;
                break;

            // start playback
            case AudioConstants.StartPlayback:
                StartPlayback();
                break;
        }
    }
}
#endregion
}
}
using Newtonsoft.Json;
using Windows.Media.SpeechSynthesis;
namespace Poki.AudioTask
{
/// <summary>
/// Data describing one text to be read aloud. The background task receives it
/// as a JSON string from the foreground app and deserializes it into this type.
/// </summary>
[JsonObject]
public sealed class AudioData
{
/// <summary>The full text to speak; AudioStream splits it into words on spaces.</summary>
public string Content { get; set; }
/// <summary>Id of the preferred voice; AudioStream falls back to the default voice when no installed voice has this id.</summary>
public string VoiceID { get; set; }
/// <summary>Desired language of the speech. NOTE(review): not read by the visible code — confirm where it is used.</summary>
public string DesiredLanguage { get; set; }
/// <summary>Title of the text; shown on the UVC and spoken before the first chunk.</summary>
public string Title { get; set; }
}
}
using Poki.Utilities;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using System.Xml.Linq;
using Windows.Foundation;
using Windows.Media.SpeechSynthesis;
namespace Poki.AudioTask
{
public sealed class AudioStream
{
/// <summary>The data (text, title, voice) this stream reads from.</summary>
public AudioData Data { get; set; }

// Synthesizer used for every chunk; its voice is chosen once in the constructor.
private readonly SpeechSynthesizer Synthesizer;

// Index (in words) of the first word that has not been handed out yet.
private int currentPosition = 0;

// The content split into words on spaces; line breaks stay inside the words.
private readonly IEnumerable<string> contentParts;

/// <summary>
/// Creates a new instance of the AudioStream: selects the voice whose id matches
/// <c>data.VoiceID</c> (default voice otherwise) and splits the content into words.
/// </summary>
/// <param name="data">The text and voice information to read. Must not be null; a null Content is treated as empty.</param>
public AudioStream(AudioData data)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    Data = data;
    Synthesizer = new SpeechSynthesizer();
    Synthesizer.Voice = SpeechSynthesizer.AllVoices.FirstOrDefault(item => item.Id == data.VoiceID) ?? SpeechSynthesizer.DefaultVoice;

    // JSON without a content field deserializes to a null Content; read that as "nothing to speak".
    contentParts = (data.Content ?? String.Empty).Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
}
/// <summary>
/// Rewinds the stream so that the next call to StreamNext starts at the first word again.
/// </summary>
public void ResetPosition()
{
    this.currentPosition = 0;
}
/// <summary>
/// Synthesizes the next chunk of words, starting at the current position.
/// A chunk runs for more than <c>Options.AudioStreamCacheSize</c> words and then ends at the
/// first word that finishes a sentence ('.', '?' or '!'), or after roughly twice that many words.
/// </summary>
/// <returns>
/// An operation yielding the synthesized stream for the chunk. When no words are left the
/// position is reset and the operation yields <c>null</c> (the operation itself is never null,
/// so it can always be awaited).
/// </returns>
public IAsyncOperation<SpeechSynthesisStream> StreamNext()
{
    Debug.WriteLine("audio stream: " + currentPosition.ToString());
    int max = Options.AudioStreamCacheSize; // in words

    // get part of text for reading
    List<string> parts = new List<string>();
    foreach (string word in contentParts.Skip(currentPosition))
    {
        parts.Add(word);
        int taken = parts.Count;

        // Hard limit: never take more than 2 * max + 1 words.
        if (taken > 2 * max)
        {
            break;
        }

        // Soft limit: once past max + 1 words, stop at the end of the current sentence.
        if (taken > max + 1
            && (word.EndsWith(".", StringComparison.Ordinal)
                || word.EndsWith("?", StringComparison.Ordinal)
                || word.EndsWith("!", StringComparison.Ordinal)))
        {
            break;
        }
    }

    int contentCount = parts.Count;

    // nothing left to read: rewind and report "no stream"
    if (contentCount == 0)
    {
        ResetPosition();
        return Task.FromResult<SpeechSynthesisStream>(null).AsAsyncOperation<SpeechSynthesisStream>();
    }

    // The title is only spoken in front of the very first chunk.
    IAsyncOperation<SpeechSynthesisStream> streamOperation = CreateSSMLStream(parts, Data.Title, Synthesizer.Voice, currentPosition == 0).AsAsyncOperation<SpeechSynthesisStream>();
    currentPosition = currentPosition + contentCount;
    return streamOperation;
}
/// <summary>
/// Builds the SSML for a chunk of words and synthesizes it to a stream.
/// </summary>
/// <param name="contentParts">The words of the chunk, in reading order.</param>
/// <param name="title">Title spoken before the text when <paramref name="appendTitle"/> is set; ignored when null.</param>
/// <param name="voice">Voice whose language is written to the <c>xml:lang</c> attribute.</param>
/// <param name="appendTitle">Whether the title (followed by a break) precedes the text.</param>
/// <returns>The synthesized speech stream.</returns>
private async Task<SpeechSynthesisStream> CreateSSMLStream(IEnumerable<string> contentParts, string title, VoiceInformation voice, bool appendTitle = true)
{
    string ssml = BuildSsml(contentParts, title, voice.Language, appendTitle);
    return await Synthesizer.SynthesizeSsmlToStreamAsync(ssml);
}

/// <summary>
/// Creates the SSML markup: a <c>speak</c> root, an optional title, and one <c>p</c> element
/// per paragraph containing one <c>s</c> element per sentence.
/// </summary>
private static string BuildSsml(IEnumerable<string> words, string title, string language, bool appendTitle)
{
    XNamespace ns = "http://www.w3.org/2001/10/synthesis";

    // xml:lang is written as a real namespaced attribute, so the serialized markup
    // needs no textual post-processing (which could also alter the spoken text).
    XElement root = new XElement(ns + "speak",
        new XAttribute("version", "1.0"),
        new XAttribute(XNamespace.Xml + "lang", language ?? String.Empty)
    );

    // append title
    if (appendTitle && title != null)
    {
        root.Add(
            new XCData(title),
            new XElement(ns + "break")
        );
    }

    // split text into paragraphs (line breaks are still embedded in the words)
    string[] paragraphs = String.Join(" ", words).Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string paragraph in paragraphs)
    {
        XElement pElement = new XElement(ns + "p");

        // split paragraph into sentences
        string[] sentences = paragraph.Split(new string[] { ". ", "! ", "? " }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string sentence in sentences)
        {
            pElement.Add(
                new XElement(ns + "s",
                    new XCData(sentence)
                )
            );
        }
        root.Add(pElement);
    }

    return root.ToString();
}
}
}
@ceee
Copy link
Author

ceee commented Jul 17, 2014

1.) A message is sent to the background task which contains an AudioData instance with the text and the voice information.
2.) The AudioStream instance is created from the AudioData.
3.) The AudioStream creates the SpeechSynthesizer with the correct language and splits the content into words.
4.) When the startplayback message is received, AudioStream.StreamNext() method is called, which generates the SpeechSynthesisStream.
5.) The StreamNext method skips the number of words stored in currentPosition, takes at least Options.AudioStreamCacheSize words (in my case 100), and then stops at the next word that ends with sentence punctuation or once it has taken about 200 words.
6.) Instead of plain text, SSML (an XML-based markup language) is generated. The text is split into paragraphs (p) and sentences (s).
7.) The SSML is converted to a Stream and assigned to the BackgroundMediaPlayer.
8.) As soon as the media player reaches the end, the event MediaEnded is triggered, which calls StreamNext() again, but now with the new position. This is repeated until no more words are available.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment