Last active October 5, 2022 22:02
StereoKit (SK) sample: Media Capture audio routed to an Audio Graph device output on the HoloLens 2 (HL2)
namespace SKAudioGraph
using StereoKit;
using System;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Windows.Foundation;
using Windows.Media;
using Windows.Media.Audio;
using Windows.Media.Capture;
using Windows.Media.Capture.Frames;
using Windows.Media.MediaProperties;
using Windows.Media.Render;
/// <summary>
/// Application class: holds the static audio graph objects, the menu state and the gain value
/// shared between the UI loop and the audio callbacks.
/// </summary>
internal class Program
// Audio graph and its nodes. All three are assigned in CreateAudioGraph.
private static AudioGraph graph;
private static AudioDeviceOutputNode deviceOutputNode;
private static AudioFrameInputNode frameInputNode;
// States of the menu UI.
// NOTE(review): the enum members are not visible in this copy of the source; Main() refers to
// Main, StartingAudio and StoppingAudio.
private enum MenuState
// Menu state accessor. Both accessors go through Interlocked on the int backing field `state`:
// CompareExchange(ref state, 0, 0) leaves the value unchanged and returns it (an atomic read),
// Exchange performs the write. The state is written from task continuations in Main() and read
// inside the StereoKit step callback.
private static MenuState State
get { return (MenuState)Interlocked.CompareExchange(ref state, 0, 0); }
set { Interlocked.Exchange(ref state, (int)value); }
// Backing field for State, kept as int so the Interlocked methods can operate on it.
private static int state = 0;
// Value bound to the "Gain" slider; copied to frameInputNode.OutgoingGain when the slider changes.
private static double outgoingGain = 1;
// Entry point: initializes StereoKit, sets up audio capture, then runs the UI loop that shows a
// Play/Stop button, a gain slider with a value label, and an Exit button.
// NOTE(review): this copy of the source is missing its braces and several statements (for
// example the bodies of the `if` on SK.Initialize and of the "Exit" button, and the switch
// `break`s). The comments below describe only what is visible.
static void Main(string[] args)
// Initialize StereoKit
SKSettings settings = new SKSettings
appName = "SKAudioGraph",
assetsFolder = "Assets",
if (!SK.Initialize(settings))
// Place the menu 0.6 along the head's forward vector and 0.15 along Vec3.Right, oriented to
// look at the head position captured at startup.
Vec3 menuPosition = Input.Head.position + Input.Head.Forward * 0.6f + Vec3.Right * 0.15f;
Pose menuPose = new Pose(menuPosition, Quat.LookAt(menuPosition, Input.Head.position));
// NOTE(review): the right-hand operand of `*` is missing here (and in the UI.Space calls
// below) — presumably a unit scale factor; restore from the original source.
Vec2 menuSize = new Vec2(20, 10) *;
// Label of the Play/Stop button; flipped by the start/stop continuations below.
string audioText = "Play";
// Blocks synchronously until media capture has been initialized.
// NOTE(review): audioEncodingProperties is not used in the visible lines and no call to
// CreateAudioGraph is visible anywhere in this copy — confirm the graph is created before
// frameInputNode is used by the gain slider.
(AudioEncodingProperties audioEncodingProperties, MediaFrameReader audioFrameReader) = InitializeMediaCaptureAsync().GetAwaiter().GetResult();
// Created once so the same delegate instance can be added to and removed from FrameArrived.
var audioFrameHandler = CreateMediaFrameHandler();
// Core application loop
while (SK.Step(() =>
UI.WindowBegin("Audio Stream", ref menuPose, menuSize, moveType: UIMove.Exact);
switch (State)
// Idle state: the button is live and toggles between starting and stopping the reader.
case MenuState.Main:
if (UI.Button(audioText))
if (audioText.Equals("Play"))
// Start path: enter the transitional state, start the reader, and on success subscribe the
// frame handler, relabel the button and return to the idle state.
State = MenuState.StartingAudio;
audioFrameReader.StartAsync().AsTask().ContinueWith(audioStatus =>
if (audioStatus.Result != MediaFrameReaderStartStatus.Success)
// NOTE(review): the message interpolates `audioStatus` (the Task) rather than
// `audioStatus.Result`, and nothing visible resets State after this throw, so the menu
// would stay in StartingAudio — confirm intended behavior.
throw new InvalidOperationException($"Audio stream media frame reader failed to start: {audioStatus}");
audioFrameReader.FrameArrived += audioFrameHandler;
audioText = "Stop";
State = MenuState.Main;
// Stop path: enter the transitional state, stop the reader, then unsubscribe the handler,
// relabel the button and return to the idle state.
// NOTE(review): the `else` that separates this from the start path is not visible in this copy.
State = MenuState.StoppingAudio;
audioFrameReader.StopAsync().AsTask().ContinueWith(_ =>
audioFrameReader.FrameArrived -= audioFrameHandler;
audioText = "Play";
State = MenuState.Main;
// Transitional states: draw a button with the current label but ignore its result.
case MenuState.StartingAudio:
UI.Button("Play"); // does nothing... just for show
case MenuState.StoppingAudio:
UI.Button("Stop"); // does nothing... just for show
UI.Space(2 *;
// Gain slider: range 0..8 in steps of 0.5, as wide as the menu.
float sliderMin = 0;
float sliderMax = 8;
float sliderStep = 0.5f;
float sliderWidth = menuSize.x;
// When the slider reports a change, push the new gain to the frame input node.
if (UI.HSlider("Gain", ref outgoingGain, sliderMin, sliderMax, sliderStep, sliderWidth, UIConfirm.Pinch))
frameInputNode.OutgoingGain = outgoingGain;
// Fraction of the slider range shifted to [-0.5, 0.5]; used below to offset the label horizontally.
// normalized to [-0.5,0.5] in cm
float percentNormalized = (float)outgoingGain / sliderMax - 0.5f;
// Label text: the gain truncated (not rounded) to one decimal place.
// NOTE(review): the call these arguments belong to, and the first argument of Matrix.TS, are
// missing in this copy.
$"{string.Format("{0:0.0}", Math.Truncate(outgoingGain * 10) / 10)}",
Matrix.TS(, V.XXX(0.5f)),
offX: -percentNormalized * UI.LayoutLast.dimensions.x * 2,
offY: -UI.LayoutLast.dimensions.y * 1.5f,
offZ: -0.001f
UI.Space(2 *;
// NOTE(review): the action taken when "Exit" is pressed is not visible in this copy.
if (UI.Button("Exit"))
/// <summary>
/// Initializes a MediaCapture object for audio-only capture and creates a MediaFrameReader for
/// an audio frame source.
/// </summary>
/// <returns>
/// A task whose result is the audio encoding properties of the selected frame source's current
/// format, together with the frame reader created for that source.
/// </returns>
/// <exception cref="InvalidOperationException">No frame reader could be created for an audio source.</exception>
private static async Task<(AudioEncodingProperties, MediaFrameReader)> InitializeMediaCaptureAsync()
// Capture settings: audio-only streaming, speech category, CPU memory, exclusive control.
var settings = new MediaCaptureInitializationSettings
AudioProcessing = AudioProcessing.Default,
MediaCategory = MediaCategory.Speech,
StreamingCaptureMode = StreamingCaptureMode.Audio,
MemoryPreference = MediaCaptureMemoryPreference.Cpu,
SharingMode = MediaCaptureSharingMode.ExclusiveControl,
// Initialize the MediaCapture object
var mediaCapture = new MediaCapture();
await mediaCapture.InitializeAsync(settings);
AudioEncodingProperties audioEncodingProperties = null;
MediaFrameReader audioFrameReader = null;
// Visit every frame source whose stream type is Audio. Each iteration overwrites the reader
// and encoding properties, so as written the values of the last audio source are returned.
// NOTE(review): no early exit from the loop is visible in this copy — confirm whether the
// original stops at the first audio source; otherwise earlier readers are created and dropped.
foreach (var sourceInfo in mediaCapture.FrameSources
.Where(si => si.Value.Info.MediaStreamType == MediaStreamType.Audio))
// Look the source up again by its Id in the same FrameSources collection.
var audioFrameSource = mediaCapture.FrameSources[sourceInfo.Value.Info.Id];
audioFrameReader = await mediaCapture.CreateFrameReaderAsync(audioFrameSource);
audioEncodingProperties = audioFrameSource.CurrentFormat.AudioEncodingProperties;
// Reached with a null reader when no audio source was found (or reader creation returned null).
if (audioFrameReader == null)
throw new InvalidOperationException("Could not create a frame reader for the requested audio source.");
return (audioEncodingProperties, audioFrameReader);
/// <summary>
/// Creates an event handler for the FrameArrived event of the MediaFrameReader. The handler
/// acquires the latest audio frame, reduces it to a single channel of raw sample bytes, and
/// wraps those bytes in a new AudioFrame via RetrieveAudioData.
/// </summary>
/// <returns>The event handler.</returns>
// NOTE(review): the body uses `byte*`, which requires an unsafe context; no `unsafe` modifier
// is visible on this method in this copy — confirm against the original.
private static TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> CreateMediaFrameHandler()
return (sender, args) =>
// TryAcquireLatestFrame may yield null; the rest of the handler runs only for a non-null frame.
using var frame = sender.TryAcquireLatestFrame();
if (frame != null)
using MediaFrameReference mediaFrame = frame.AudioMediaFrame.FrameReference;
using AudioFrame audioFrame = frame.AudioMediaFrame.GetAudioFrame();
AudioEncodingProperties audioEncodingProperties = mediaFrame.AudioMediaFrame.AudioEncodingProperties;
// Lock the frame's buffer for reading and obtain a raw pointer to its bytes.
using AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Read);
using IMemoryBufferReference reference = buffer.CreateReference();
// NOTE(review): `capacity` is not used in the visible lines, so the copies below are not
// bounded by the size reported for the input buffer — confirm the computed sizes cannot exceed it.
((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out byte* audioDataIn, out uint capacity);
// Sample count is derived from the frame duration in whole milliseconds (the cast to uint
// drops any fractional millisecond) and the sample rate.
uint frameDurMs = (uint)mediaFrame.Duration.TotalMilliseconds;
uint sampleRate = audioEncodingProperties.SampleRate;
uint sampleCount = (frameDurMs * sampleRate) / 1000;
uint numAudioChannels = audioEncodingProperties.ChannelCount;
uint bytesPerSample = audioEncodingProperties.BitsPerSample / 8;
// Output holds one channel only.
// Buffer size is (number of samples) * (size of each sample)
byte[] audioDataOut = new byte[sampleCount * bytesPerSample];
// Convert to bytes
if (numAudioChannels > 1)
// Data is interlaced, so we need to change the multi-channel input
// to the supported single-channel output for StereoKit to consume
// Each iteration copies one sample from the start of an interleaved group (i.e. the first
// channel) and then skips the remaining channels of that group.
uint inPos = 0;
uint outPos = 0;
while (outPos < audioDataOut.Length)
byte* src = &audioDataIn[inPos];
fixed (byte* dst = &audioDataOut[outPos])
Buffer.MemoryCopy(src, dst, bytesPerSample, bytesPerSample);
inPos += bytesPerSample * numAudioChannels;
outPos += bytesPerSample;
// Single-channel input: copy audioDataOut.Length bytes straight from the input pointer.
// NOTE(review): the `else` introducing this branch is not visible in this copy.
// Buffer size is (number of samples) * (size of each sample)
byte* src = audioDataIn;
fixed (byte* dst = audioDataOut)
Buffer.MemoryCopy(src, dst, audioDataOut.Length, audioDataOut.Length);
// Wrap the mono bytes in a new AudioFrame.
// NOTE(review): `audioData` is not used in the visible lines — presumably it is handed to
// frameInputNode in a statement missing from this copy; confirm.
AudioFrame audioData = RetrieveAudioData(audioDataOut);
/// <summary>
/// Creates the AudioGraph, a device output node and a frame input node, storing them in the
/// static fields <c>graph</c>, <c>deviceOutputNode</c> and <c>frameInputNode</c>.
/// </summary>
/// <param name="audioEncodingProperties">Encoding used to create the frame input node.</param>
/// <returns>A task representing the asynchronous operation.</returns>
private static async Task CreateAudioGraph(AudioEncodingProperties audioEncodingProperties)
// Create an AudioGraph with default settings
AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Speech);
CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
if (result.Status != AudioGraphCreationStatus.Success)
// Cannot create graph
// NOTE(review): no early return is visible after this log line in this copy; the code below
// goes on to use result.Graph — confirm the original bails out here.
Log.Info(String.Format("AudioGraph Creation Error because {0}", result.Status.ToString()));
graph = result.Graph;
// Create a device output node
CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await graph.CreateDeviceOutputNodeAsync();
if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
// Cannot create device output node
// NOTE(review): as above, no early return is visible after this log line.
Log.Info(string.Format("Audio Device Output unavailable because {0}", deviceOutputNodeResult.Status.ToString()));
deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
Log.Info("Device Output Node successfully created");
// Create the FrameInputNode at the same format as the input device.
frameInputNode = graph.CreateFrameInputNode(audioEncodingProperties);
// NOTE(review): the statements for the two steps described below (stopping the frame input
// node, starting the graph), and any connection of frameInputNode to deviceOutputNode, are
// not visible in this copy — restore from the original source.
// Initialize the Frame Input Node in the stopped state
// Start the graph since we will only start/stop the frame input node
/// <summary>
/// Allocates a new AudioFrame with the same byte length as <paramref name="audioOut"/> and
/// copies the bytes into it.
/// </summary>
/// <param name="audioOut">Raw audio bytes to place in the frame.</param>
/// <returns>The newly created frame containing a copy of the input bytes.</returns>
unsafe private static AudioFrame RetrieveAudioData(byte[] audioOut)
// Buffer size is (number of samples) * (size of each sample)
// We choose to generate single channel (mono) audio. For multi-channel, multiply by number of channels
uint bufferSize = (uint)audioOut.Length;
AudioFrame frame = new AudioFrame(bufferSize);
// Lock the new frame's buffer for writing and obtain a raw pointer to it.
using AudioBuffer buffer = frame.LockBuffer(AudioBufferAccessMode.Write);
using IMemoryBufferReference reference = buffer.CreateReference();
byte* dataInBytes;
uint capacityInBytes;
float* dataInFloat;
((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out dataInBytes, out capacityInBytes);
// Cast to float since the data we are generating is float
// (The copy below is a raw byte copy, so the pointer type does not change what is written.)
dataInFloat = (float*)dataInBytes;
// NOTE(review): the destination size passed to MemoryCopy is bufferSize (the source length),
// not capacityInBytes, so the call does not check against the capacity the buffer reported.
fixed (byte* src = audioOut)
Buffer.MemoryCopy(src, dataInFloat, bufferSize, bufferSize);
return frame;
/// <summary>
/// Provides unsafe native APIs.
/// </summary>
private static class UnsafeNative
/// <summary>
/// Provides access to an IMemoryBuffer as an array of bytes. IMemoryBufferReference instances
/// are cast to this interface elsewhere in this file to reach their raw bytes.
/// </summary>
// NOTE(review): no interop attributes are visible on this interface in this copy, although
// System.Runtime.InteropServices is imported — confirm whether attribute lines were lost.
public unsafe interface IMemoryBufferByteAccess
/// <summary>
/// Gets an IMemoryBuffer as an array of bytes.
/// </summary>
/// <param name="buffer">Receives a pointer to the buffer data.</param>
/// <param name="capacity">Receives the number of bytes in the returned buffer.</param>
void GetBuffer(out byte* buffer, out uint capacity);
