@austinbhale
Last active October 5, 2022 22:02
SK Media Capture to Audio Graph's device output on the HL2
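A StereoKit sample for HoloLens 2 that reads microphone audio through a MediaCapture frame reader and plays it back through an AudioGraph device output node, with an in-world UI for starting/stopping the stream and adjusting the output gain.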
namespace SKAudioGraph
{
using StereoKit;
using System;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Windows.Foundation;
using Windows.Media;
using Windows.Media.Audio;
using Windows.Media.Capture;
using Windows.Media.Capture.Frames;
using Windows.Media.MediaProperties;
using Windows.Media.Render;
internal class Program
{
private static AudioGraph graph;
private static AudioDeviceOutputNode deviceOutputNode;
private static AudioFrameInputNode frameInputNode;
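// Tracks whether an asynchronous start/stop of the audio stream is still in
// flight, so the UI ignores extra button presses until the transition completes.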
private enum MenuState
{
Main,
StartingAudio,
StoppingAudio,
}
private static MenuState State
{
get { return (MenuState)Interlocked.CompareExchange(ref state, 0, 0); }
set { Interlocked.Exchange(ref state, (int)value); }
}
private static int state = 0;
private static double outgoingGain = 1;
static void Main(string[] args)
{
// Initialize StereoKit
SKSettings settings = new SKSettings
{
appName = "SKAudioGraph",
assetsFolder = "Assets",
};
if (!SK.Initialize(settings))
Environment.Exit(1);
Vec3 menuPosition = Input.Head.position + Input.Head.Forward * 0.6f + Vec3.Right * 0.15f;
Pose menuPose = new Pose(menuPosition, Quat.LookAt(menuPosition, Input.Head.position));
Vec2 menuSize = new Vec2(20, 10) * U.cm;
string audioText = "Play";
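// Set up the capture-to-playback pipeline once, before the render loop starts:
// initialize MediaCapture to obtain an audio frame reader and its encoding
// properties, build an AudioGraph whose frame input node matches that format,
// and create the handler that pushes captured frames into the graph.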
(AudioEncodingProperties audioEncodingProperties, MediaFrameReader audioFrameReader) = InitializeMediaCaptureAsync().GetAwaiter().GetResult();
CreateAudioGraph(audioEncodingProperties).GetAwaiter().GetResult();
var audioFrameHandler = CreateMediaFrameHandler();
// Core application loop
while (SK.Step(() =>
{
UI.WindowBegin("Audio Stream", ref menuPose, menuSize, moveType: UIMove.Exact);
switch (State)
{
case MenuState.Main:
if (UI.Button(audioText))
{
if (audioText.Equals("Play"))
{
State = MenuState.StartingAudio;
audioFrameReader.StartAsync().AsTask().ContinueWith(audioStatus =>
{
if (audioStatus.Result != MediaFrameReaderStartStatus.Success)
{
throw new InvalidOperationException($"Audio stream media frame reader failed to start: {audioStatus.Result}");
}
frameInputNode.Start();
audioFrameReader.FrameArrived += audioFrameHandler;
audioText = "Stop";
State = MenuState.Main;
});
}
else
{
State = MenuState.StoppingAudio;
audioFrameReader.StopAsync().AsTask().ContinueWith(_ =>
{
frameInputNode.Stop();
audioFrameReader.FrameArrived -= audioFrameHandler;
audioText = "Play";
State = MenuState.Main;
});
}
}
break;
case MenuState.StartingAudio:
UI.Button("Play"); // does nothing... just for show
break;
case MenuState.StoppingAudio:
UI.Button("Stop"); // does nothing... just for show
break;
}
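// Gain slider: drives the frame input node's OutgoingGain from 0 to 8 in 0.5 steps.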
UI.Space(2 * U.cm);
float sliderMin = 0;
float sliderMax = 8;
float sliderStep = 0.5f;
float sliderWidth = menuSize.x;
UI.PanelBegin(UIPad.None);
if (UI.HSlider("Gain", ref outgoingGain, sliderMin, sliderMax, sliderStep, sliderWidth, UIConfirm.Pinch))
{
frameInputNode.OutgoingGain = outgoingGain;
}
UI.PanelEnd();
// Normalize the gain to [-0.5, 0.5] of the slider's range so the value readout tracks the slider thumb
float percentNormalized = (float)outgoingGain / sliderMax - 0.5f;
Text.Add(
$"{string.Format("{0:0.0}", Math.Truncate(outgoingGain * 10) / 10)}",
Matrix.TS(UI.LayoutLast.center, V.XXX(0.5f)),
offX: -percentNormalized * UI.LayoutLast.dimensions.x * 2,
offY: -UI.LayoutLast.dimensions.y * 1.5f,
offZ: -0.001f
);
UI.Space(2 * U.cm);
if (UI.Button("Exit"))
{
SK.Quit();
}
UI.WindowEnd();
}))
{
}
SK.Shutdown();
}
/// <summary>
/// Initializes the MediaCapture object and creates the MediaFrameReaders for the configured capture streams.
/// </summary>
/// <returns>A task representing the asynchronous operation.</returns>
private static async Task<(AudioEncodingProperties, MediaFrameReader)> InitializeMediaCaptureAsync()
{
// Try to find the media capture settings for the requested capture configuration
var settings = new MediaCaptureInitializationSettings
{
AudioProcessing = AudioProcessing.Default,
MediaCategory = MediaCategory.Speech,
StreamingCaptureMode = StreamingCaptureMode.Audio,
MemoryPreference = MediaCaptureMemoryPreference.Cpu,
SharingMode = MediaCaptureSharingMode.ExclusiveControl,
};
// Initialize the MediaCapture object
var mediaCapture = new MediaCapture();
await mediaCapture.InitializeAsync(settings);
AudioEncodingProperties audioEncodingProperties = null;
MediaFrameReader audioFrameReader = null;
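// Enumerate the capture device's audio frame sources, keeping the frame reader
// and encoding properties so the AudioGraph input node can match the mic format.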
foreach (var sourceInfo in mediaCapture.FrameSources
.Where(si => si.Value.Info.MediaStreamType == MediaStreamType.Audio))
{
var audioFrameSource = mediaCapture.FrameSources[sourceInfo.Value.Info.Id];
audioFrameReader = await mediaCapture.CreateFrameReaderAsync(audioFrameSource);
audioEncodingProperties = audioFrameSource.CurrentFormat.AudioEncodingProperties;
}
if (audioFrameReader == null)
{
throw new InvalidOperationException("Could not create a frame reader for the requested audio source.");
}
return (audioEncodingProperties, audioFrameReader);
}
/// <summary>
/// Creates an event handler that handles the FrameArrived event of the MediaFrameReader.
/// </summary>
/// <returns>The event handler.</returns>
private static TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> CreateMediaFrameHandler()
{
return (sender, args) =>
{
using var frame = sender.TryAcquireLatestFrame();
if (frame != null)
{
using MediaFrameReference mediaFrame = frame.AudioMediaFrame.FrameReference;
using AudioFrame audioFrame = frame.AudioMediaFrame.GetAudioFrame();
AudioEncodingProperties audioEncodingProperties = mediaFrame.AudioMediaFrame.AudioEncodingProperties;
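// Access the frame's raw PCM bytes through the COM IMemoryBufferByteAccess
// interface declared in UnsafeNative below.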
unsafe
{
using AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Read);
using IMemoryBufferReference reference = buffer.CreateReference();
((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out byte* audioDataIn, out uint capacity);
uint frameDurMs = (uint)mediaFrame.Duration.TotalMilliseconds;
uint sampleRate = audioEncodingProperties.SampleRate;
uint sampleCount = (frameDurMs * sampleRate) / 1000;
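// e.g. a 10 ms frame at a 48 kHz sample rate yields 480 samples per channel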
uint numAudioChannels = audioEncodingProperties.ChannelCount;
uint bytesPerSample = audioEncodingProperties.BitsPerSample / 8;
// Buffer size is (number of samples) * (size of each sample)
byte[] audioDataOut = new byte[sampleCount * bytesPerSample];
// Convert to bytes
if (numAudioChannels > 1)
{
// The data is interleaved, so take the first channel of the multi-channel
// input to produce a single-channel output for playback
uint inPos = 0;
uint outPos = 0;
while (outPos < audioDataOut.Length)
{
byte* src = &audioDataIn[inPos];
fixed (byte* dst = &audioDataOut[outPos])
{
Buffer.MemoryCopy(src, dst, bytesPerSample, bytesPerSample);
}
inPos += bytesPerSample * numAudioChannels;
outPos += bytesPerSample;
}
}
else
{
// Single-channel input: copy the samples straight through
byte* src = audioDataIn;
fixed (byte* dst = audioDataOut)
{
Buffer.MemoryCopy(src, dst, audioDataOut.Length, audioDataOut.Length);
}
}
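// audioDataOut now holds one channel of PCM in the source sample format,
// ready to be wrapped in an AudioFrame and handed to the frame input node.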
AudioFrame audioData = RetrieveAudioData(audioDataOut);
frameInputNode.AddFrame(audioData);
}
}
};
}
private static async Task CreateAudioGraph(AudioEncodingProperties audioEncodingProperties)
{
// Create an AudioGraph with default settings
AudioGraphSettings settings = new AudioGraphSettings(AudioRenderCategory.Speech);
CreateAudioGraphResult result = await AudioGraph.CreateAsync(settings);
if (result.Status != AudioGraphCreationStatus.Success)
{
// Cannot create graph
Log.Info(string.Format("AudioGraph creation failed: {0}", result.Status));
return;
}
graph = result.Graph;
// Create a device output node
CreateAudioDeviceOutputNodeResult deviceOutputNodeResult = await graph.CreateDeviceOutputNodeAsync();
if (deviceOutputNodeResult.Status != AudioDeviceNodeCreationStatus.Success)
{
// Cannot create device output node
Log.Info(string.Format("Audio Device Output unavailable because {0}", deviceOutputNodeResult.Status.ToString()));
}
deviceOutputNode = deviceOutputNodeResult.DeviceOutputNode;
Log.Info("Device Output Node successfully created");
// Create the FrameInputNode at the same format as the input device.
frameInputNode = graph.CreateFrameInputNode(audioEncodingProperties);
frameInputNode.AddOutgoingConnection(deviceOutputNode);
// Initialize the Frame Input Node in the stopped state
frameInputNode.Stop();
// Start the graph since we will only start/stop the frame input node
graph.Start();
}
unsafe private static AudioFrame RetrieveAudioData(byte[] audioOut)
{
// The incoming buffer already contains single-channel (mono) samples, so the
// frame's buffer size is simply the byte length of that data
uint bufferSize = (uint)audioOut.Length;
AudioFrame frame = new AudioFrame(bufferSize);
using AudioBuffer buffer = frame.LockBuffer(AudioBufferAccessMode.Write);
using IMemoryBufferReference reference = buffer.CreateReference();
byte* dataInBytes;
uint capacityInBytes;
float* dataInFloat;
((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out dataInBytes, out capacityInBytes);
// Cast to float since the captured data is floating-point PCM; the copy below is still byte-for-byte
dataInFloat = (float*)dataInBytes;
fixed (byte* src = audioOut)
{
Buffer.MemoryCopy(src, dataInFloat, bufferSize, bufferSize);
}
return frame;
}
/// <summary>
/// Provides unsafe native APIs.
/// </summary>
private static class UnsafeNative
{
/// <summary>
/// Provides access to an IMemoryBuffer as an array of bytes.
/// </summary>
[ComImport]
[Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
public unsafe interface IMemoryBufferByteAccess
{
/// <summary>
/// Gets an IMemoryBuffer as an array of bytes.
/// </summary>
/// <param name="buffer">A pointer to a byte array containing the buffer data.</param>
/// <param name="capacity">The number of bytes in the returned array.</param>
void GetBuffer(out byte* buffer, out uint capacity);
}
}
}
}
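Note: running this on a device assumes the app package declares the Microphone capability; without it, MediaCapture.InitializeAsync will fail to grant access to the audio capture stream.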