// <copyright file="Program.cs" company="Nakamir, Inc.">
// Copyright (c) Nakamir, Inc. All rights reserved.
// </copyright>
namespace SK.AudioCapture.UWP;
using StereoKit;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
using Windows.Foundation;
using Windows.Media;
using Windows.Media.Capture;
using Windows.Media.Capture.Frames;
using Windows.Media.MediaProperties;
internal class Program
{
    private static Sound s_sound;

    static void Main(string[] _)
    {
        // Initialize StereoKit.
        SKSettings settings = new()
        {
            appName = "SKMediaCaptureStreamer",
            assetsFolder = "Assets",
        };
        if (!SK.Initialize(settings))
            Environment.Exit(1);

        MediaFrameReader audioFrameReader = InitializeMediaCaptureAsync().GetAwaiter().GetResult();

        // Create the sound stream and start it playing before any frames arrive,
        // so the frame handler always has a valid stream to write into.
        s_sound = Sound.CreateStream(2f);
        SoundInst soundInst = s_sound.Play(Input.Head.position);

        // Configure the reader before starting it: the acquisition mode must be set
        // before StartAsync, and attaching the handler first ensures no frames are missed.
        audioFrameReader.AcquisitionMode = MediaFrameReaderAcquisitionMode.Buffered;
        TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> audioFrameHandler = CreateMediaFrameHandler();
        audioFrameReader.FrameArrived += audioFrameHandler;

        MediaFrameReaderStartStatus audioStatus = audioFrameReader.StartAsync().AsTask().GetAwaiter().GetResult();
        if (audioStatus != MediaFrameReaderStartStatus.Success)
        {
            throw new InvalidOperationException($"Audio stream media frame reader failed to start: {audioStatus}");
        }

        // Keep the looping stream positioned at the user's head each frame.
        SK.Run(() => soundInst.Position = Input.Head.position);
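
        // Shutdown sketch (not in the original sample, which relies on process teardown):
        // once SK.Run returns, unhook the handler and stop the reader so the microphone
        // is released cleanly.
        audioFrameReader.FrameArrived -= audioFrameHandler;
        audioFrameReader.StopAsync().AsTask().GetAwaiter().GetResult();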
    }

    /// <summary>
    /// Initializes the MediaCapture object and creates a MediaFrameReader for the first available audio stream.
    /// </summary>
    /// <returns>A task whose result is the frame reader for the audio stream.</returns>
    private static async Task<MediaFrameReader> InitializeMediaCaptureAsync()
    {
        // Try to find the media capture settings for the requested capture configuration.
        var settings = new MediaCaptureInitializationSettings
        {
            AudioProcessing = AudioProcessing.Default,
            MediaCategory = MediaCategory.Media, // MediaCategory.Speech is not supported; it would need a resampling library.
            StreamingCaptureMode = StreamingCaptureMode.Audio,
            MemoryPreference = MediaCaptureMemoryPreference.Auto,
            SharingMode = MediaCaptureSharingMode.ExclusiveControl,
        };

        // Initialize the MediaCapture object.
        var mediaCapture = new MediaCapture();
        await mediaCapture.InitializeAsync(settings);

        // Pick the first audio frame source. Each KeyValuePair's Value already is the
        // frame source, so no second lookup through mediaCapture.FrameSources is needed
        // (the LogAudioFormats helper below can be used to inspect its formats).
        MediaFrameSource audioSource = mediaCapture.FrameSources
            .Where(si => si.Value.Info.MediaStreamType == MediaStreamType.Audio)
            .Select(si => si.Value)
            .FirstOrDefault();
        if (audioSource == null)
        {
            throw new InvalidOperationException("Could not find an audio frame source for the requested configuration.");
        }

        return await mediaCapture.CreateFrameReaderAsync(audioSource);
    }
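
    // Diagnostic helper, not part of the original sample: lists the formats an audio
    // frame source offers, which is useful for deciding whether resampling is needed
    // (see the MediaCategory.Speech note above). Assumes a Windows SDK recent enough
    // that MediaFrameFormat exposes AudioEncodingProperties for audio sources.
    private static void LogAudioFormats(MediaFrameSource audioSource)
    {
        foreach (MediaFrameFormat format in audioSource.SupportedFormats)
        {
            AudioEncodingProperties props = format.AudioEncodingProperties;
            if (props != null)
            {
                System.Diagnostics.Debug.WriteLine(
                    $"{props.SampleRate} Hz, {props.ChannelCount} channel(s), {props.BitsPerSample}-bit, subtype '{props.Subtype}'");
            }
        }
    }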
    /// <summary>
    /// Creates an event handler that handles the FrameArrived event of the MediaFrameReader.
    /// </summary>
    /// <returns>The event handler.</returns>
    private static TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> CreateMediaFrameHandler()
    {
        return (sender, args) =>
        {
            using MediaFrameReference frame = sender.TryAcquireLatestFrame();
            if (frame != null)
            {
                // Note: frame.AudioMediaFrame.FrameReference is the same MediaFrameReference
                // as frame itself, so wrapping it in a second using would dispose it twice.
                using AudioFrame audioFrame = frame.AudioMediaFrame.GetAudioFrame();
                AudioEncodingProperties audioEncodingProperties = frame.AudioMediaFrame.AudioEncodingProperties;
                unsafe
                {
                    using AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Read);
                    using IMemoryBufferReference reference = buffer.CreateReference();
                    ((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out byte* audioDataIn, out uint capacity);

                    int frameDurMs = (int)frame.Duration.TotalMilliseconds;
                    int sampleRate = (int)audioEncodingProperties.SampleRate;
                    int sampleCount = frameDurMs * sampleRate / 1000;
                    int numAudioChannels = (int)audioEncodingProperties.ChannelCount;
                    int bytesPerSample = (int)audioEncodingProperties.BitsPerSample / 8;

                    // Buffer size is (number of samples) * (size of each sample); for
                    // multi-channel audio this is the byte count of a single channel.
                    int bufferSize = sampleCount * bytesPerSample;

                    // (optional) Scale the audio gain by a percentage amount (e.g., 1.2f == 20% gain).
                    // Both scenarios below assume 32-bit IEEE float samples (see IsIeeeFloat below).
                    ScaleAudioGain((IntPtr)audioDataIn, bufferSize, 1.2f);

                    // SCENARIO 1: Real-time playback.
                    //ScenarioOne_WriteSamplesDirectly((IntPtr)audioDataIn, bufferSize / 4);

                    // SCENARIO 2: Roundtrip to memory.
                    ScenarioTwo_WriteSamplesRoundtrip(audioDataIn, numAudioChannels, bytesPerSample, bufferSize);
                }
            }
        };
    }
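
    // Guard sketch, not part of the original sample: the gain and playback code above
    // treats the buffer as 32-bit IEEE float samples, which is what StereoKit's
    // WriteSamples expects. This check assumes WinRT reports IEEE float audio with the
    // "Float" subtype string.
    private static bool IsIeeeFloat(AudioEncodingProperties props) =>
        string.Equals(props.Subtype, "Float", StringComparison.OrdinalIgnoreCase)
        && props.BitsPerSample == 32;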
    /// <summary>
    /// Applies a gain adjustment to a block of raw audio data.
    /// </summary>
    /// <param name="audioData">A pointer to the raw audio data in IEEE float format.</param>
    /// <param name="bufferSize">The size of the audio data buffer in bytes.</param>
    /// <param name="gainFactor">The gain factor to apply. Values greater than 1f increase gain, while values less than 1f decrease gain.</param>
    private static unsafe void ScaleAudioGain(IntPtr audioData, int bufferSize, float gainFactor)
    {
        int numSamples = bufferSize / 4;
        for (int i = 0; i < numSamples; ++i)
        {
            float* sample = (float*)audioData + i;
            *sample = Math.Min(Math.Max(*sample * gainFactor, -1.0f), 1.0f);
        }
    }
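
    // Decibel-based variant, not part of the original sample: tuning gain in dB is often
    // more intuitive than a raw multiplier. Converts dB to a linear factor with the
    // standard 10^(dB/20) relationship, then reuses ScaleAudioGain above.
    private static void ScaleAudioGainDb(IntPtr audioData, int bufferSize, float gainDb)
    {
        // +6 dB roughly doubles the amplitude; 0 dB leaves it unchanged.
        float gainFactor = (float)Math.Pow(10.0, gainDb / 20.0);
        ScaleAudioGain(audioData, bufferSize, gainFactor);
    }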
    private static void ScenarioOne_WriteSamplesDirectly(IntPtr audioData, int sampleSize)
    {
        // Hand the samples straight to the sound stream; sampleSize is a count of
        // 32-bit float samples, not bytes.
        s_sound.WriteSamples(audioData, sampleSize);
    }
    private static unsafe void ScenarioTwo_WriteSamplesRoundtrip(byte* audioData, int numAudioChannels, int bytesPerSample, int bufferSize)
    {
        byte[] audioDataOut = new byte[bufferSize];

        // Convert to bytes.
        if (numAudioChannels > 1)
        {
            // Data is interleaved, so we need to reduce the multi-channel input to the
            // single-channel output StereoKit consumes by copying the first channel of
            // each frame (see the DownmixByAveraging sketch below for an alternative).
            int inPos = 0;
            int outPos = 0;
            while (outPos < audioDataOut.Length)
            {
                byte* src = &audioData[inPos];
                fixed (byte* dst = &audioDataOut[outPos])
                {
                    Buffer.MemoryCopy(src, dst, bytesPerSample, bytesPerSample);
                }
                inPos += bytesPerSample * numAudioChannels;
                outPos += bytesPerSample;
            }
        }
        else
        {
            byte* src = audioData;
            fixed (byte* dst = audioDataOut)
            {
                Buffer.MemoryCopy(src, dst, audioDataOut.Length, audioDataOut.Length);
            }
        }

        // Pin the managed buffer so its address is stable while StereoKit reads it.
        var pinnedArray = GCHandle.Alloc(audioDataOut, GCHandleType.Pinned);
        IntPtr pointer = pinnedArray.AddrOfPinnedObject();
        s_sound.WriteSamples(pointer, audioDataOut.Length / 4);
        pinnedArray.Free();
    }
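
    // Downmix sketch, not part of the original sample: instead of keeping only the first
    // channel, average all channels into the mono output. Assumes 32-bit IEEE float
    // samples as elsewhere in this file; monoOut must hold at least bufferSize / 4 floats,
    // and the result can then be handed to s_sound.WriteSamples.
    private static unsafe void DownmixByAveraging(byte* audioData, int numAudioChannels, int bufferSize, float[] monoOut)
    {
        float* samplesIn = (float*)audioData;
        int frameCount = bufferSize / 4; // one float per output frame
        for (int frame = 0; frame < frameCount; ++frame)
        {
            float sum = 0f;
            for (int ch = 0; ch < numAudioChannels; ++ch)
            {
                sum += samplesIn[(frame * numAudioChannels) + ch];
            }
            monoOut[frame] = sum / numAudioChannels;
        }
    }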
}
/// <summary>
/// Provides unsafe native APIs.
/// </summary>
public static class UnsafeNative
{
    /// <summary>
    /// Provides access to an IMemoryBuffer as an array of bytes.
    /// </summary>
    [ComImport]
    [Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
    [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
    public unsafe interface IMemoryBufferByteAccess
    {
        /// <summary>
        /// Gets an IMemoryBuffer as an array of bytes.
        /// </summary>
        /// <param name="buffer">A pointer to a byte array containing the buffer data.</param>
        /// <param name="capacity">The number of bytes in the returned array.</param>
        void GetBuffer(out byte* buffer, out uint capacity);
    }
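
    // Convenience sketch, not part of the original sample: wraps GetBuffer so callers can
    // work with a Span<byte> instead of raw pointers. Assumes the target framework exposes
    // Span<T>; the span is only valid while the IMemoryBufferReference is alive.
    public static unsafe Span<byte> AsSpan(IMemoryBufferReference reference)
    {
        ((IMemoryBufferByteAccess)reference).GetBuffer(out byte* data, out uint capacity);
        return new Span<byte>(data, (int)capacity);
    }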
}