// <copyright file="Program.cs" company="Nakamir, Inc.">
// Copyright (c) Nakamir, Inc. All rights reserved.
// </copyright>
namespace SK.AudioCapture.UWP;
using StereoKit;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
using Windows.Foundation;
using Windows.Media;
using Windows.Media.Capture;
using Windows.Media.Capture.Frames;
using Windows.Media.MediaProperties;
internal class Program
{
    private static Sound s_sound;

    static void Main(string[] _)
    {
        // Initialize StereoKit.
        SKSettings settings = new()
        {
            appName = "SKMediaCaptureStreamer",
            assetsFolder = "Assets",
        };
        if (!SK.Initialize(settings))
            Environment.Exit(1);

        MediaFrameReader audioFrameReader = InitializeMediaCaptureAsync().GetAwaiter().GetResult();

        // Create the sound stream and start it playing before any frames arrive,
        // so the frame handler always has a valid stream to write into.
        s_sound = Sound.CreateStream(2f);
        SoundInst soundInst = s_sound.Play(Input.Head.position);

        // Configure the reader before starting it: the acquisition mode must be set
        // before StartAsync, and attaching the handler first ensures no frames are missed.
        audioFrameReader.AcquisitionMode = MediaFrameReaderAcquisitionMode.Buffered;
        TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> audioFrameHandler = CreateMediaFrameHandler();
        audioFrameReader.FrameArrived += audioFrameHandler;

        MediaFrameReaderStartStatus audioStatus = audioFrameReader.StartAsync().AsTask().GetAwaiter().GetResult();
        if (audioStatus != MediaFrameReaderStartStatus.Success)
        {
            throw new InvalidOperationException($"Audio stream media frame reader failed to start: {audioStatus}");
        }

        // Keep the looping stream positioned at the user's head each frame.
        SK.Run(() => soundInst.Position = Input.Head.position);
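
        // Shutdown sketch (not in the original sample, which relies on process teardown):
        // once SK.Run returns, unhook the handler and stop the reader so the microphone
        // is released cleanly.
        audioFrameReader.FrameArrived -= audioFrameHandler;
        audioFrameReader.StopAsync().AsTask().GetAwaiter().GetResult();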
    }

    /// <summary>
    /// Initializes the MediaCapture object and creates a MediaFrameReader for the first available audio stream.
    /// </summary>
    /// <returns>A task whose result is the frame reader for the audio stream.</returns>
    private static async Task<MediaFrameReader> InitializeMediaCaptureAsync()
    {
        // Try to find the media capture settings for the requested capture configuration.
        var settings = new MediaCaptureInitializationSettings
        {
            AudioProcessing = AudioProcessing.Default,
            MediaCategory = MediaCategory.Media, // MediaCategory.Speech is not supported; it would need a resampling library.
            StreamingCaptureMode = StreamingCaptureMode.Audio,
            MemoryPreference = MediaCaptureMemoryPreference.Auto,
            SharingMode = MediaCaptureSharingMode.ExclusiveControl,
        };

        // Initialize the MediaCapture object.
        var mediaCapture = new MediaCapture();
        await mediaCapture.InitializeAsync(settings);

        // Pick the first audio frame source. Each KeyValuePair's Value already is the
        // frame source, so no second lookup through mediaCapture.FrameSources is needed
        // (the LogAudioFormats helper below can be used to inspect its formats).
        MediaFrameSource audioSource = mediaCapture.FrameSources
            .Where(si => si.Value.Info.MediaStreamType == MediaStreamType.Audio)
            .Select(si => si.Value)
            .FirstOrDefault();
        if (audioSource == null)
        {
            throw new InvalidOperationException("Could not find an audio frame source for the requested configuration.");
        }

        return await mediaCapture.CreateFrameReaderAsync(audioSource);
    }
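
    // Diagnostic helper, not part of the original sample: lists the formats an audio
    // frame source offers, which is useful for deciding whether resampling is needed
    // (see the MediaCategory.Speech note above). Assumes a Windows SDK recent enough
    // that MediaFrameFormat exposes AudioEncodingProperties for audio sources.
    private static void LogAudioFormats(MediaFrameSource audioSource)
    {
        foreach (MediaFrameFormat format in audioSource.SupportedFormats)
        {
            AudioEncodingProperties props = format.AudioEncodingProperties;
            if (props != null)
            {
                System.Diagnostics.Debug.WriteLine(
                    $"{props.SampleRate} Hz, {props.ChannelCount} channel(s), {props.BitsPerSample}-bit, subtype '{props.Subtype}'");
            }
        }
    }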
    /// <summary>
    /// Creates an event handler that handles the FrameArrived event of the MediaFrameReader.
    /// </summary>
    /// <returns>The event handler.</returns>
    private static TypedEventHandler<MediaFrameReader, MediaFrameArrivedEventArgs> CreateMediaFrameHandler()
    {
        return (sender, args) =>
        {
            using MediaFrameReference frame = sender.TryAcquireLatestFrame();
            if (frame != null)
            {
                // Note: frame.AudioMediaFrame.FrameReference is the same MediaFrameReference
                // as frame itself, so wrapping it in a second using would dispose it twice.
                using AudioFrame audioFrame = frame.AudioMediaFrame.GetAudioFrame();
                AudioEncodingProperties audioEncodingProperties = frame.AudioMediaFrame.AudioEncodingProperties;
                unsafe
                {
                    using AudioBuffer buffer = audioFrame.LockBuffer(AudioBufferAccessMode.Read);
                    using IMemoryBufferReference reference = buffer.CreateReference();
                    ((UnsafeNative.IMemoryBufferByteAccess)reference).GetBuffer(out byte* audioDataIn, out uint capacity);

                    int frameDurMs = (int)frame.Duration.TotalMilliseconds;
                    int sampleRate = (int)audioEncodingProperties.SampleRate;
                    int sampleCount = frameDurMs * sampleRate / 1000;
                    int numAudioChannels = (int)audioEncodingProperties.ChannelCount;
                    int bytesPerSample = (int)audioEncodingProperties.BitsPerSample / 8;

                    // Buffer size is (number of samples) * (size of each sample); for
                    // multi-channel audio this is the byte count of a single channel.
                    int bufferSize = sampleCount * bytesPerSample;

                    // (optional) Scale the audio gain by a percentage amount (e.g., 1.2f == 20% gain).
                    // Both scenarios below assume 32-bit IEEE float samples (see IsIeeeFloat below).
                    ScaleAudioGain((IntPtr)audioDataIn, bufferSize, 1.2f);

                    // SCENARIO 1: Real-time playback.
                    //ScenarioOne_WriteSamplesDirectly((IntPtr)audioDataIn, bufferSize / 4);

                    // SCENARIO 2: Roundtrip to memory.
                    ScenarioTwo_WriteSamplesRoundtrip(audioDataIn, numAudioChannels, bytesPerSample, bufferSize);
                }
            }
        };
    }
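
    // Guard sketch, not part of the original sample: the gain and playback code above
    // treats the buffer as 32-bit IEEE float samples, which is what StereoKit's
    // WriteSamples expects. This check assumes WinRT reports IEEE float audio with the
    // "Float" subtype string.
    private static bool IsIeeeFloat(AudioEncodingProperties props) =>
        string.Equals(props.Subtype, "Float", StringComparison.OrdinalIgnoreCase)
        && props.BitsPerSample == 32;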
    /// <summary>
    /// Applies a gain adjustment to a block of raw audio data.
    /// </summary>
    /// <param name="audioData">A pointer to the raw audio data in IEEE float format.</param>
    /// <param name="bufferSize">The size of the audio data buffer in bytes.</param>
    /// <param name="gainFactor">The gain factor to apply. Values greater than 1f increase gain, while values less than 1f decrease gain.</param>
    private static unsafe void ScaleAudioGain(IntPtr audioData, int bufferSize, float gainFactor)
    {
        int numSamples = bufferSize / 4;
        for (int i = 0; i < numSamples; ++i)
        {
            float* sample = (float*)audioData + i;
            *sample = Math.Min(Math.Max(*sample * gainFactor, -1.0f), 1.0f);
        }
    }
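
    // Decibel-based variant, not part of the original sample: tuning gain in dB is often
    // more intuitive than a raw multiplier. Converts dB to a linear factor with the
    // standard 10^(dB/20) relationship, then reuses ScaleAudioGain above.
    private static void ScaleAudioGainDb(IntPtr audioData, int bufferSize, float gainDb)
    {
        // +6 dB roughly doubles the amplitude; 0 dB leaves it unchanged.
        float gainFactor = (float)Math.Pow(10.0, gainDb / 20.0);
        ScaleAudioGain(audioData, bufferSize, gainFactor);
    }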
    private static void ScenarioOne_WriteSamplesDirectly(IntPtr audioData, int sampleSize)
    {
        // Hand the samples straight to the sound stream; sampleSize is a count of
        // 32-bit float samples, not bytes.
        s_sound.WriteSamples(audioData, sampleSize);
    }
    private static unsafe void ScenarioTwo_WriteSamplesRoundtrip(byte* audioData, int numAudioChannels, int bytesPerSample, int bufferSize)
    {
        byte[] audioDataOut = new byte[bufferSize];

        // Convert to bytes.
        if (numAudioChannels > 1)
        {
            // Data is interleaved, so we need to reduce the multi-channel input to the
            // single-channel output StereoKit consumes by copying the first channel of
            // each frame (see the DownmixByAveraging sketch below for an alternative).
            int inPos = 0;
            int outPos = 0;
            while (outPos < audioDataOut.Length)
            {
                byte* src = &audioData[inPos];
                fixed (byte* dst = &audioDataOut[outPos])
                {
                    Buffer.MemoryCopy(src, dst, bytesPerSample, bytesPerSample);
                }
                inPos += bytesPerSample * numAudioChannels;
                outPos += bytesPerSample;
            }
        }
        else
        {
            byte* src = audioData;
            fixed (byte* dst = audioDataOut)
            {
                Buffer.MemoryCopy(src, dst, audioDataOut.Length, audioDataOut.Length);
            }
        }

        // Pin the managed buffer so its address is stable while StereoKit reads it.
        var pinnedArray = GCHandle.Alloc(audioDataOut, GCHandleType.Pinned);
        IntPtr pointer = pinnedArray.AddrOfPinnedObject();
        s_sound.WriteSamples(pointer, audioDataOut.Length / 4);
        pinnedArray.Free();
    }
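
    // Downmix sketch, not part of the original sample: instead of keeping only the first
    // channel, average all channels into the mono output. Assumes 32-bit IEEE float
    // samples as elsewhere in this file; monoOut must hold at least bufferSize / 4 floats,
    // and the result can then be handed to s_sound.WriteSamples.
    private static unsafe void DownmixByAveraging(byte* audioData, int numAudioChannels, int bufferSize, float[] monoOut)
    {
        float* samplesIn = (float*)audioData;
        int frameCount = bufferSize / 4; // one float per output frame
        for (int frame = 0; frame < frameCount; ++frame)
        {
            float sum = 0f;
            for (int ch = 0; ch < numAudioChannels; ++ch)
            {
                sum += samplesIn[(frame * numAudioChannels) + ch];
            }
            monoOut[frame] = sum / numAudioChannels;
        }
    }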
}
/// <summary>
/// Provides unsafe native APIs.
/// </summary>
public static class UnsafeNative
{
    /// <summary>
    /// Provides access to an IMemoryBuffer as an array of bytes.
    /// </summary>
    [ComImport]
    [Guid("5B0D3235-4DBA-4D44-865E-8F1D0E4FD04D")]
    [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
    public unsafe interface IMemoryBufferByteAccess
    {
        /// <summary>
        /// Gets an IMemoryBuffer as an array of bytes.
        /// </summary>
        /// <param name="buffer">A pointer to a byte array containing the buffer data.</param>
        /// <param name="capacity">The number of bytes in the returned array.</param>
        void GetBuffer(out byte* buffer, out uint capacity);
    }
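
    // Convenience sketch, not part of the original sample: wraps GetBuffer so callers can
    // work with a Span<byte> instead of raw pointers. Assumes the target framework exposes
    // Span<T>; the span is only valid while the IMemoryBufferReference is alive.
    public static unsafe Span<byte> AsSpan(IMemoryBufferReference reference)
    {
        ((IMemoryBufferByteAccess)reference).GetBuffer(out byte* data, out uint capacity);
        return new Span<byte>(data, (int)capacity);
    }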
}