Skip to content

Instantly share code, notes, and snippets.

@martindevans
Last active February 22, 2022 17:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martindevans/599b6aebf0c5ac1ada7860eb49106315 to your computer and use it in GitHub Desktop.
Save martindevans/599b6aebf0c5ac1ada7860eb49106315 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using Dissonance.Audio.Capture;
using JetBrains.Annotations;
using NatSuite.Devices;
using NAudio.Wave;
using UnityEngine;
namespace Dissonance.Integrations.NatDevice
{
/// <summary>
/// Dissonance microphone capture backend driven by NatDevice. Selects an audio
/// input device, subscribes to its sample callbacks, downmixes to mono where
/// necessary and forwards the samples to registered <see cref="IMicrophoneSubscriber"/>s.
/// </summary>
public class NatDeviceMicrophoneInput
    : MonoBehaviour, IMicrophoneCapture, IMicrophoneDeviceList
{
    private static readonly Log Log = Logs.Create(LogCategory.Recording, nameof(NatDeviceMicrophoneInput));

    // Scratch buffer re-used across audio callbacks to avoid per-callback allocations.
    // Grown on demand by ResizeTempBuffer, never shrunk.
    private float[] _tempSampleBuffer;

    // NOTE(review): NatDevice audio callbacks presumably arrive off the Unity main
    // thread, hence every access to this list is locked — confirm against NatDevice docs.
    private readonly List<IMicrophoneSubscriber> _subscribers = new List<IMicrophoneSubscriber>();

    // Mono format reported to subscribers (sample rate taken from the device, 1 channel).
    private WaveFormat _format;

    // Currently capturing device; only meaningful while recording.
    private AudioDevice _device;

    // Set by the Unity audio-configuration callback; polled (and cleared) in UpdateSubscribers.
    private bool _audioDeviceChanged;

    public bool IsRecording => _device != null && _device.running;

    // Latency is unknown for NatDevice capture, so report zero.
    public TimeSpan Latency => TimeSpan.Zero;

    [CanBeNull] public AudioDevice Device => IsRecording ? _device : null;

    string IMicrophoneCapture.Device => IsRecording ? _device?.name : null;

    #region start
    /// <summary>
    /// Begin capturing from the device matching <paramref name="name"/> (or the first
    /// available audio device if the name is null/whitespace or matches nothing).
    /// </summary>
    /// <param name="name">Preferred device name (case-insensitive), or null for any device.</param>
    /// <returns>The mono <see cref="WaveFormat"/> of the captured audio, or null if no device was found.</returns>
    // ReSharper disable once ParameterHidesMember
    public WaveFormat StartCapture(string name)
    {
        // Just in case the device is already running, stop it first. This will only happen if the Start/Stop methods are improperly used.
        StopCapture();

        // Choose an input device, if we can't find one we can't record audio.
        var device = ChooseAudioDevice(name);
        if (device == null)
            return null;

        // Store the device we're using
        _device = device;
        _format = new WaveFormat(device.sampleRate, 1);

        // Watch for device changes - we need to reset if the audio device changes
        AudioSettings.OnAudioConfigurationChanged += OnAudioDeviceChanged;
        _audioDeviceChanged = false;

        // Reset subscribers to prepare them for another stream of data
        lock (_subscribers)
            for (var i = 0; i < _subscribers.Count; i++)
                _subscribers[i].Reset();

        // Start recording. Which delegate is used depends upon whether the mic needs downmixing to mono
        if (device.channelCount == 1)
            device.StartRunning(OnBufferMono);
        else if (device.channelCount == 2)
            device.StartRunning(OnBufferDownmixStereo);
        else
            device.StartRunning(OnBufferDownmixGeneric);

        // Return WaveFormat to indicate that recording has started
        Log.Info("Began mic capture (SampleRate:{0}Hz, ChannelCount:{1}, AEC:{2}, Device:'{3}')", _device.sampleRate, device.channelCount, device.echoCancellation, device.name);
        return _format;
    }

    /// <summary>
    /// Find an audio device by name. If the name is null/whitespace, or no device matches,
    /// fall back to the first audio device in the query (null if there are none).
    /// </summary>
    [CanBeNull]
    private static AudioDevice ChooseAudioDevice([CanBeNull] string name)
    {
        var criterion = MediaDeviceCriteria.AudioDevice;
        var query = new MediaDeviceQuery(criterion);

        // Remember the first audio device seen, so we have a safe fallback if nothing matches the name.
        AudioDevice fallback = null;

        // Choose a device that matches the specified name. If the name is null/whitespace just use the first valid device
        for (var i = 0; i < query.count; i++)
        {
            if (!(query[i] is AudioDevice audioDevice))
                continue;

            if (fallback == null)
                fallback = audioDevice;

            if (string.IsNullOrWhiteSpace(name) || audioDevice.name.Equals(name, StringComparison.InvariantCultureIgnoreCase))
                return audioDevice;
        }

        // None of the devices matched the name: use the first available audio device (may be null).
        // Fix: the previous implementation returned `(AudioDevice)query.current`, which could throw an
        // InvalidCastException if the "current" device was not an audio device, and did not actually
        // return the *first* device as the comment claimed.
        return fallback;
    }
    #endregion

    // Unity invokes this whenever the audio configuration changes (e.g. a device is
    // plugged/unplugged). Just latch the flag; UpdateSubscribers handles the restart.
    private void OnAudioDeviceChanged(bool deviceWasChanged)
    {
        _audioDeviceChanged |= deviceWasChanged;
    }

    #region stop
    public void OnDestroy()
    {
        StopCapture();
    }

    /// <summary>
    /// Stop capturing, if currently running. Safe to call repeatedly.
    /// </summary>
    public void StopCapture()
    {
        // Stop watching for device changes (unsubscribing when not subscribed is a no-op)
        AudioSettings.OnAudioConfigurationChanged -= OnAudioDeviceChanged;
        _audioDeviceChanged = false;

        if (_device != null && _device.running)
        {
            _device.StopRunning();

            // Reset subscribers to prepare them for another stream of data
            lock (_subscribers)
                for (var i = 0; i < _subscribers.Count; i++)
                    _subscribers[i].Reset();
        }

        // Release the device reference; IsRecording/Device already report "not recording" from here on.
        _device = null;
    }
    #endregion

    #region buffer events
    // Ensure the scratch buffer exists and can hold at least `minLength` samples.
    private void ResizeTempBuffer(int minLength)
    {
        if (_tempSampleBuffer == null || minLength > _tempSampleBuffer.Length)
            _tempSampleBuffer = new float[minLength];
    }

    // Callback for mono devices: no downmixing required, just copy and forward.
    private void OnBufferMono([NotNull] AudioBuffer buffer)
    {
        // Ensure the buffer is large enough
        var source = buffer.sampleBuffer;
        var count = source.Length;
        ResizeTempBuffer(count);

        // Copy into temp buffer (bulk copy instead of an element-by-element loop) and send to subscribers
        Array.Copy(source, 0, _tempSampleBuffer, 0, count);

        SendToSubscribers(new ArraySegment<float>(_tempSampleBuffer, 0, count));
    }

    // Callback for stereo devices.
    private void OnBufferDownmixStereo([NotNull] AudioBuffer buffer)
    {
        // Downmix stereo -> mono
        // Samples are interleaved, so the `sampleBuffer` looks like:
        //
        //     L1, R1, L2, R2, L3, R3 ...
        //
        // This needs to be transformed into:
        //
        //     output[i] = 0.5 * (Left[i] + Right[i])

        // Ensure the buffer is large enough
        var monoSamples = buffer.sampleBuffer.Length / 2;
        ResizeTempBuffer(monoSamples);

        // Transform the data into the temp buffer, averaging together the two channels
        for (var i = 0; i < monoSamples; i++)
        {
            var l = buffer.sampleBuffer[i * 2];
            var r = buffer.sampleBuffer[i * 2 + 1];
            _tempSampleBuffer[i] = 0.5f * (l + r);
        }

        SendToSubscribers(new ArraySegment<float>(_tempSampleBuffer, 0, monoSamples));
    }

    // Callback for devices with 3+ channels.
    private void OnBufferDownmixGeneric([NotNull] AudioBuffer buffer)
    {
        // Downmix N-Channel audio -> mono.
        // Technically different weights are needed for different channel counts, but it doesn't
        // really matter for voice (we're not doing beamforming, which is the only time it would be relevant)
        //
        // Samples are interleaved, so the `sampleBuffer` looks like:
        //
        //     A1, B1, C1, ..., A2, B2, C2, ..., A3, B3, C3, ...
        //
        // This needs to be transformed into:
        //
        //     output[i] = (1/channel_count) * (A[i] + B[i] + C[i] + ...[i])
        var channels = _device.channelCount;
        var factor = 1f / channels;

        // Ensure the buffer is large enough
        var monoSamples = buffer.sampleBuffer.Length / channels;
        ResizeTempBuffer(monoSamples);

        // Transform the data into the temp buffer, averaging together all the channels
        for (var i = 0; i < monoSamples; i++)
        {
            // Add together all the channels (use the hoisted `channels` local rather than
            // re-reading the _device.channelCount property on every inner iteration)
            var sum = 0f;
            for (var j = 0; j < channels; j++)
                sum += buffer.sampleBuffer[i * channels + j];

            // Divide by channel count and save into proper place in buffer
            _tempSampleBuffer[i] = factor * sum;
        }

        SendToSubscribers(new ArraySegment<float>(_tempSampleBuffer, 0, monoSamples));
    }

    // Deliver a segment of mono samples to every subscriber, under the subscriber lock.
    private void SendToSubscribers(ArraySegment<float> samples)
    {
        lock (_subscribers)
            for (var i = 0; i < _subscribers.Count; i++)
                _subscribers[i].ReceiveMicrophoneData(samples, _format);
    }
    #endregion

    /// <summary>
    /// Called periodically by Dissonance. The audio handling is event driven, so all we
    /// need to do here is trigger a reset if something has gone wrong.
    /// </summary>
    /// <returns>True if the microphone must be reinitialized; false if everything is ok.</returns>
    public bool UpdateSubscribers()
    {
        // If device is not recording something is wrong!
        if (!IsRecording)
            return true;

        // If the audio device changes then we really don't know what state we're in any more (e.g. the mic could have just been unplugged).
        // Force the mic to reinitialize (setting us back to a known state).
        if (_audioDeviceChanged)
        {
            Log.Debug("Audio device changed - restarting mic");
            _audioDeviceChanged = false;
            return true;
        }

        // Everything is ok
        return false;
    }

    #region subscribers
    public void Subscribe(IMicrophoneSubscriber listener)
    {
        if (listener == null) throw new ArgumentNullException(nameof(listener));

        lock (_subscribers)
            _subscribers.Add(listener);
    }

    public bool Unsubscribe(IMicrophoneSubscriber listener)
    {
        if (listener == null) throw new ArgumentNullException(nameof(listener));

        lock (_subscribers)
            return _subscribers.Remove(listener);
    }
    #endregion

    // Enumerate the names of all currently available audio input devices.
    void IMicrophoneDeviceList.GetDevices(List<string> output)
    {
        var criterion = MediaDeviceCriteria.AudioDevice;
        var query = new MediaDeviceQuery(criterion);

        for (var i = 0; i < query.count; i++)
        {
            if (!(query[i] is AudioDevice audioDevice))
                continue;
            output.Add(audioDevice.name);
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment