Skip to content

Instantly share code, notes, and snippets.

@martindevans
Last active February 22, 2022 17:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martindevans/599b6aebf0c5ac1ada7860eb49106315 to your computer and use it in GitHub Desktop.
Save martindevans/599b6aebf0c5ac1ada7860eb49106315 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using Dissonance.Audio.Capture;
using JetBrains.Annotations;
using NatSuite.Devices;
using NAudio.Wave;
using UnityEngine;
namespace Dissonance.Integrations.NatDevice
{
/// <summary>
/// Dissonance microphone capture backend driven by NatDevice. Selects an audio
/// input device, subscribes to its sample callbacks, downmixes to mono where
/// necessary and forwards the samples to registered <see cref="IMicrophoneSubscriber"/>s.
/// </summary>
public class NatDeviceMicrophoneInput
    : MonoBehaviour, IMicrophoneCapture, IMicrophoneDeviceList
{
    private static readonly Log Log = Logs.Create(LogCategory.Recording, nameof(NatDeviceMicrophoneInput));

    // Scratch buffer re-used across audio callbacks to avoid per-callback allocations.
    // Grown on demand by ResizeTempBuffer, never shrunk.
    private float[] _tempSampleBuffer;

    // NOTE(review): NatDevice audio callbacks presumably arrive off the Unity main
    // thread, hence every access to this list is locked — confirm against NatDevice docs.
    private readonly List<IMicrophoneSubscriber> _subscribers = new List<IMicrophoneSubscriber>();

    // Mono format reported to subscribers (sample rate taken from the device, 1 channel).
    private WaveFormat _format;

    // Currently capturing device; only meaningful while recording.
    private AudioDevice _device;

    // Set by the Unity audio-configuration callback; polled (and cleared) in UpdateSubscribers.
    private bool _audioDeviceChanged;

    public bool IsRecording => _device != null && _device.running;

    // Latency is unknown for NatDevice capture, so report zero.
    public TimeSpan Latency => TimeSpan.Zero;

    [CanBeNull] public AudioDevice Device => IsRecording ? _device : null;

    string IMicrophoneCapture.Device => IsRecording ? _device?.name : null;

    #region start
    /// <summary>
    /// Begin capturing from the device matching <paramref name="name"/> (or the first
    /// available audio device if the name is null/whitespace or matches nothing).
    /// </summary>
    /// <param name="name">Preferred device name (case-insensitive), or null for any device.</param>
    /// <returns>The mono <see cref="WaveFormat"/> of the captured audio, or null if no device was found.</returns>
    // ReSharper disable once ParameterHidesMember
    public WaveFormat StartCapture(string name)
    {
        // Just in case the device is already running, stop it first. This will only happen if the Start/Stop methods are improperly used.
        StopCapture();

        // Choose an input device, if we can't find one we can't record audio.
        var device = ChooseAudioDevice(name);
        if (device == null)
            return null;

        // Store the device we're using
        _device = device;
        _format = new WaveFormat(device.sampleRate, 1);

        // Watch for device changes - we need to reset if the audio device changes
        AudioSettings.OnAudioConfigurationChanged += OnAudioDeviceChanged;
        _audioDeviceChanged = false;

        // Reset subscribers to prepare them for another stream of data
        lock (_subscribers)
            for (var i = 0; i < _subscribers.Count; i++)
                _subscribers[i].Reset();

        // Start recording. Which delegate is used depends upon whether the mic needs downmixing to mono
        if (device.channelCount == 1)
            device.StartRunning(OnBufferMono);
        else if (device.channelCount == 2)
            device.StartRunning(OnBufferDownmixStereo);
        else
            device.StartRunning(OnBufferDownmixGeneric);

        // Return WaveFormat to indicate that recording has started
        Log.Info("Began mic capture (SampleRate:{0}Hz, ChannelCount:{1}, AEC:{2}, Device:'{3}')", _device.sampleRate, device.channelCount, device.echoCancellation, device.name);
        return _format;
    }

    /// <summary>
    /// Find an audio device by name. If the name is null/whitespace, or no device matches,
    /// fall back to the first audio device in the query (null if there are none).
    /// </summary>
    [CanBeNull]
    private static AudioDevice ChooseAudioDevice([CanBeNull] string name)
    {
        var criterion = MediaDeviceCriteria.AudioDevice;
        var query = new MediaDeviceQuery(criterion);

        // Remember the first audio device seen, so we have a safe fallback if nothing matches the name.
        AudioDevice fallback = null;

        // Choose a device that matches the specified name. If the name is null/whitespace just use the first valid device
        for (var i = 0; i < query.count; i++)
        {
            if (!(query[i] is AudioDevice audioDevice))
                continue;

            if (fallback == null)
                fallback = audioDevice;

            if (string.IsNullOrWhiteSpace(name) || audioDevice.name.Equals(name, StringComparison.InvariantCultureIgnoreCase))
                return audioDevice;
        }

        // None of the devices matched the name: use the first available audio device (may be null).
        // Fix: the previous implementation returned `(AudioDevice)query.current`, which could throw an
        // InvalidCastException if the "current" device was not an audio device, and did not actually
        // return the *first* device as the comment claimed.
        return fallback;
    }
    #endregion

    // Unity invokes this whenever the audio configuration changes (e.g. a device is
    // plugged/unplugged). Just latch the flag; UpdateSubscribers handles the restart.
    private void OnAudioDeviceChanged(bool deviceWasChanged)
    {
        _audioDeviceChanged |= deviceWasChanged;
    }

    #region stop
    public void OnDestroy()
    {
        StopCapture();
    }

    /// <summary>
    /// Stop capturing, if currently running. Safe to call repeatedly.
    /// </summary>
    public void StopCapture()
    {
        // Stop watching for device changes (unsubscribing when not subscribed is a no-op)
        AudioSettings.OnAudioConfigurationChanged -= OnAudioDeviceChanged;
        _audioDeviceChanged = false;

        if (_device != null && _device.running)
        {
            _device.StopRunning();

            // Reset subscribers to prepare them for another stream of data
            lock (_subscribers)
                for (var i = 0; i < _subscribers.Count; i++)
                    _subscribers[i].Reset();
        }

        // Release the device reference; IsRecording/Device already report "not recording" from here on.
        _device = null;
    }
    #endregion

    #region buffer events
    // Ensure the scratch buffer exists and can hold at least `minLength` samples.
    private void ResizeTempBuffer(int minLength)
    {
        if (_tempSampleBuffer == null || minLength > _tempSampleBuffer.Length)
            _tempSampleBuffer = new float[minLength];
    }

    // Callback for mono devices: no downmixing required, just copy and forward.
    private void OnBufferMono([NotNull] AudioBuffer buffer)
    {
        // Ensure the buffer is large enough
        var source = buffer.sampleBuffer;
        var count = source.Length;
        ResizeTempBuffer(count);

        // Copy into temp buffer (bulk copy instead of an element-by-element loop) and send to subscribers
        Array.Copy(source, 0, _tempSampleBuffer, 0, count);

        SendToSubscribers(new ArraySegment<float>(_tempSampleBuffer, 0, count));
    }

    // Callback for stereo devices.
    private void OnBufferDownmixStereo([NotNull] AudioBuffer buffer)
    {
        // Downmix stereo -> mono
        // Samples are interleaved, so the `sampleBuffer` looks like:
        //
        //     L1, R1, L2, R2, L3, R3 ...
        //
        // This needs to be transformed into:
        //
        //     output[i] = 0.5 * (Left[i] + Right[i])

        // Ensure the buffer is large enough
        var monoSamples = buffer.sampleBuffer.Length / 2;
        ResizeTempBuffer(monoSamples);

        // Transform the data into the temp buffer, averaging together the two channels
        for (var i = 0; i < monoSamples; i++)
        {
            var l = buffer.sampleBuffer[i * 2];
            var r = buffer.sampleBuffer[i * 2 + 1];
            _tempSampleBuffer[i] = 0.5f * (l + r);
        }

        SendToSubscribers(new ArraySegment<float>(_tempSampleBuffer, 0, monoSamples));
    }

    // Callback for devices with 3+ channels.
    private void OnBufferDownmixGeneric([NotNull] AudioBuffer buffer)
    {
        // Downmix N-Channel audio -> mono.
        // Technically different weights are needed for different channel counts, but it doesn't
        // really matter for voice (we're not doing beamforming, which is the only time it would be relevant)
        //
        // Samples are interleaved, so the `sampleBuffer` looks like:
        //
        //     A1, B1, C1, ..., A2, B2, C2, ..., A3, B3, C3, ...
        //
        // This needs to be transformed into:
        //
        //     output[i] = (1/channel_count) * (A[i] + B[i] + C[i] + ...[i])
        var channels = _device.channelCount;
        var factor = 1f / channels;

        // Ensure the buffer is large enough
        var monoSamples = buffer.sampleBuffer.Length / channels;
        ResizeTempBuffer(monoSamples);

        // Transform the data into the temp buffer, averaging together all the channels
        for (var i = 0; i < monoSamples; i++)
        {
            // Add together all the channels (use the hoisted `channels` local rather than
            // re-reading the _device.channelCount property on every inner iteration)
            var sum = 0f;
            for (var j = 0; j < channels; j++)
                sum += buffer.sampleBuffer[i * channels + j];

            // Divide by channel count and save into proper place in buffer
            _tempSampleBuffer[i] = factor * sum;
        }

        SendToSubscribers(new ArraySegment<float>(_tempSampleBuffer, 0, monoSamples));
    }

    // Deliver a segment of mono samples to every subscriber, under the subscriber lock.
    private void SendToSubscribers(ArraySegment<float> samples)
    {
        lock (_subscribers)
            for (var i = 0; i < _subscribers.Count; i++)
                _subscribers[i].ReceiveMicrophoneData(samples, _format);
    }
    #endregion

    /// <summary>
    /// Called periodically by Dissonance. The audio handling is event driven, so all we
    /// need to do here is trigger a reset if something has gone wrong.
    /// </summary>
    /// <returns>True if the microphone must be reinitialized; false if everything is ok.</returns>
    public bool UpdateSubscribers()
    {
        // If device is not recording something is wrong!
        if (!IsRecording)
            return true;

        // If the audio device changes then we really don't know what state we're in any more (e.g. the mic could have just been unplugged).
        // Force the mic to reinitialize (setting us back to a known state).
        if (_audioDeviceChanged)
        {
            Log.Debug("Audio device changed - restarting mic");
            _audioDeviceChanged = false;
            return true;
        }

        // Everything is ok
        return false;
    }

    #region subscribers
    public void Subscribe(IMicrophoneSubscriber listener)
    {
        if (listener == null) throw new ArgumentNullException(nameof(listener));

        lock (_subscribers)
            _subscribers.Add(listener);
    }

    public bool Unsubscribe(IMicrophoneSubscriber listener)
    {
        if (listener == null) throw new ArgumentNullException(nameof(listener));

        lock (_subscribers)
            return _subscribers.Remove(listener);
    }
    #endregion

    // Enumerate the names of all currently available audio input devices.
    void IMicrophoneDeviceList.GetDevices(List<string> output)
    {
        var criterion = MediaDeviceCriteria.AudioDevice;
        var query = new MediaDeviceQuery(criterion);

        for (var i = 0; i < query.count; i++)
        {
            if (!(query[i] is AudioDevice audioDevice))
                continue;
            output.Add(audioDevice.name);
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment