Skip to content

Instantly share code, notes, and snippets.

Created August 5, 2018 07:59
Show Gist options
  • Save mathandy/edf81b4c74c1a8a3087c0cd33646d94f to your computer and use it in GitHub Desktop.
Save mathandy/edf81b4c74c1a8a3087c0cd33646d94f to your computer and use it in GitHub Desktop.
A real-time analog to midi converter
"""A real-time analog to midi converter.
Listens to your system's microphone and does its best to convert the
sounds it hears to a sequence of musical notes. It works ok... play
with the sampling settings to get results that fit your needs.
Usage Example:
>>> from mic_listen import list_devices, MicListener
>>> list_devices() # to list system devices
>>> MicListener(input_device_index=0).listen()
Parts of this code were taken from or inspired by the following repo:
Author: Matt Zucker
Date: July 2016
License: Creative Commons Attribution-ShareAlike 3.0
This code can be reused per the guidelines given in the
Creative Commons Attribution-ShareAlike 3.0
"""
from __future__ import division, print_function
import numpy as np
import pyaudio
from mingus.containers import Note
from time import time
# See
def freq_to_number(f):
    """Return the (possibly fractional) MIDI number of a frequency in Hz.

    Example: 27.5 Hz (A0) maps to 21.
    """
    # Distance from A4 (440 Hz, MIDI 69), measured in octaves.
    octaves_from_a4 = np.log2(f / 440.0)
    return 69 + 12 * octaves_from_a4
# See
def number_to_freq(n):
    """Return the frequency in Hz of a MIDI note number.

    Example: 21 (A0) maps to 27.5 Hz.
    """
    # Signed distance from A4 (MIDI 69, 440 Hz), measured in semitones.
    semitones_from_a4 = n - 69
    return 440 * 2.0 ** (semitones_from_a4 / 12.0)
def int_to_note_name(n):
    """Build the mingus note for MIDI number `n`.

    Returns whatever `Note.from_int` returns for the shifted index.
    """
    # mingus numbers notes 12 lower than MIDI does, so shift before converting.
    mingus_index = n - 12
    return Note().from_int(mingus_index)
def note_name_to_int(name):
    """Convert a note name accepted by mingus `Note` to a MIDI number."""
    # mingus integers sit 12 below the MIDI numbering, so add the offset back.
    mingus_index = int(Note(name))
    return mingus_index + 12
class SpectralHistogram:
    """Live bar chart of the FFT response at each note in a range.

    NOTE(review): the published copy of this class was truncated (an
    assignment target, the bar-creation call and the loop body were
    missing), so the axes handling below is a reconstruction -- confirm
    it against the original gist.
    """

    def __init__(self, notes_in_range, max_refresh_rate=1):
        """Create the figure and label one x tick per note.

        Args:
            notes_in_range: MIDI note numbers; one bar is drawn per note.
            max_refresh_rate: Upper bound on redraws per second.
        """
        self.notes_in_range = notes_in_range
        self.max_refresh_rate = max_refresh_rate

        # Imported lazily so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt

        plt.ion()  # interactive mode keeps the window from blocking
        self.fig = plt.figure()
        self.ax = self.fig.add_subplot(111)

        # Materialize the labels: a lazy `map` object is not a usable
        # sequence of tick labels on Python 3.
        positions = list(notes_in_range)
        note_names = [str(int_to_note_name(n)) for n in positions]
        self.ax.set_xticks(positions)
        self.ax.set_xticklabels(note_names, rotation=90)

        self.bars = None  # created on the first redraw
        self.time_drawn = None  # time() of the most recent redraw

    def redraw(self, note_fft):
        """Update the bars with `note_fft`, at most `max_refresh_rate`/s.

        Args:
            note_fft: One response magnitude per note in `notes_in_range`.
        """
        min_interval = 1. / self.max_refresh_rate
        if (self.time_drawn is not None and
                time() - self.time_drawn < min_interval):
            return  # drawn too recently; skip this frame

        if self.bars is None:
            self.bars = self.ax.bar(list(self.notes_in_range), note_fft)
        else:
            for bar, h in zip(self.bars, note_fft):
                bar.set_height(h)

        # Bars resized via set_height do not rescale the axes by themselves.
        peak = float(np.max(note_fft)) if len(note_fft) else 0.0
        if peak > 0:
            self.ax.set_ylim(0, 1.05 * peak)

        self.fig.canvas.draw()
        self.fig.canvas.flush_events()
        self.time_drawn = time()
class MicListener:
    """Converts microphone input to notes. See `MicListener().listen()`.

    NOTE(review): the published copy of `listen` was truncated in several
    places (the stream read call, an `else:` branch, the histogram call,
    the history append and an exception handler); those parts are
    reconstructed here -- confirm them against the original gist.
    """

    def __init__(self, input_device_index=0):
        """Remember the PyAudio input device index to record from."""
        self.input_device = input_device_index

    def listen(self, num_notes=np.inf, duration=np.inf, sampling_rate=22050,
               samples_per_frame=2048, frames_per_fft=16,
               instrument_range=('E-2', 'C-6'), min_note_duration=.05,
               min_rms=10, output_on=False, show_histogram=False):
        """Returns notes heard before time `duration` or `num_notes`.

        Note: Play with the sampling parameters... it's a balance between
        not missing any notes and accidental sounds being considered notes.

        Usage Example:
        >>> list_devices()  # to list system devices
        >>> MicListener(input_device_index=0).listen()

        Args:
            num_notes: Stop once this many notes have been recorded.
            duration: Stop after this many seconds.
            sampling_rate: Microphone sampling rate in Hz.
            samples_per_frame: Samples read from the stream per iteration.
            frames_per_fft: Number of frames held in the FFT buffer.
            instrument_range: (lowest, highest) note, either as two note
                names or as two MIDI numbers.
            min_note_duration: Seconds a note must persist to be recorded.
            min_rms: RMS of the windowed buffer below which input counts
                as silence.
            output_on: Print each recorded note.
            show_histogram: Plot the per-note response while listening.

        Returns:
            The list of notes recorded, in the order they were heard.
        """
        note_min, note_max = instrument_range
        if isinstance(note_min, str) and isinstance(note_max, str):
            note_min = note_name_to_int(note_min)
            note_max = note_name_to_int(note_max)

        # Create Hanning window function
        samples_per_fft = samples_per_frame * frames_per_fft
        ss = np.linspace(0, 2 * np.pi, samples_per_fft, False)
        window = 0.5 * (1 - np.cos(ss))

        notes_in_range = range(note_min, note_max + 1)
        fftfreqs = np.fft.rfftfreq(len(window), 1.0 / sampling_rate)
        # A real array rather than a lazy `map`: it is both interpolated
        # over and indexed below.
        note_freqs = np.array([number_to_freq(n) for n in notes_in_range])

        # Allocate space to run an FFT.
        buf = np.zeros(samples_per_fft, dtype=np.float32)
        num_frames = 0

        histogram = None
        if show_histogram:
            histogram = SpectralHistogram(notes_in_range, max_refresh_rate=1)

        # Initialize audio
        audio_parameters = {'format': pyaudio.paInt16,
                            'channels': 1,
                            'rate': sampling_rate,
                            'input': True,
                            'frames_per_buffer': samples_per_frame,
                            'input_device_index': self.input_device}
        audio = pyaudio.PyAudio()
        stream = None
        try:
            stream = audio.open(**audio_parameters)

            if output_on:
                print('sampling at', sampling_rate, 'Hz', '\n')

            notes = []
            start_time = time()
            sound_start_time = None
            previous_note = None
            while stream.is_active():
                # Shift the buffer down, place new samples at the end
                buf[:-samples_per_frame] = buf[samples_per_frame:]
                buf[-samples_per_frame:] = np.frombuffer(
                    stream.read(samples_per_frame), np.int16)
                frame = buf * window
                num_frames += 1

                rms = np.sqrt(np.mean(frame * frame))  # used as loudness
                silence = not rms > min_rms
                if silence:
                    previous_note = None
                    sound_start_time = None
                elif sound_start_time is None:
                    sound_start_time = time()

                # if loud enough and buffer is full, find note
                if not silence and num_frames >= frames_per_fft:
                    # Run the FFT on the windowed buffer
                    fft = np.abs(np.fft.rfft(frame))
                    note_fft = np.interp(note_freqs, fftfreqs, fft)

                    # Get frequency of maximum response in range
                    freq = note_freqs[note_fft.argmax()]

                    # Get note number and nearest note
                    n = freq_to_number(freq)
                    n0 = int(round(n))
                    current_note = int_to_note_name(n0)

                    # A change of note restarts the duration clock.
                    if previous_note != current_note:
                        sound_start_time = time()

                    if histogram is not None:
                        histogram.redraw(note_fft)

                    sound_duration = time() - sound_start_time
                    last_note = notes[-1] if notes else None
                    if (sound_duration > min_note_duration and
                            last_note != current_note):
                        notes.append(current_note)
                        if output_on:
                            # str(): the note object is not a plain string,
                            # so the `s` format spec cannot take it directly.
                            s = ('freq: {:4.2f} Hz  note: {:>3s} {:+.2f}'
                                 ''.format(freq, str(current_note), n - n0))
                            print(s, 'duration:', sound_duration,
                                  'rms:', rms)
                    previous_note = current_note

                if (len(notes) >= num_notes or
                        time() - start_time > duration):
                    break
            return notes
        finally:
            # Release the audio device even when reading or analysis fails.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            audio.terminate()
def list_devices():
    """Print the id and name of each input-capable device on host API 0."""
    p = pyaudio.PyAudio()
    try:
        info = p.get_host_api_info_by_index(0)
        number_of_devices = info.get('deviceCount')
        for i in range(number_of_devices):
            # Query each device once and reuse the result.
            device = p.get_device_info_by_host_api_device_index(0, i)
            if device.get('maxInputChannels') > 0:
                print("Input Device id ", i, " - ", device.get('name'))
    finally:
        # Release the PortAudio resources held by this PyAudio instance.
        p.terminate()
if __name__ == '__main__':
    # `input` returns text on Python 3, but the device index handed to
    # PyAudio must be an integer.
    input_device = int(input("Select device:"))
    mic = MicListener(input_device)
    mic.listen(num_notes=100, show_histogram=True, output_on=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment