# mathandy/mic_listener.py

## mic_listener.py
"""A real-time analog to midi converter.

Listens to your system's microphone and does its best to convert the
sounds it hears to a sequence of musical notes.  It works ok... play
with the sampling settings to get results that fit your needs.

Usage Example:
--------------
    >>> from mic_listener import list_devices, MicListener
    >>> list_devices()  # to list system devices
    >>> MicListener(input_device_index=0).listen()

Credit:
-------
Parts of this code were taken from or inspired by the following repo:
    GitHub:  https://github.com/mzucker/python-tuner/blob/master/tuner.py
    Author:  Matt Zucker
    Date:    July 2016
    License: Creative Commons Attribution-ShareAlike 3.0
             https://creativecommons.org/licenses/by-sa/3.0/us/

License:
--------
This code can be reused per the guidelines given in the
Creative Commons Attribution-ShareAlike 3.0
"""
from __future__ import division, print_function
import numpy as np
import pyaudio
from mingus.containers import Note
from time import time


# See https://newt.phys.unsw.edu.au/jw/notes.html
def freq_to_number(f):
    """Map a frequency in Hz onto the (fractional) MIDI note scale.

    440 Hz corresponds to MIDI number 69 and every doubling of the
    frequency adds 12, e.g. 27.5 Hz (A0) --> 21.  The result is not
    rounded.
    """
    semitones_from_reference = 12 * np.log2(f / 440.0)
    return 69 + semitones_from_reference


# See https://newt.phys.unsw.edu.au/jw/notes.html
def number_to_freq(n):
    """Map a MIDI note number onto its frequency in Hz.

    MIDI number 69 corresponds to 440 Hz and every 12 numbers double
    the frequency, e.g. 21 (A0) --> 27.5 Hz.
    """
    octaves_from_reference = (n - 69) / 12.0
    return 440 * 2.0 ** octaves_from_reference


def int_to_note_name(n):
    """Convert the MIDI note number `n` to a mingus note.

    mingus numbers its notes 12 lower than MIDI does, hence the shift.
    Returns whatever `Note().from_int` returns -- presumably the `Note`
    object itself rather than a plain string; confirm against mingus.
    """
    mingus_number = n - 12
    return Note().from_int(mingus_number)


def note_name_to_int(name):
    """Convert a note name accepted by mingus `Note` to a MIDI number.

    mingus numbers its notes 12 lower than MIDI does, hence the shift.
    """
    mingus_number = int(Note(name))
    return mingus_number + 12


class SpectralHistogram:
    """Interactive bar chart with one bar per note in a range.

    The figure is created on construction; the bars are created by the
    first call to `redraw` and have their heights updated by later calls.
    """

    def __init__(self, notes_in_range, max_refresh_rate=1):
        """Open an interactive matplotlib figure for the given notes.

        Args:
            notes_in_range: note numbers, used as the x positions of the
                bars and converted to tick labels.
            max_refresh_rate: upper bound on redraws per second.
        """
        self.notes_in_range = notes_in_range
        self.max_refresh_rate = max_refresh_rate

        # Imported lazily so matplotlib is only needed when a histogram
        # is actually created.
        import matplotlib.pyplot as plt
        plt.ion()
        self.fig = plt.figure()
        self.ax = self.fig.add_subplot(111)

        # `set_xlabel` takes one string for the whole axis, so the
        # per-note names are attached to the ticks instead.
        positions = list(notes_in_range)
        self.ax.set_xticks(positions)
        self.ax.set_xticklabels([str(int_to_note_name(n)) for n in positions])

        self.bars = None        # created by the first `redraw`
        self.time_drawn = None  # time of the most recent redraw

    def redraw(self, note_fft):
        """Update the bar heights, at most `max_refresh_rate` times a second.

        Args:
            note_fft: one height per entry of `notes_in_range`.

        Calls arriving sooner than `1 / max_refresh_rate` seconds after
        the previous redraw are ignored.
        """
        if self.time_drawn is not None:
            min_interval = 1. / self.max_refresh_rate
            if time() - self.time_drawn < min_interval:
                return

        if self.bars is None:
            # x and height are passed positionally because the `left`
            # keyword is not accepted by current matplotlib releases.
            self.bars = self.ax.bar(list(self.notes_in_range), note_fft,
                                    align='center')
        else:
            for bar, h in zip(self.bars, note_fft):
                bar.set_height(h)
        self.fig.canvas.draw()
        self.fig.canvas.flush_events()
        self.time_drawn = time()


class MicListener:
    """Converts live microphone input to a sequence of musical notes.

    See `MicListener().listen()`.
    """

    def __init__(self, input_device_index=0):
        """Remember which audio input device to record from.

        Args:
            input_device_index: index of the input device, as printed by
                `list_devices()`.  Numeric strings (e.g. the text returned
                by `input()`) are converted to `int`; `None` is stored
                unchanged.
        """
        if input_device_index is not None:
            input_device_index = int(input_device_index)
        self.input_device = input_device_index

    def listen(self, num_notes=np.inf, duration=np.inf, sampling_rate=22050,
               samples_per_frame=2048, frames_per_fft=16,
               instrument_range=('E-2', 'C-6'), min_note_duration=.05,
               min_rms=10, output_on=False, show_histogram=False):
        """Return the notes heard until `num_notes` or `duration` is reached.

        Note: Play with the sampling parameters... it's a balance between
        not missing any notes and accidental sounds being considered notes.

        Args:
            num_notes: stop once this many notes have been recorded.
            duration: stop after this many seconds of listening.
            sampling_rate: microphone sampling rate in Hz.
            samples_per_frame: samples read from the stream per iteration.
            frames_per_fft: number of frames held in the FFT buffer.
            instrument_range: (lowest, highest) note to consider, either
                as two note-name strings or as two MIDI numbers.
            min_note_duration: seconds the same note must persist before
                it is recorded.
            min_rms: loudness (RMS of the windowed buffer) at or below
                which the input is treated as silence.
            output_on: when true, print each note as it is recorded.
            show_histogram: when true, plot the per-note response with
                `SpectralHistogram`.

        Returns:
            A list of the notes recorded (values produced by
            `int_to_note_name`); a note is only appended when it differs
            from the last recorded one.  `None` if an exception
            interrupted listening (the exception is printed).

        Usage Example:
          >>> list_devices()  # to list system devices
          >>> MicListener(input_device_index=0).listen()
        """

        note_min, note_max = instrument_range
        if isinstance(note_min, str) and isinstance(note_max, str):
            note_min = note_name_to_int(note_min)
            note_max = note_name_to_int(note_max)

        # Create Hanning window function
        samples_per_fft = samples_per_frame * frames_per_fft
        ss = np.linspace(0, 2 * np.pi, samples_per_fft, False)
        window = 0.5 * (1 - np.cos(ss))

        # Materialised as a list / array: a lazy `map` object (Python 3)
        # can be neither indexed nor handed to `np.interp`.
        notes_in_range = list(range(note_min, note_max + 1))
        fftfreqs = np.fft.rfftfreq(len(window), 1.0 / sampling_rate)
        note_freqs = np.array([number_to_freq(n) for n in notes_in_range])

        # Allocate space to run an FFT.
        buf = np.zeros(samples_per_fft, dtype=np.float32)
        num_frames = 0

        # Build the plot before claiming the audio device so a plotting
        # failure cannot leave a stream open.
        histogram = None
        if show_histogram:
            histogram = SpectralHistogram(notes_in_range, max_refresh_rate=1)

        # Initialize audio
        audio_parameters = {'format': pyaudio.paInt16,
                            'channels': 1,
                            'rate': sampling_rate,
                            'input': True,
                            'frames_per_buffer': samples_per_frame,
                            'input_device_index': self.input_device}
        audio = pyaudio.PyAudio()
        try:
            stream = audio.open(**audio_parameters)
        except Exception:
            audio.terminate()
            raise

        try:
            stream.start_stream()

            if output_on:
                print('sampling at', sampling_rate, 'Hz', '\n')

            # The leading None is a sentinel so `note_history[-1]` always
            # exists; it is stripped before returning.
            note_history = [None]
            start_time = time()
            sound_start_time = None
            previous_note = None
            while stream.is_active():

                # Shift the buffer down, place new samples at the end
                buf[:-samples_per_frame] = buf[samples_per_frame:]
                buf[-samples_per_frame:] = np.frombuffer(
                    stream.read(samples_per_frame), np.int16)
                frame = buf * window
                num_frames += 1

                rms = np.sqrt(np.mean(frame * frame))  # used as loudness
                if rms > min_rms:
                    silence = False
                    if sound_start_time is None:
                        sound_start_time = time()
                else:
                    silence = True
                    previous_note = None
                    sound_start_time = None

                # if loud enough and buffer is full, find note
                if not silence and num_frames >= frames_per_fft:

                    # Run the FFT on the windowed buffer
                    fft = np.abs(np.fft.rfft(frame))
                    note_fft = np.interp(note_freqs, fftfreqs, fft)

                    # Get frequency of maximum response in range
                    freq = note_freqs[note_fft.argmax()]

                    # Get note number and nearest note
                    n = freq_to_number(freq)
                    n0 = int(round(n))
                    current_note = int_to_note_name(n0)

                    # A change of note restarts the duration clock.
                    if previous_note != current_note:
                        sound_start_time = time()

                    if histogram is not None:
                        histogram.redraw(note_fft)

                    sound_duration = time() - sound_start_time
                    if (sound_duration > min_note_duration and
                            note_history[-1] != current_note):
                        note_history.append(current_note)

                        if output_on:
                            # str(): the note object may not support the
                            # `s` format code directly.
                            s = ('freq: {:4.2f} Hz  note: {:>3s} {:+.2f}'
                                 ''.format(freq, str(current_note), n - n0))
                            print(s, 'duration:', sound_duration,
                                  'rms:', rms)

                    previous_note = current_note

                # The sentinel entry does not count as a heard note.
                notes_heard = len(note_history) - 1
                if (notes_heard >= num_notes or
                        time() - start_time > duration):
                    return note_history[1:]

            # Stream went inactive before either limit was reached.
            return note_history[1:]

        except Exception as e:
            print(e)
        finally:
            stream.close()
            audio.terminate()


def list_devices():
    """Print the index and name of each audio device that can record.

    Only devices of host API 0 that report at least one input channel
    are printed.  The PyAudio instance created for the query is
    terminated before returning, even if the query fails.
    """
    p = pyaudio.PyAudio()
    try:
        device_info = p.get_device_info_by_host_api_device_index
        number_of_devices = p.get_host_api_info_by_index(0).get('deviceCount')
        for i in range(number_of_devices):
            info = device_info(0, i)  # looked up once per device
            if info.get('maxInputChannels') > 0:
                print("Input Device id ", i, " - ",
                      info.get('name'))
    finally:
        p.terminate()


if __name__ == '__main__':
    # Script entry point: let the user pick a device, then listen for
    # up to 100 notes with console output and the live histogram.
    list_devices()
    # `input` returns text on Python 3, but the device index must be
    # an integer.
    input_device = int(input("Select device:"))
    mic = MicListener(input_device)
    mic.listen(num_notes=100, show_histogram=True, output_on=True)
	"""A real-time analog to midi converter.

	Listens to your system's microphone and does its best to convert the
	sounds it hears to a sequence of musical notes. It works ok... play
	with the sampling settings to get results that fit your needs.

	Usage Example:
	--------------
	>>> from mic_listener import list_devices, MicListener
	>>> list_devices() # to list system devices
	>>> MicListener(input_device_index=0).listen()

	Credit:
	-------
	Parts of this code were taken from or inspired by the following repo:
	GitHub: https://github.com/mzucker/python-tuner/blob/master/tuner.py
	Author: Matt Zucker
	Date: July 2016
	License: Creative Commons Attribution-ShareAlike 3.0
	https://creativecommons.org/licenses/by-sa/3.0/us/

	License:
	--------
	This code can be reused per the guidelines given in the
	Creative Commons Attribution-ShareAlike 3.0
	"""
	from __future__ import division, print_function
	import numpy as np
	import pyaudio
	from mingus.containers import Note
	from time import time


	# See https://newt.phys.unsw.edu.au/jw/notes.html
	def freq_to_number(f):
	    """Convert a frequency (Hz) to a (fractional) MIDI number.

	    440 Hz corresponds to MIDI number 69 and every doubling of the
	    frequency adds 12, e.g. 27.5 Hz (A0) --> 21.
	    """
	    return 69 + 12 * np.log2(f / 440.0)


	# See https://newt.phys.unsw.edu.au/jw/notes.html
	def number_to_freq(n):
	    """Convert a MIDI number to its frequency (Hz).

	    MIDI number 69 corresponds to 440 Hz and every 12 numbers double
	    the frequency, e.g. 21 (A0) --> 27.5 Hz.
	    """
	    return 440 * 2.0 ** ((n - 69) / 12.0)


	def int_to_note_name(n):
	    """Convert the MIDI note number `n` to a mingus note.

	    Returns whatever `Note().from_int` returns -- presumably the
	    `Note` object itself; confirm against mingus.
	    """
	    # Note: mingus note-integer convention differs by 12
	    return Note().from_int(n - 12)


	def note_name_to_int(name):
	    """Convert a note name accepted by mingus `Note` to a MIDI number."""
	    # Note: mingus note-integer convention differs by 12
	    return int(Note(name)) + 12


	class SpectralHistogram:
	    """Interactive bar chart with one bar per note in a range.

	    The figure is created on construction; the bars are created by
	    the first call to `redraw` and updated by later calls.
	    """

	    def __init__(self, notes_in_range, max_refresh_rate=1):
	        """Open an interactive matplotlib figure for the given notes.

	        Args:
	            notes_in_range: note numbers, used as the x positions of
	                the bars and converted to tick labels.
	            max_refresh_rate: upper bound on redraws per second.
	        """
	        self.notes_in_range = notes_in_range
	        self.max_refresh_rate = max_refresh_rate

	        # Imported lazily so matplotlib is only needed when a
	        # histogram is actually created.
	        import matplotlib.pyplot as plt
	        plt.ion()
	        self.fig = plt.figure()
	        self.ax = self.fig.add_subplot(111)

	        # `set_xlabel` takes one string for the whole axis, so the
	        # per-note names are attached to the ticks instead.
	        positions = list(notes_in_range)
	        self.ax.set_xticks(positions)
	        self.ax.set_xticklabels(
	            [str(int_to_note_name(n)) for n in positions])

	        self.bars = None        # created by the first `redraw`
	        self.time_drawn = None  # time of the most recent redraw

	    def redraw(self, note_fft):
	        """Update the bar heights, rate-limited by `max_refresh_rate`.

	        Args:
	            note_fft: one height per entry of `notes_in_range`.

	        Calls arriving sooner than `1 / max_refresh_rate` seconds
	        after the previous redraw are ignored.
	        """
	        if self.time_drawn is not None:
	            min_interval = 1. / self.max_refresh_rate
	            if time() - self.time_drawn < min_interval:
	                return

	        if self.bars is None:
	            # x and height are passed positionally because the `left`
	            # keyword is not accepted by current matplotlib releases.
	            self.bars = self.ax.bar(list(self.notes_in_range), note_fft,
	                                    align='center')
	        else:
	            for bar, h in zip(self.bars, note_fft):
	                bar.set_height(h)
	        self.fig.canvas.draw()
	        self.fig.canvas.flush_events()
	        self.time_drawn = time()


	class MicListener:
	    """Converts live microphone input to a sequence of musical notes.

	    See `MicListener().listen()`.
	    """

	    def __init__(self, input_device_index=0):
	        """Remember which audio input device to record from.

	        Args:
	            input_device_index: index of the input device, as printed
	                by `list_devices()`.  Numeric strings are converted
	                to `int`; `None` is stored unchanged.
	        """
	        if input_device_index is not None:
	            input_device_index = int(input_device_index)
	        self.input_device = input_device_index

	    def listen(self, num_notes=np.inf, duration=np.inf, sampling_rate=22050,
	               samples_per_frame=2048, frames_per_fft=16,
	               instrument_range=('E-2', 'C-6'), min_note_duration=.05,
	               min_rms=10, output_on=False, show_histogram=False):
	        """Return the notes heard until `num_notes` or `duration` is reached.

	        Note: Play with the sampling parameters... it's a balance between
	        not missing any notes and accidental sounds being considered notes.

	        Args:
	            num_notes: stop once this many notes have been recorded.
	            duration: stop after this many seconds of listening.
	            sampling_rate: microphone sampling rate in Hz.
	            samples_per_frame: samples read from the stream per iteration.
	            frames_per_fft: number of frames held in the FFT buffer.
	            instrument_range: (lowest, highest) note to consider, either
	                as two note-name strings or as two MIDI numbers.
	            min_note_duration: seconds the same note must persist
	                before it is recorded.
	            min_rms: loudness (RMS of the windowed buffer) at or below
	                which the input is treated as silence.
	            output_on: when true, print each note as it is recorded.
	            show_histogram: when true, plot the per-note response with
	                `SpectralHistogram`.

	        Returns:
	            A list of the notes recorded (values produced by
	            `int_to_note_name`); a note is only appended when it
	            differs from the last recorded one.  `None` if an
	            exception interrupted listening (the exception is printed).

	        Usage Example:
	          >>> list_devices()  # to list system devices
	          >>> MicListener(input_device_index=0).listen()
	        """

	        note_min, note_max = instrument_range
	        if isinstance(note_min, str) and isinstance(note_max, str):
	            note_min = note_name_to_int(note_min)
	            note_max = note_name_to_int(note_max)

	        # Create Hanning window function
	        samples_per_fft = samples_per_frame * frames_per_fft
	        ss = np.linspace(0, 2 * np.pi, samples_per_fft, False)
	        window = 0.5 * (1 - np.cos(ss))

	        # Materialised as a list / array: a lazy `map` object
	        # (Python 3) can be neither indexed nor handed to `np.interp`.
	        notes_in_range = list(range(note_min, note_max + 1))
	        fftfreqs = np.fft.rfftfreq(len(window), 1.0 / sampling_rate)
	        note_freqs = np.array([number_to_freq(n) for n in notes_in_range])

	        # Allocate space to run an FFT.
	        buf = np.zeros(samples_per_fft, dtype=np.float32)
	        num_frames = 0

	        # Build the plot before claiming the audio device so a
	        # plotting failure cannot leave a stream open.
	        histogram = None
	        if show_histogram:
	            histogram = SpectralHistogram(notes_in_range, max_refresh_rate=1)

	        # Initialize audio
	        audio_parameters = {'format': pyaudio.paInt16,
	                            'channels': 1,
	                            'rate': sampling_rate,
	                            'input': True,
	                            'frames_per_buffer': samples_per_frame,
	                            'input_device_index': self.input_device}
	        audio = pyaudio.PyAudio()
	        try:
	            stream = audio.open(**audio_parameters)
	        except Exception:
	            audio.terminate()
	            raise

	        try:
	            stream.start_stream()

	            if output_on:
	                print('sampling at', sampling_rate, 'Hz', '\n')

	            # The leading None is a sentinel so `note_history[-1]`
	            # always exists; it is stripped before returning.
	            note_history = [None]
	            start_time = time()
	            sound_start_time = None
	            previous_note = None
	            while stream.is_active():

	                # Shift the buffer down, place new samples at the end
	                buf[:-samples_per_frame] = buf[samples_per_frame:]
	                buf[-samples_per_frame:] = np.frombuffer(
	                    stream.read(samples_per_frame), np.int16)
	                frame = buf * window
	                num_frames += 1

	                rms = np.sqrt(np.mean(frame * frame))  # used as loudness
	                if rms > min_rms:
	                    silence = False
	                    if sound_start_time is None:
	                        sound_start_time = time()
	                else:
	                    silence = True
	                    previous_note = None
	                    sound_start_time = None

	                # if loud enough and buffer is full, find note
	                if not silence and num_frames >= frames_per_fft:

	                    # Run the FFT on the windowed buffer
	                    fft = np.abs(np.fft.rfft(frame))
	                    note_fft = np.interp(note_freqs, fftfreqs, fft)

	                    # Get frequency of maximum response in range
	                    freq = note_freqs[note_fft.argmax()]

	                    # Get note number and nearest note
	                    n = freq_to_number(freq)
	                    n0 = int(round(n))
	                    current_note = int_to_note_name(n0)

	                    # A change of note restarts the duration clock.
	                    if previous_note != current_note:
	                        sound_start_time = time()

	                    if histogram is not None:
	                        histogram.redraw(note_fft)

	                    sound_duration = time() - sound_start_time
	                    if (sound_duration > min_note_duration and
	                            note_history[-1] != current_note):
	                        note_history.append(current_note)

	                        if output_on:
	                            # str(): the note object may not support
	                            # the `s` format code directly.
	                            s = ('freq: {:4.2f} Hz  note: {:>3s} {:+.2f}'
	                                 ''.format(freq, str(current_note), n - n0))
	                            print(s, 'duration:', sound_duration,
	                                  'rms:', rms)

	                    previous_note = current_note

	                # The sentinel entry does not count as a heard note.
	                notes_heard = len(note_history) - 1
	                if (notes_heard >= num_notes or
	                        time() - start_time > duration):
	                    return note_history[1:]

	            # Stream went inactive before either limit was reached.
	            return note_history[1:]

	        except Exception as e:
	            print(e)
	        finally:
	            stream.close()
	            audio.terminate()


	def list_devices():
	    """Print the index and name of each audio device that can record.

	    Only devices of host API 0 that report at least one input channel
	    are printed.  The PyAudio instance created for the query is
	    terminated before returning, even if the query fails.
	    """
	    p = pyaudio.PyAudio()
	    try:
	        device_info = p.get_device_info_by_host_api_device_index
	        number_of_devices = p.get_host_api_info_by_index(0).get('deviceCount')
	        for i in range(number_of_devices):
	            info = device_info(0, i)  # looked up once per device
	            if info.get('maxInputChannels') > 0:
	                print("Input Device id ", i, " - ",
	                      info.get('name'))
	    finally:
	        p.terminate()


	if __name__ == '__main__':
	    # Script entry point: let the user pick a device, then listen for
	    # up to 100 notes with console output and the live histogram.
	    list_devices()
	    # `input` returns text on Python 3, but the device index must be
	    # an integer.
	    input_device = int(input("Select device:"))
	    mic = MicListener(input_device)
	    mic.listen(num_notes=100, show_histogram=True, output_on=True)