Skip to content

Instantly share code, notes, and snippets.

@manugarri
Forked from livibetter/README.rst
Created May 11, 2016 12:25
Show Gist options
  • Save manugarri/1c0fcfe9619b775bb82de0790ccb88da to your computer and use it in GitHub Desktop.
Save manugarri/1c0fcfe9619b775bb82de0790ccb88da to your computer and use it in GitHub Desktop.
Frequency spectrum of sound using PyAudio, NumPy, and Matplotlib

This Gist shows how I use PyAudio, NumPy, and Matplotlib to plot the frequency spectrum of system sound or microphone input.

You can read this blog post for more detail or watch this video:

https://i.ytimg.com/vi/hiGB_AP6iTo/maxresdefault.jpg
#!/bin/bash
# Combine the rendered video (temp.mp4) with the audio track (temp.wav) into
# sound-spectrum.mp4: the video stream is copied as-is, the audio is encoded
# with libmp3lame.
ffmpeg -i temp.mp4 -i temp.wav -vcodec copy -acodec libmp3lame sound-spectrum.mp4
#!/usr/bin/env python
# Written by Yu-Jie Lin
# Public Domain
#
# Deps: NumPy and Matplotlib (this script reads temp.wav and does not import PyAudio)
# Blog: http://blog.yjl.im/2012/11/frequency-spectrum-of-sound-using.html
from __future__ import print_function
import struct
import wave
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
# Title passed to the subplot.
TITLE = ''
# Figure size in pixels; main() divides these by the figure dpi to get inches.
WIDTH = 1280
HEIGHT = 720
# Video frame rate; used for the animation interval, the number of frames and
# the fps of the saved movie.
FPS = 25.0
# FFT length handed to np.fft.fft; audio is consumed in multiples of nFFT frames.
nFFT = 512
# NOTE(review): not referenced by the functions visible in this script.
BUF_SIZE = 4 * nFFT
# Expected bytes per sample, channel count and frame rate of temp.wav;
# main() checks the file against these values.
SAMPLE_SIZE = 2
CHANNELS = 2
RATE = 44100
def animate(i, line, wf, MAX_y):
    """Update the spectrum line for video frame ``i`` from the wave file.

    Reads the whole ``nFFT``-frame chunks that lie between the current read
    position of ``wf`` and the end of video frame ``i``, computes the FFT of
    each channel and stores the combined magnitudes in ``line``.

    Args:
        i: Zero-based index of the video frame being drawn.
        line: Object whose ``set_ydata`` receives the ``nFFT - 1`` magnitudes.
        wf: Open wave reader providing ``tell``, ``readframes`` and
            ``getnframes``; samples are unpacked as interleaved 16-bit
            left/right pairs.
        MAX_y: Divisor used to normalize the integer samples.

    Returns:
        A one-element tuple containing ``line``.
    """
    # Whole nFFT-sized chunks still missing up to the end of frame i.  Floor
    # division keeps this an int on Python 3, where readframes() and slice
    # indices reject floats.
    n_chunks = (int((i + 1) * RATE / FPS) - wf.tell()) // nFFT
    # Nothing to read yet, or the file position is already past this frame.
    if n_chunks <= 0:
        return line,
    n_frames = n_chunks * nFFT
    data = wf.readframes(n_frames)
    print('{:5.1f}% - V: {:5,d} - A: {:10,d} / {:10,d}'.format(
        100.0 * wf.tell() / wf.getnframes(), i, wf.tell(), wf.getnframes()
    ))
    # Unpack data, LRLRLR...
    n_samples = len(data) // SAMPLE_SIZE
    y = np.array(struct.unpack("%dh" % n_samples, data)) / MAX_y
    y_L = y[::2]
    y_R = y[1::2]
    # np.fft.fft crops or zero-pads each channel to nFFT points.
    Y_L = np.fft.fft(y_L, nFFT)
    Y_R = np.fft.fft(y_R, nFFT)
    # Sewing FFT of two channels together, DC part uses right channel's
    half = nFFT // 2
    Y = np.abs(np.hstack((Y_L[-half:-1], Y_R[:half])))
    line.set_ydata(Y)
    return line,
def init(line):
    """Reset ``line`` to an all-zero spectrum (the clear frame for animation).

    Args:
        line: Object whose ``set_ydata`` receives ``nFFT - 1`` zeros.

    Returns:
        A one-element tuple containing ``line``.
    """
    blank = np.zeros(nFFT - 1)
    line.set_ydata(blank)
    return (line,)
def main():
    """Render the spectrum of ``temp.wav`` as an animation saved to ``temp.mp4``.

    Sets up a figure of WIDTH x HEIGHT pixels, opens ``temp.wav``, checks it
    against CHANNELS, SAMPLE_SIZE and RATE, and saves one animation frame per
    1/FPS seconds of audio.

    Raises:
        ValueError: If ``temp.wav`` does not match CHANNELS, SAMPLE_SIZE or
            RATE.
    """
    dpi = plt.rcParams['figure.dpi']
    plt.rcParams['savefig.dpi'] = dpi
    plt.rcParams["figure.figsize"] = (1.0 * WIDTH / dpi, 1.0 * HEIGHT / dpi)
    fig = plt.figure()
    # Frequency range
    half = nFFT // 2
    x_f = 1.0 * np.arange(-half + 1, half) / nFFT * RATE
    ax = fig.add_subplot(111, title=TITLE, xlim=(x_f[0], x_f[-1]),
                         ylim=(0, 2 * np.pi * nFFT ** 2 / RATE))
    try:
        ax.set_yscale('symlog', linthreshy=nFFT ** 0.5)
    except TypeError:
        # Newer Matplotlib releases dropped the axis-specific keyword in
        # favor of plain ``linthresh``.
        ax.set_yscale('symlog', linthresh=nFFT ** 0.5)
    line, = ax.plot(x_f, np.zeros(nFFT - 1))

    # Change x tick labels for left channel
    def change_xlabel(evt):
        # Strip the minus sign so the left half of the axis shows positive
        # frequencies, then disconnect so this runs only on the first draw.
        labels = [label.get_text().replace(u'\u2212', '')
                  for label in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        fig.canvas.mpl_disconnect(drawid)
    drawid = fig.canvas.mpl_connect('draw_event', change_xlabel)

    MAX_y = 2.0 ** (SAMPLE_SIZE * 8 - 1)
    wf = wave.open('temp.wav', 'rb')
    try:
        # Explicit checks instead of assert, which is stripped under -O.
        if wf.getnchannels() != CHANNELS:
            raise ValueError('temp.wav must have %d channels' % CHANNELS)
        if wf.getsampwidth() != SAMPLE_SIZE:
            raise ValueError(
                'temp.wav must use %d-byte samples' % SAMPLE_SIZE)
        if wf.getframerate() != RATE:
            raise ValueError('temp.wav must be sampled at %d Hz' % RATE)
        frames = wf.getnframes()
        # Multiply before dividing so Python 2 integer division does not
        # truncate the duration to whole seconds.
        ani = animation.FuncAnimation(
            fig, animate, int(frames * FPS / RATE),
            init_func=lambda: init(line), fargs=(line, wf, MAX_y),
            interval=1000.0 / FPS, blit=True
        )
        ani.save('temp.mp4', fps=FPS)
    finally:
        # Close the wave file even if validation or rendering fails.
        wf.close()


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# Written by Yu-Jie Lin
# Public Domain
#
# Deps: PyAudio, NumPy, and Matplotlib
# Blog: http://blog.yjl.im/2012/11/frequency-spectrum-of-sound-using.html
import struct
import wave
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
import pyaudio
# Seconds to record into temp.wav / temp.mp4; when falsy (0.0) nothing is
# saved and the animation is shown in a window instead.
SAVE = 0.0
# Title passed to the subplot.
TITLE = ''
# Figure size in pixels; main() divides these by the figure dpi to get inches.
WIDTH = 1280
HEIGHT = 720
# Frame rate used for the animation interval and for the saved movie.
FPS = 25.0
# FFT length handed to np.fft.fft; audio is read in multiples of nFFT frames.
nFFT = 512
# frames_per_buffer given to the PyAudio input stream.
BUF_SIZE = 4 * nFFT
# Sample format of the input stream; animate() unpacks the data as 16-bit
# integers (struct code 'h').
FORMAT = pyaudio.paInt16
# Channel count and sample rate of the input stream and of the saved wave file.
CHANNELS = 2
RATE = 44100
def animate(i, line, stream, wf, MAX_y):
    """Update the spectrum line from freshly captured audio.

    Reads a positive multiple of ``nFFT`` frames from ``stream``, optionally
    appends them to ``wf`` (when ``SAVE`` is truthy), computes the FFT of each
    channel and stores the combined magnitudes in ``line``.

    Args:
        i: Frame index supplied by the animation; not used here.
        line: Object whose ``set_ydata`` receives the ``nFFT - 1`` magnitudes.
        stream: Input stream providing ``get_read_available`` and ``read``;
            data is unpacked as interleaved 16-bit left/right samples.
        wf: Wave writer receiving the raw data when ``SAVE`` is truthy; may be
            ``None`` otherwise.
        MAX_y: Divisor used to normalize the integer samples.

    Returns:
        A one-element tuple containing ``line``.
    """
    # Read n*nFFT frames from stream, n > 0.  Floor division keeps the frame
    # count an int on Python 3 as well.
    n_frames = max(stream.get_read_available() // nFFT, 1) * nFFT
    data = stream.read(n_frames)
    if SAVE:
        wf.writeframes(data)
    # Unpack data, LRLRLR...
    y = np.array(struct.unpack("%dh" % (n_frames * CHANNELS), data)) / MAX_y
    y_L = y[::2]
    y_R = y[1::2]
    # np.fft.fft crops or zero-pads each channel to nFFT points.
    Y_L = np.fft.fft(y_L, nFFT)
    Y_R = np.fft.fft(y_R, nFFT)
    # Sewing FFT of two channels together, DC part uses right channel's
    half = nFFT // 2
    Y = np.abs(np.hstack((Y_L[-half:-1], Y_R[:half])))
    line.set_ydata(Y)
    return line,
def init(line):
    """Reset ``line`` to an all-zero spectrum (the clear frame for animation).

    Args:
        line: Object whose ``set_ydata`` receives ``nFFT - 1`` zeros.

    Returns:
        A one-element tuple containing ``line``.
    """
    blank = np.zeros(nFFT - 1)
    line.set_ydata(blank)
    return (line,)
def main():
    """Show, or record, a live spectrum of the PyAudio input stream.

    Sets up a figure of WIDTH x HEIGHT pixels and animates the spectrum of the
    captured audio.  When ``SAVE`` is truthy, ``FPS * SAVE`` frames are
    rendered to ``temp.mp4`` and the captured audio is written to
    ``temp.wav``; otherwise the animation is shown in a window.
    """
    dpi = plt.rcParams['figure.dpi']
    plt.rcParams['savefig.dpi'] = dpi
    plt.rcParams["figure.figsize"] = (1.0 * WIDTH / dpi, 1.0 * HEIGHT / dpi)
    fig = plt.figure()
    # Frequency range
    half = nFFT // 2
    x_f = 1.0 * np.arange(-half + 1, half) / nFFT * RATE
    ax = fig.add_subplot(111, title=TITLE, xlim=(x_f[0], x_f[-1]),
                         ylim=(0, 2 * np.pi * nFFT ** 2 / RATE))
    try:
        ax.set_yscale('symlog', linthreshy=nFFT ** 0.5)
    except TypeError:
        # Newer Matplotlib releases dropped the axis-specific keyword in
        # favor of plain ``linthresh``.
        ax.set_yscale('symlog', linthresh=nFFT ** 0.5)
    line, = ax.plot(x_f, np.zeros(nFFT - 1))

    # Change x tick labels for left channel
    def change_xlabel(evt):
        # Strip the minus sign so the left half of the axis shows positive
        # frequencies, then disconnect so this runs only on the first draw.
        labels = [label.get_text().replace(u'\u2212', '')
                  for label in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        fig.canvas.mpl_disconnect(drawid)
    drawid = fig.canvas.mpl_connect('draw_event', change_xlabel)

    p = pyaudio.PyAudio()
    stream = None
    wf = None
    try:
        # Used for normalizing signal. If use paFloat32, then it's already
        # -1..1.  Because of saving wave, paInt16 will be easier.
        MAX_y = 2.0 ** (p.get_sample_size(FORMAT) * 8 - 1)
        # frames stays None (no fixed frame count) unless recording.
        frames = None
        if SAVE:
            frames = int(FPS * SAVE)
            wf = wave.open('temp.wav', 'wb')
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=BUF_SIZE)
        ani = animation.FuncAnimation(
            fig, animate, frames,
            init_func=lambda: init(line), fargs=(line, stream, wf, MAX_y),
            interval=1000.0 / FPS, blit=True
        )
        if SAVE:
            ani.save('temp.mp4', fps=FPS)
        else:
            plt.show()
    finally:
        # Release the audio device and the wave file even if setup or
        # rendering fails part-way through.
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
            if wf is not None:
                wf.close()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment