Skip to content

Instantly share code, notes, and snippets.

@jesseengel
Created September 5, 2017 17:10
Show Gist options
  • Save jesseengel/e223622e255bd5b8c9130407397a0494 to your computer and use it in GitHub Desktop.
Save jesseengel/e223622e255bd5b8c9130407397a0494 to your computer and use it in GitHub Desktop.
Script to plot "rainbowgrams" from NSynth (https://arxiv.org/abs/1704.01279)
import os
import librosa
import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams['svg.fonttype'] = 'none'
import numpy as np
from scipy.io.wavfile import read as readwav
# Constants
n_fft = 512             # STFT window / FFT size, in samples
hop_length = 256        # hop between analysis frames, in samples
SR = 16000              # nominal sample rate (not read by the functions below)
over_sample = 4         # CQT bins per note
res_factor = 0.8        # passed to librosa.cqt as filter_scale
octaves = 6             # number of octaves covered by the CQT
notes_per_octave = 10   # notes per octave before over-sampling

# Plotting functions
# 'MyMask' is black everywhere; only its alpha channel varies, going from
# fully opaque at 0.0 to fully transparent at 1.0. Drawn on top of another
# image it hides the low-valued cells and reveals the high-valued ones.
cdict = {
    'red': ((0.0, 0.0, 0.0),
            (1.0, 0.0, 0.0)),
    'green': ((0.0, 0.0, 0.0),
              (1.0, 0.0, 0.0)),
    'blue': ((0.0, 0.0, 0.0),
             (1.0, 0.0, 0.0)),
    'alpha': ((0.0, 1.0, 1.0),
              (1.0, 0.0, 0.0)),
}
my_mask = matplotlib.colors.LinearSegmentedColormap('MyMask', cdict)

# plt.register_cmap() was deprecated and later removed from matplotlib; the
# colormap registry is its replacement. Fall back to the old call on
# matplotlib versions that predate the registry.
if hasattr(matplotlib, 'colormaps'):
    # force=True keeps a re-import/reload from failing on the existing name.
    matplotlib.colormaps.register(my_mask, force=True)
else:
    plt.register_cmap(cmap=my_mask)
def note_specgram(path, ax, peak=70.0, use_cqt=True):
    """Draw a "rainbowgram" of one or more WAV files onto ``ax``.

    The time derivative of the unwrapped phase is drawn with the rainbow
    colormap, and the normalised log-magnitude is drawn on top of it with
    the ``my_mask`` colormap, so that quiet bins are blacked out.

    Requires a librosa version that provides ``power_to_db`` and returns a
    complex CQT by default.

    Args:
        path: Path to a WAV file, or a list/tuple of paths whose samples are
            summed before analysis. The sample rate of the last file read is
            the one passed to the CQT.
        ax: Matplotlib axes to draw on (anything providing ``matshow``).
        peak: Dynamic range in dB; used as ``top_db`` and to scale the
            log-magnitude.
        use_cqt: If True analyse with ``librosa.cqt``, otherwise with
            ``librosa.stft``.

    Raises:
        ValueError: If ``path`` is an empty list or tuple.
    """
    paths = path if isinstance(path, (list, tuple)) else [path]
    if not paths:
        raise ValueError('note_specgram needs at least one WAV path')

    # Convert every file to float32 *before* summing: adding integer PCM
    # arrays directly can wrap around when several samples are mixed.
    audio = None
    for p in paths:
        sr, samples = readwav(p)
        samples = samples.astype(np.float32)
        audio = samples if audio is None else audio + samples

    if use_cqt:
        C = librosa.cqt(audio, sr=sr, hop_length=hop_length,
                        bins_per_octave=int(notes_per_octave * over_sample),
                        n_bins=int(octaves * notes_per_octave * over_sample),
                        filter_scale=res_factor,
                        fmin=librosa.note_to_hz('C2'))
    else:
        C = librosa.stft(audio, n_fft=n_fft, win_length=n_fft,
                         hop_length=hop_length, center=True)

    mag, phase = librosa.magphase(C)

    # Phase difference between consecutive frames, in units of pi. The
    # first column keeps the (unwrapped) phase of the first frame so the
    # result has the same number of frames as ``mag``.
    phase_angle = np.angle(phase)
    phase_unwrapped = np.unwrap(phase_angle)
    dphase = phase_unwrapped[:, 1:] - phase_unwrapped[:, :-1]
    dphase = np.concatenate([phase_unwrapped[:, 0:1], dphase], axis=1) / np.pi

    # dB relative to the maximum, clipped to ``peak`` dB below it, then
    # shifted and scaled by ``peak`` so the loudest bin maps to 1.
    mag = (librosa.power_to_db(mag ** 2, amin=1e-13, top_db=peak,
                               ref=np.max) / peak) + 1

    # Both images are flipped along the frequency axis before drawing.
    ax.matshow(dphase[::-1, :], cmap=plt.cm.rainbow)
    ax.matshow(mag[::-1, :], cmap=my_mask)
def plot_notes(list_of_paths, rows=2, cols=4, col_labels=None,
               row_labels=None, use_cqt=True, peak=70.0):
    """Plot a ``rows`` x ``cols`` grid of rainbowgrams.

    Args:
        list_of_paths: One entry per grid cell, in row-major order. Each
            entry is forwarded to ``note_specgram`` as its ``path``.
        rows: Number of grid rows.
        cols: Number of grid columns.
        col_labels: Optional sequence of x-labels, one per column, shown
            under the bottom row.
        row_labels: Optional sequence of y-labels, one per row, shown next
            to the first column.
        use_cqt: Forwarded to ``note_specgram``.
        peak: Forwarded to ``note_specgram``.

    Raises:
        AssertionError: If ``len(list_of_paths) != rows * cols``.
    """
    N = len(list_of_paths)
    assert N == rows * cols, (
        'expected %d paths for a %dx%d grid, got %d'
        % (rows * cols, rows, cols, N))

    # squeeze=False always yields a 2-D array of axes, so the same indexing
    # works for single-row, single-column and 1x1 grids.
    fig, axes = plt.subplots(rows, cols, sharex=True, sharey=True,
                             squeeze=False)
    fig.subplots_adjust(left=0.1, right=0.9, wspace=0.05, hspace=0.1)

    for i, path in enumerate(list_of_paths):
        # divmod keeps ``row`` an integer on both Python 2 and Python 3.
        row, col = divmod(i, cols)
        ax = axes[row, col]

        print(row, col, path, ax, peak, use_cqt)
        note_specgram(path, ax, peak, use_cqt)

        # set_axis_bgcolor() was removed from matplotlib; set_facecolor()
        # is its replacement.
        ax.set_facecolor('white')
        ax.set_xticks([])
        ax.set_yticks([])

        if col == 0 and row_labels:
            ax.set_ylabel(row_labels[row])
        if row == rows - 1 and col_labels:
            ax.set_xlabel(col_labels[col])
@bluenote10
Copy link

bluenote10 commented Sep 15, 2019

A few more adjustments for newer versions of librosa / matplotlib:

  • cqt no longer has real parameter, but it can be simply removed since it defaults to complex values anyway now.
  • logamplitude has been removed. I think the equivalent is now mag = (librosa.power_to_db(mag ** 2, amin=1e-13, top_db=peak, ref=np.max) / peak) + 1.
  • ax.set_axis_bgcolor no longer exists, but can be removed or replaced by ax.set_facecolor.

@AhmadMoussa
Copy link

Forgive the dumb question, but why are we using scipy to read the wavefile when we are already importing librosa?

@iCorv
Copy link

iCorv commented Nov 28, 2019

Because for standard WAV file (PCM 16-bit or float 32-bit) scipy is much faster. However, librosa load function supports mp3 and other formats by using ffmpeg.

@AhmadMoussa
Copy link

Because for standard WAV file (PCM 16-bit or float 32-bit) scipy is much faster. However, librosa load function supports mp3 and other formats by using ffmpeg.

Thank you for the answer, this makes sense. I had to figure out the hard way that librosa is much slower than scipy when it comes to loading in audio.

@sreeharsha-rav
Copy link

Need to change print statement on line 87 to
print(row, col, path, ax, peak, use_cqt)

Then change the librosa.cqt call on line 48 to:

```python
C = librosa.cqt(audio, sr=sr, hop_length=hop_length,
                fmin=librosa.note_to_hz('C2'),
                n_bins=int(octaves * notes_per_octave * over_sample),
                bins_per_octave=int(notes_per_octave * over_sample),
                filter_scale=res_factor)
```

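On line 61, logamplitude no longer exists. Because it is applied to the power spectrogram mag**2, the equivalent replacement is power_to_db (with ref_power renamed to ref); amplitude_to_db would instead be applied to mag itself.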

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment