Created
November 1, 2016 18:34
-
-
Save bzamecnik/0d1648b5a6bb4371ebfd95729f8a7db7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib as mpl | |
mpl.use('Agg') | |
import matplotlib.pyplot as plt | |
import moviepy.editor as mpy | |
from moviepy.video.io.bindings import mplfig_to_npimage | |
import numpy as np | |
import os | |
from scipy.signal import medfilt | |
import tfr | |
# --- parameters ---
# Input audio to analyze (mono WAV file).
audio_file = '/Users/bzamecnik/Dropbox/Documents/harmoneye-labs/harmoneye/data/wav/c-scale-piano-mono.wav'
# Output video file with the animated chromagram.
video_file = "c-scale-piano-mono - chromagram.mp4"
# Video frame rate; also the chromagram frame rate (one chroma frame per video frame).
fps = 30
# Number of chromagram time frames visible at once in the scrolling window.
window_size = 50
# Rotation applied to the pitch-class axis in array_indexes() —
# presumably the tonic pitch class (0 = C after the A->C roll below); confirm.
key = 0
# ------
# Slice the audio into overlapping frames for time-frequency reassignment.
signal_frames = tfr.SignalFrames(audio_file, frame_size=4096, hop_size=2048)
fs = signal_frames.sample_rate
# One pitchgram frame per video frame so audio and video stay in sync.
# NOTE(review): true division yields a float here — tfr presumably accepts
# a float frame size (or fs is divisible by fps); confirm.
output_frame_size = fs / fps
X_pitchgram = tfr.pitchgram(signal_frames, output_frame_size=output_frame_size, magnitudes='power_db_normalized')
# Median-filter along the time axis (15 frames) to keep sustained/harmonic
# content and suppress transients; (15, 1) leaves the pitch axis untouched.
X_pitchgram_harmonic = medfilt(X_pitchgram, (15, 1))
# Fold pitches into octaves: keep the first 115//12*12 = 108 pitch bins,
# i.e. 9 whole octaves of 12 pitch classes each.
# NOTE(review): assumes the pitchgram has 115 pitch bins — confirm with tfr.
X_octave_chromagram = X_pitchgram_harmonic[:,:115//12*12].reshape(-1, 115//12, 12)
# Average energy across octaves -> one value per pitch class per frame.
X_chromagram = X_octave_chromagram.mean(axis=1)
# here pitch class 0 = A, so we have to shift it to make 0 = C!
X_chromagram = np.roll(X_chromagram, -3, axis=1)
# Pad half a window of silence at both ends so the red "now" cursor in the
# middle of the plotted window lines up with the start/end of the audio.
data = np.vstack([
    np.zeros((window_size//2, X_chromagram.shape[1])),
    X_chromagram,
    np.zeros((window_size//2, X_chromagram.shape[1]))])
print(data.shape)
frame_count = len(X_chromagram)
# Video length in seconds (one chromagram frame per video frame).
duration = frame_count / fps
print(frame_count, duration)
# Pitch-class labels after the roll above: index 0 = C.
tone_labels = np.array(['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B'])
def array_indexes(key, fifths):
    """Return the 12 chromagram column indexes in plotting order.

    Orders pitch classes either chromatically (step 1) or along the
    circle of fifths (step 7), rotated so the given key is aligned.
    """
    if fifths:
        # move F to make a continuous patch for the diatonic set
        step, offset = 7, -1
    else:
        step, offset = 1, 0
    positions = np.arange(12) + offset
    return (positions * step + key + 12) % 12
def label_indexes(fifths):
    """Return indexes into tone_labels matching the row order produced by
    array_indexes() (without the key rotation), so the y-tick labels line
    up with the plotted pitch-class rows."""
    step = 7 if fifths else 1
    # shift F by one position to keep the diatonic set contiguous in fifths mode
    shift = -1 if fifths else 0
    return np.mod(step * np.arange(shift, shift + 12), 12)
# Column orderings for the two subplots: chromatic (step 1, top axis) and
# circle of fifths (step 7, bottom axis), plus the matching label orders.
array_idx_step_1 = array_indexes(key, fifths=False)
array_idx_step_7 = array_indexes(key, fifths=True)
label_idx_step_1 = label_indexes(fifths=False)
label_idx_step_7 = label_indexes(fifths=True)
# Two stacked axes in one figure: chromatic view above, fifths view below.
fig, axes = plt.subplots(nrows=2, figsize=(10, 6), facecolor='white')
def create_plot(ax, array_idx, label_idx):
    """Draw the scrolling chromagram window on *ax* and return the image.

    Parameters
    ----------
    ax : matplotlib Axes to draw into.
    array_idx : column permutation of the chromagram (pitch-class order).
    label_idx : permutation of tone_labels matching array_idx.

    Returns the AxesImage so the animation can update it via set_data().
    """
    imshow_plot = ax.imshow(data[0:window_size, array_idx].T,
        interpolation='nearest', cmap='gray', vmin=0, origin='lower')
    # Red vertical cursor marking "now" in the middle of the window.
    ax.axvline(window_size//2, color='r')
    ax.set_xlabel('time')
    ax.set_ylabel('pitch class')
    # BUG FIX: tick_params takes booleans; the strings 'off' used originally
    # are truthy, and the 'on'/'off' string form was deprecated and removed
    # in matplotlib, so on modern versions the ticks would NOT be hidden.
    ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    ax.set_yticks(np.arange(12))
    ax.set_yticklabels(tone_labels[label_idx])
    return imshow_plot
# Create both subplots and keep the AxesImage handles for frame updates.
imshow_plot_step_1 = create_plot(axes[0], array_idx_step_1, label_idx_step_1)
imshow_plot_step_7 = create_plot(axes[1], array_idx_step_7, label_idx_step_7)
# Green lines on the fifths plot delimiting rows 0..6, i.e. the contiguous
# diatonic set after the fifths reordering.
axes[1].axhline(6.5, color='g')
# NOTE(review): the 0.1 offset from the image edge (-0.5) presumably just
# keeps the line visible inside the axes — confirm intent.
axes[1].axhline(-0.5+0.1, color='g')
fig.suptitle(os.path.basename(audio_file))
fig.tight_layout()
def make_frame_mpl(t):
    """Render the video frame for time *t* (seconds) as an RGB array."""
    # round() to prevent floating point errors that would lead to skipping back
    # a frame
    frame_idx = int(round(t * fps))
    window = slice(frame_idx, frame_idx + window_size)
    imshow_plot_step_1.set_data(data[window, array_idx_step_1].T)
    imshow_plot_step_7.set_data(data[window, array_idx_step_7].T)
    return mplfig_to_npimage(fig)
# Build the clip by rendering frames on demand, attach the original audio,
# and encode everything to the output file.
animation = mpy.VideoClip(make_frame_mpl, duration=duration)
animation.audio = mpy.AudioFileClip(audio_file)
animation.write_videofile(video_file, fps=fps)
# fig.savefig(video_file + '.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment