Created
November 1, 2016 13:31
Animated polar chroma plot
""" | |
What pitch classes are playing? | |
video: https://www.youtube.com/watch?v=DOJyjMQHP8U | |
We computed a chromagram, ie. a sequence of pitch class vectors in | |
time using the Python tfr library (https://github.com/bzamecnik/tfr) | |
and animated it with matplotlib and moviepy. The tfr library computes | |
very sharp spectrograms and allows to transform frequencies to pitches. | |
Pitches are folded to classes by ignoring the octave producing | |
a chromagram. It is then smoothed by median filter to get rid of | |
percussive components. | |
Music: Taberna Folk: Greensleeves | |
(https://www.youtube.com/watch?v=pguNvlp5w-4) - CC BY | |
This is an excerpts from the beginning of Greensleeves. The song is | |
in the key of Eb. Pitch classes are shown relative to this key for | |
easier understanding (like if we transposed it to the key of C). | |
Chords (source: https://tabs.ultimate-guitar.com/m/misc_traditional/greensleeves_crd.htm): | |
Am7 Cmaj7 | G Em | Am | E | |
Basically I tried to replicate the basic functionality of the HarmonEye | |
(http://harmoneye.com) app, that I wrote previously in Java/Android, | |
now using my the tfr library and other Python libs so that it's easy to | |
produce a video directly. | |
""" | |
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import moviepy.editor as mpy
from moviepy.video.io.bindings import mplfig_to_npimage
import numpy as np
from scipy.signal import medfilt
import tfr
# --- parameters ---
audio_file = 'green-sleeves-intro.flac'
video_file = 'green-sleeves-chroma-polar.mp4'
fps = 30
key = 3  # Eb (3 semitones above C)
fifths = False  # chromatic order; True for circle-of-fifths order
# ------
signal_frames = tfr.SignalFrames(audio_file, frame_size=4096, hop_size=512)
fs = signal_frames.sample_rate
output_frame_size = fs / fps  # one chroma frame per video frame
X_pitchgram = tfr.pitchgram(signal_frames, output_frame_size=output_frame_size, magnitudes='power_db_normalized')
# fold the 115 pitch bins into whole octaves (9 octaves x 12 pitch classes)
# and average over the octave axis to obtain a chromagram
X_octave_chromagram = X_pitchgram[:, :115 // 12 * 12].reshape(-1, 115 // 12, 12)
X_chromagram = X_octave_chromagram.mean(axis=1)
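# A minimal sketch of the octave-folding step above, on hypothetical toy
# data (not part of the pipeline): energy at the same pitch class in
# several octaves accumulates into a single chroma bin.
_toy = np.zeros((2, 108))           # 2 frames, 9 octaves x 12 pitch classes
_toy[:, [0, 12, 24]] = 1.0          # pitch class C sounding in three octaves
_toy_chroma = _toy.reshape(-1, 9, 12).mean(axis=1)
assert _toy_chroma.shape == (2, 12)
assert _toy_chroma[0, 0] == 3.0 / 9  # three octaves fold into chroma bin 0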
# rescale normalized power dB from [-120, 0] to [0, 1]
X_chromagram = (X_chromagram + 120) / 120
# median filter along time keeps sustained (harmonic) content;
# median filter along pitch class keeps broadband (percussive) content
X_chromagram_harmonic = medfilt(X_chromagram, (15, 1))
X_chromagram_percussive = medfilt(X_chromagram, (1, 15))
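# Sketch of why the time-axis median suppresses percussion (illustrative
# toy data only): a one-frame spike is wiped out by a 15-frame median,
# while a sustained tone passes through unchanged.
_spike = np.zeros((31, 1))
_spike[15] = 1.0                    # one-frame percussive burst
assert medfilt(_spike, (15, 1)).max() == 0.0            # spike removed
assert medfilt(np.ones((31, 1)), (15, 1))[15, 0] == 1.0  # sustained tone kept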
frame_count = len(X_chromagram)
duration = frame_count / fps
data = X_chromagram_harmonic
# step 7 lays the pitch classes out along the circle of fifths
step = 7 if fifths else 1
# rotate so that the song's key is displayed as C (relative pitch classes)
idx = (step * (np.arange(12) + key + 12)) % 12
relative_idx = (step * np.arange(12)) % 12
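# Sketch of the two layouts (illustrative): with step = 1 the circle is
# chromatic; with step = 7 it follows the circle of fifths, so
# fifth-related classes (C, G, D, ...) become neighbours.
assert list((1 * np.arange(12)) % 12)[:4] == [0, 1, 2, 3]  # C, Db, D, Eb
assert list((7 * np.arange(12)) % 12)[:4] == [0, 7, 2, 9]  # C, G, D, A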
fig = plt.figure(figsize=(10, 6), facecolor='white')
ax = fig.add_subplot(111, projection='polar')
# 12 angles, laid out clockwise starting from the top (12 o'clock)
theta = np.arange(12) / 12 * 2 * np.pi * -1 + np.pi / 2
r = data[0]
r = r / np.sqrt((r ** 2).sum())
# r = r / data.max()
# note: stemlines is a list of Line2D objects in matplotlib < 3.1
markerline, stemlines, baseline = ax.stem(theta, r)
ax.set_yticklabels([])
ax.set_rmax(1.0)
tone_labels = np.array(['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B'])
fig.suptitle('original key: %s' % tone_labels[key])
ax.set_thetagrids((90 - np.arange(0, 360, 360 / 12)) % 360, tone_labels[relative_idx])

def update(values):
    # reorder to the display layout and update the stem plot in place
    values = values[idx]
    markerline.set_ydata(values)
    for sl, value in zip(stemlines, values):
        sl.set_ydata([0, value])

def make_frame_mpl(t):
    i = int(t * fps)
    r = data[i]
    r = r / data.max()
    update(r)
    return mplfig_to_npimage(fig)  # RGB image of the figure

animation = mpy.VideoClip(make_frame_mpl, duration=duration)
animation.audio = mpy.AudioFileClip(audio_file)
animation.write_videofile(video_file, fps=fps)