Skip to content

Instantly share code, notes, and snippets.

@nomolosvulgaris
Last active November 10, 2022 12:02
Show Gist options
  • Save nomolosvulgaris/59cd2f17024dc74508b5855461c75e4a to your computer and use it in GitHub Desktop.
Save nomolosvulgaris/59cd2f17024dc74508b5855461c75e4a to your computer and use it in GitHub Desktop.
import time
start_time = time.time()
start_time_all = start_time
import sys
import essentia
import essentia.standard as es
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import essentia.streaming as ess
import matplotlib.colors as colors
import math
import tqdm
# Key names for the twelve segments of the key wheel, one entry per segment.
# Enharmonic spellings of the same segment are separated by a newline so the
# entry can be used directly as a two-line text label.
# Outer ring: major keys.
labels_o = "F#\nGb B\nCb E A D G C F A#\nBb D#\nEb G#\nAb C#\nDb".split(' ')
# Inner ring: minor keys, index-aligned with the outer ring.
labels_i = "D#m\nEbm G#m\nAbm C#m\nDbm F#m\nGbm Bm Em Am Dm Gm Cm Fm A#m\nBbm".split(' ')

# Maps every key spelling to an RGB colour. Both rings of a segment share a
# hue; the major (outer) keys are more saturated than the minor (inner) ones.
key_colors = {}
for segment, (outer_label, inner_label) in enumerate(zip(labels_o, labels_i)):
    hue = (segment + 5) / 12
    # Wrap hues that ran past the end of the colour circle back into range.
    if hue > 1:
        hue -= 1
    for spelling in outer_label.split("\n"):
        key_colors[spelling] = colors.hsv_to_rgb([hue, 0.7, 0.9])
    for spelling in inner_label.split("\n"):
        key_colors[spelling] = colors.hsv_to_rgb([hue, 0.3, 0.9])

# Sample rate (Hz) requested from every audio loader in this file.
sample_rate = 44100
# Opacity applied to key-coloured areas in the plots.
key_alpha = 0.8
class Analyzer:
    """Detect the tempo and musical key of an audio file and plot the results.

    Creating an instance loads the audio and builds a two-panel matplotlib
    figure: a wide left panel (waveform, BPM curve, key-coloured spans) and a
    narrow right panel (key wheel).

    Attributes set by ``__init__``:
        input_name: path of the audio file.
        audio: mono samples returned by the loader.
        song_length: duration in seconds (``len(audio) / sample_rate``).
        visualizer: the matplotlib figure.
        vis_audio, vis_camelot: the left and right axes of the figure.
        formatter: tick formatter that renders seconds as ``MM:SS``.

    Attributes set lazily by the ``detect_*`` methods:
        bpm, beats, bpms: overall tempo, beat positions and per-beat tempo.
        keys_on_chunks: list of ``[start, end, key]`` entries.
        full_key: key detected for the whole file.
    """

    def __init__(self, input_name):
        """Load ``input_name`` and prepare the empty figure."""
        self.input_name = input_name
        self.loader = es.MonoLoader(filename=self.input_name, sampleRate=sample_rate)
        self.audio = self.loader()
        self.song_length = len(self.audio) / sample_rate
        self.visualizer, (self.vis_audio, self.vis_camelot) = plt.subplots(
            1, 2, figsize=(20, 6), width_ratios=(15, 5))
        plt.subplots_adjust(wspace=0, hspace=0)
        # Strip tick labels and the frame from both panels.
        for subplot in (self.vis_audio, self.vis_camelot):
            subplot.axes.set_xticklabels([])
            subplot.axes.set_yticklabels([])
            for side in ('top', 'right', 'bottom', 'left'):
                subplot.spines[side].set_visible(False)
        self.formatter = matplotlib.ticker.FuncFormatter(self._format_timestamp)

    @staticmethod
    def _format_timestamp(seconds, _pos=None):
        """Format a time offset in seconds as ``MM:SS`` for axis ticks.

        Negative offsets (axis margins left of zero) are clamped to
        ``00:00`` and minutes keep counting past 59 instead of wrapping, so
        recordings longer than an hour get unambiguous labels. ``_pos`` is
        the tick position argument supplied by ``FuncFormatter``; it is
        unused.
        """
        whole_seconds = max(int(seconds), 0)
        return '%02d:%02d' % divmod(whole_seconds, 60)

    @staticmethod
    def _circle_coordinates(func, radius, parts=12, offset_deg=0):
        """Return ``parts + 1`` coordinates of points spaced evenly on a circle.

        ``func`` is ``math.cos`` for x values or ``math.sin`` for y values.
        The last point repeats the first one (a full turn later) so that
        segment ``i`` can always use points ``i`` and ``i + 1``.
        """
        return [func((i * (360 / parts) + offset_deg) / 180 * math.pi) * radius
                for i in range(0, parts + 1)]

    @staticmethod
    def _chunk_spans(beats, beats_in_chunk):
        """Split beat positions into consecutive ``(start, end)`` time spans.

        A span is closed on every ``beats_in_chunk``-th beat; the first span
        starts at time 0. Beats left over after the last full span form one
        final, shorter span. An empty ``beats`` sequence yields no spans.
        """
        spans = []
        span_start = 0
        last_beat = None
        for count, beat_time in enumerate(beats, start=1):
            last_beat = beat_time
            if count % beats_in_chunk == 0:
                spans.append((span_start, beat_time))
                span_start = beat_time
        if last_beat is not None and last_beat != span_start:
            spans.append((span_start, last_beat))
        return spans

    def draw_audio(self, subplot=False):
        """Plot the waveform and return the axes it was drawn on.

        If ``subplot`` is given, the waveform is drawn on a twin of that
        axes (shared x axis); otherwise on the left panel.
        """
        if subplot:
            # Was ``subplot.twinks()``, which does not exist on Axes.
            vis_audio = subplot.twinx()
        else:
            vis_audio = self.vis_audio
        # Title is the file name without its extension; fall back to the
        # full name when stripping would leave nothing (no extension).
        title = self.input_name.rpartition('.')[0] or self.input_name
        vis_audio.set_title(title)
        vis_audio.plot(np.arange(len(self.audio)) / sample_rate, self.audio)
        vis_audio.xaxis.set_major_formatter(self.formatter)
        vis_audio.yaxis.set_visible(False)
        return vis_audio

    def detect_bpms(self):
        """Run rhythm extraction and store ``bpm``, ``beats`` and ``bpms``."""
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        self.bpm, self.beats, _, _, beats_intervals = rhythm_extractor(self.audio)
        if len(beats_intervals):
            # Repeat the last interval so there is one tempo value per beat
            # (presumably the extractor returns one interval fewer than
            # beats -- confirm against the essentia reference).
            beats_intervals = np.append(beats_intervals, beats_intervals[-1])
            self.bpms = [60.0 / interval for interval in beats_intervals]
        else:
            # No intervals at all (fewer than two beats found): fall back to
            # the overall tempo instead of failing on ``beats_intervals[-1]``.
            self.bpms = [self.bpm] * len(self.beats)

    def draw_bpms(self):
        """Overlay the per-beat tempo curve and the overall BPM on the left panel."""
        if not hasattr(self, 'bpm'):
            self.detect_bpms()
        self.vis_bpms = self.vis_audio.twinx()
        self.vis_bpms.plot(self.beats, self.bpms, 'red')
        self.vis_bpms.axhline(180, color='none')  # invisible line: plot top
        self.vis_bpms.axhline(self.bpm, color='red')
        self.vis_bpms.text(0, self.bpm + 1, " BPM=%.2f" % self.bpm, fontsize=15, color='blue')
        self.vis_bpms.xaxis.set_major_formatter(self.formatter)
        self.vis_bpms.xaxis.set_visible(False)
        self.vis_bpms.yaxis.set_ticks_position('left')

    def detect_key_on_span(self, start_time=0, end_time=1000):
        """Estimate the key of the file between ``start_time`` and ``end_time``.

        The span is re-read from ``input_name`` through a streaming network:
        loader -> frame cutter -> windowing -> spectrum -> spectral peaks ->
        HPCP -> key estimator.

        Returns:
            The key name, with ``'m'`` appended when the detected scale is
            ``'minor'`` (for example ``'A'`` or ``'Am'``).
        """
        loader_stream = ess.EasyLoader(filename=self.input_name, sampleRate=sample_rate,
                                       startTime=start_time, endTime=end_time)
        framecutter = ess.FrameCutter(frameSize=4096, hopSize=2048, silentFrames='noise')
        windowing = ess.Windowing(type='blackmanharris62')
        spectrum = ess.Spectrum()
        spectralpeaks = ess.SpectralPeaks(orderBy='magnitude', magnitudeThreshold=0.00001,
                                          minFrequency=20, maxFrequency=3500, maxPeaks=60)
        # Two HPCP extractors share the same peaks: ``hpcp`` is only stored
        # in the pool, ``hpcp_key`` feeds the key estimator.
        hpcp = ess.HPCP()
        hpcp_key = ess.HPCP(size=36, referenceFrequency=sample_rate/100, bandPreset=False,
                            minFrequency=20, maxFrequency=3500, weightType='cosine',
                            nonLinear=False, windowSize=1.)
        key = ess.Key(profileType='edma', numHarmonics=4, pcpSize=36, slope=0.6,
                      usePolyphony=True, useThreeChords=True)
        pool = essentia.Pool()

        loader_stream.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        spectralpeaks.magnitudes >> hpcp_key.magnitudes
        spectralpeaks.frequencies >> hpcp_key.frequencies
        hpcp_key.hpcp >> key.pcp
        hpcp.hpcp >> (pool, 'tonal.hpcp')
        key.key >> (pool, 'tonal.key_key')
        key.scale >> (pool, 'tonal.key_scale')
        key.strength >> (pool, 'tonal.key_strength')

        essentia.run(loader_stream)
        return pool['tonal.key_key'] + ('m' if pool['tonal.key_scale'] == 'minor' else '')

    def detect_keys_on_chunks(self, beats_in_chunk=24):
        """Detect the key of each ``beats_in_chunk``-beat span and of the whole file.

        Stores ``keys_on_chunks`` (a list of ``[start, end, key]``) and
        ``full_key``, and prints the latter. With no detected beats,
        ``keys_on_chunks`` is simply empty.
        """
        if not hasattr(self, 'bpm'):
            self.detect_bpms()
        spans = self._chunk_spans(self.beats, beats_in_chunk)
        # The progress bar advances once per analysed span.
        self.keys_on_chunks = [
            [span_start, span_end, self.detect_key_on_span(span_start, span_end)]
            for span_start, span_end in tqdm.tqdm(spans)
        ]
        # A very large end time is used to cover the entire file.
        self.full_key = self.detect_key_on_span(0, 1e+7)
        print("Key detected for the whole song is " + self.full_key)

    def draw_camelot(self):
        """Draw the two-ring key wheel on the right panel.

        Segments whose key occurs in any chunk (or is the whole-file key)
        are filled with that key's colour; the label of the whole-file key
        is rendered in bold.
        """
        if not hasattr(self, 'keys_on_chunks'):
            self.detect_keys_on_chunks()
        keys_on = {chunk_and_key[2] for chunk_and_key in self.keys_on_chunks}
        keys_on.add(self.full_key)

        # Ring boundaries (r1, r2, r3) and label radii (r1_5, r2_5).
        r = 1.0
        r1 = r * 1.0
        r1_5 = r * 0.826
        r2 = r * 0.7
        r2_5 = r * 0.5
        r3 = r * 0.3
        circle = self._circle_coordinates
        # Boundaries start at 15 degrees, labels at 30 degrees, i.e. in the
        # middle of each 30-degree segment.
        x1, y1 = circle(math.cos, r1, offset_deg=15), circle(math.sin, r1, offset_deg=15)
        x1_5, y1_5 = circle(math.cos, r1_5, offset_deg=30), circle(math.sin, r1_5, offset_deg=30)
        x2, y2 = circle(math.cos, r2, offset_deg=15), circle(math.sin, r2, offset_deg=15)
        x2_5, y2_5 = circle(math.cos, r2_5, offset_deg=30), circle(math.sin, r2_5, offset_deg=30)
        x3, y3 = circle(math.cos, r3, offset_deg=15), circle(math.sin, r3, offset_deg=15)

        self.vis_camelot.axis('equal')
        self.vis_camelot.xaxis.set_visible(False)
        self.vis_camelot.yaxis.set_visible(False)
        self.vis_camelot.fill(x1, y1, facecolor='none', edgecolor='black', linewidth=3)
        self.vis_camelot.fill(x3, y3, facecolor='none', edgecolor='black', linewidth=3)
        for i in range(0, 12):
            # Quadrilaterals of the outer (r1..r2) and inner (r2..r3) segment.
            x1c = [x1[i], x1[i+1], x2[i+1], x2[i]]
            y1c = [y1[i], y1[i+1], y2[i+1], y2[i]]
            x2c = [x2[i], x2[i+1], x3[i+1], x3[i]]
            y2c = [y2[i], y2[i+1], y3[i+1], y3[i]]
            label_o_keys = labels_o[i].split("\n")
            label_i_keys = labels_i[i].split("\n")
            face_o = key_colors[label_o_keys[0]] if keys_on.intersection(label_o_keys) else 'none'
            face_i = key_colors[label_i_keys[0]] if keys_on.intersection(label_i_keys) else 'none'
            self.vis_camelot.fill(x1c, y1c, facecolor=face_o, edgecolor='black',
                                  linewidth=3, alpha=key_alpha)
            self.vis_camelot.text(x1_5[i], y1_5[i], labels_o[i],
                                  horizontalalignment='center', verticalalignment='center',
                                  fontsize=12,
                                  fontweight='bold' if self.full_key in label_o_keys else 'normal')
            # NOTE(review): unlike the outer ring, the inner ring is filled
            # without alpha=key_alpha -- confirm this is intentional.
            self.vis_camelot.fill(x2c, y2c, facecolor=face_i, edgecolor='black', linewidth=3)
            self.vis_camelot.text(x2_5[i], y2_5[i], labels_i[i],
                                  horizontalalignment='center', verticalalignment='center',
                                  fontsize=9,
                                  fontweight='bold' if self.full_key in label_i_keys else 'normal')

    def draw_keys_on_chunks(self):
        """Shade every analysed span on the left panel with its key colour, then draw the wheel."""
        if not hasattr(self, 'keys_on_chunks'):
            self.detect_keys_on_chunks()
        for span_start, span_end, key in self.keys_on_chunks:
            self.vis_audio.axvspan(span_start, span_end, facecolor=key_colors[key],
                                   alpha=key_alpha, fill=True)
        self.vis_audio.yaxis.set_visible(False)
        self.draw_camelot()

    def show_plot(self):
        """Display the figure interactively."""
        plt.show()

    def save_plot(self, output_name):
        """Write this analyzer's figure to ``output_name``."""
        # Save our own figure rather than whichever one pyplot considers
        # current, so the result does not depend on other open figures.
        self.visualizer.savefig(output_name)
if __name__ == '__main__':
    # Command line: <script> INPUT_AUDIO [OUTPUT_IMAGE]
    # With OUTPUT_IMAGE the figure is saved to that path, otherwise it is
    # shown in a window. Each stage reports its own wall-clock time.
    if len(sys.argv) < 2:
        # Without an input file there is nothing to analyse; say so instead
        # of exiting silently.
        sys.exit("Usage: %s INPUT_AUDIO [OUTPUT_IMAGE]" % sys.argv[0])

    print("Initiated in %s seconds." % (time.time() - start_time))
    start_time = time.time()
    analyzer = Analyzer(sys.argv[1])
    print("Analyzer loaded in %s seconds." % (time.time() - start_time))
    start_time = time.time()
    analyzer.draw_bpms()
    print("BPMs curve drawn in %s seconds." % (time.time() - start_time))
    start_time = time.time()
    analyzer.draw_keys_on_chunks()
    print("Keys and scales visualized in %s seconds." % (time.time() - start_time))
    start_time = time.time()
    analyzer.draw_audio()
    print("Audio wave peaks drawn in %s seconds." % (time.time() - start_time))
    start_time = time.time()
    # Total time is measured from the very first statement of the script.
    print("%s (%d:%02d) done in %s seconds." % (
        analyzer.input_name, analyzer.song_length/60, analyzer.song_length%60,
        time.time() - start_time_all))
    if len(sys.argv) > 2:
        analyzer.save_plot(sys.argv[2])
        print("%s successfully saved in %s seconds." % (sys.argv[2], time.time() - start_time))
    else:
        analyzer.show_plot()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment