# jvbalen/waveform.py

## waveform.py

import os
import sys

import imageio
import numpy as np
import librosa as lr
import matplotlib.pyplot as plt
from tqdm import tqdm


def plot_wav(y, sr, start=0, stop=None, bit_depth=5, upsample=2, downsample=2):
    """Plot a segment of a waveform as a smooth curve plus a staircase curve.

    The segment ``y[start:stop]`` is drawn twice on the same axes: once
    resampled to ``sr * upsample`` as a thin line, and once resampled to
    ``sr / downsample`` as a step function. A horizontal scale bar labelled
    with its duration is added, and for segments shorter than 100 samples a
    dashed amplitude/time grid is drawn as well.

    Parameters
    ----------
    y : np.ndarray
        Audio samples (presumably 1-D with amplitudes in [-1, 1], given the
        fixed y-limits of +/-1.05 -- confirm against callers).
    sr : number
        Sampling rate of ``y``.
    start, stop : int or None
        Bounds of the segment in samples, with Python slice semantics
        (``None``, negative and out-of-range values are resolved against
        ``len(y)``).
    bit_depth : int
        Sets the spacing of the horizontal grid lines to
        ``1 / 2**(bit_depth - 1)``. The samples themselves are not quantized.
    upsample : number
        Factor by which the sampling rate is raised for the smooth curve.
    downsample : int
        Factor by which the sampling rate is lowered for the staircase curve;
        also the step between vertical grid lines.

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure (axes are switched off).

    Raises
    ------
    ValueError
        If the selected segment is empty.
    """
    # Resolve None / negative / out-of-range bounds once, so that the
    # arithmetic on ``stop - start`` below matches what is actually sliced.
    start, stop, _ = slice(start, stop).indices(len(y))
    if stop <= start:
        raise ValueError(f'empty segment: start={start}, stop={stop}')

    fig, ax = plt.subplots(1, 1, figsize=[13, 5])

    # Only build the time axis for the requested segment.
    t = lr.samples_to_time(np.arange(start, stop), sr=sr)
    y = y[start:stop]

    # show unquantized waveform
    # (sampling rates are passed by keyword: they are keyword-only in
    # librosa >= 0.10 and accepted by keyword in older releases too)
    new_sr = sr * upsample
    y_cont = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_cont = lr.samples_to_time(np.arange(len(y_cont)), sr=new_sr) + t[0]
    ax.plot(t_cont, y_cont, '-', color='k', linewidth=0.7)

    # plot quantized waveform
    new_sr = sr / downsample
    y_ = lr.resample(y, orig_sr=sr, target_sr=new_sr)
    t_ = t[::downsample]
    # The resampler's output length may differ by one from the strided time
    # axis when the rate ratio is not exact; trim both to a common length.
    n_steps = min(len(t_), len(y_))
    t_, y_ = t_[:n_steps], y_[:n_steps]
    # Duplicate every point and shift by one to turn the samples into steps.
    t_ = np.repeat(t_, 2)[1:]
    y_ = np.repeat(y_, 2)[:-1]
    ax.plot(t_, y_, '-', color='k')

    # add "scale bar": the largest power of ten (in seconds) that fits
    bar_center = np.mean(ax.get_xlim())
    bar_len = 10**np.floor(np.log10(lr.samples_to_time(stop - start, sr=sr)))
    bar_start = bar_center - bar_len/2
    bar_stop = bar_center + bar_len/2
    ax.plot([bar_start, bar_stop], [-0.8, -0.8], 'k', linewidth=1.5)
    ax.text(bar_center, -0.7, format_small_time_interval(bar_len), size=14, horizontalalignment='center')

    # add grid (only when zoomed in; gets darker as the segment shrinks)
    if stop - start < 100:
        grid_color = [(stop - start) / 100] * 3

        grid_amplitude_step = 1/(2**(bit_depth-1))
        grid_amplitudes = np.arange(-1, 1+grid_amplitude_step, grid_amplitude_step)
        ax.hlines(grid_amplitudes, t[0], t[-1], color=grid_color, linewidth=0.5, linestyle='--')

        grid_samples = np.arange(start, stop, downsample)
        grid_times = lr.samples_to_time(grid_samples, sr=sr)
        ax.vlines(grid_times, -1, 1, color=grid_color, linewidth=0.5, linestyle='--')

    # turn off axes
    ax.set_ylim([-1.05, 1.05])
    ax.axis('off')

    return fig


def format_small_time_interval(t):
    """Format a duration in seconds with a unit suited to its magnitude.

    Durations of at least one second are shown in seconds, durations of at
    least one millisecond in milliseconds, and anything smaller in
    microseconds. The number is rendered with three significant digits.
    """
    if t >= 1.0:
        value, unit = t, 's'
    elif t >= 1e-3:
        value, unit = t * 1e3, 'ms'
    else:
        value, unit = t * 1e6, 'μs'

    return f'{value:.3g} {unit}'


def half_cosine_window(n, min_=0.0, max_=1.0):
    """Return the falling half of a raised-cosine curve, scaled to a range.

    The curve is sampled at phases ``i * pi / n`` for ``i = 0 .. n-1``, so it
    starts exactly at ``max_`` and decreases towards ``min_`` without
    reaching it (the end point at phase ``pi`` is excluded).

    Parameters
    ----------
    n : int
        Number of points to generate.
    min_, max_ : float
        Value approached at the end of the window and value at its start.

    Returns
    -------
    np.ndarray
        Array with ``n`` values; empty when ``n <= 0``.
    """
    if n <= 0:
        return np.zeros(0)

    # Build the phases from an integer range rather than
    # ``np.arange(0., np.pi, np.pi / n)``: with a floating-point step the
    # number of points depends on rounding and can come out as n + 1.
    phases = np.arange(n) * (np.pi / n)
    win = np.cos(phases) * 0.5 + 0.5
    win = (max_ - min_) * win + min_

    return win


if __name__ == '__main__':

    # usage: python waveform.py AUDIO_PATH OUT_DIR
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} AUDIO_PATH OUT_DIR')
    path = sys.argv[1]  # path to audio file, only tried 44100Hz
    out_dir = sys.argv[2]  # created if missing, will be spammed with png's
    os.makedirs(out_dir, exist_ok=True)

    # load audio at its native sampling rate and normalize the peak
    max_amplitude = 0.65
    y, sr = lr.load(path, sr=None)
    peak = np.max(np.abs(y))
    if peak > 0:  # leave an all-zero signal untouched instead of dividing by zero
        y = max_amplitude * y / peak

    # parameters (derived from the file's own sampling rate rather than a
    # hard-coded 44100, so the zoom covers the same durations at any rate)
    min_scale = 0.001 * sr  # half-width of the tightest zoom, in samples
    max_scale = 1 * sr  # half-width of the widest zoom, in samples
    center = 2 * sr  # center of segment in samples
    zoom_frames = 100
    frame_rate = 12
    pause_frames = 12

    # draw and save figures; the window runs from max_scale down towards
    # min_scale on a log axis, so the frames zoom in
    filenames = []
    scales = half_cosine_window(zoom_frames, min_=np.log10(min_scale), max_=np.log10(max_scale))
    for i, scale in enumerate(tqdm(scales)):
        start = int(center - 10**scale)
        stop = int(center + 10**scale)
        fig = plot_wav(y, sr, start=start, stop=stop, upsample=2, downsample=2)
        filename = os.path.join(out_dir, f'wave_{i + 1}.png')
        fig.savefig(filename, pad_inches=0.0, bbox_inches='tight')
        filenames.append(filename)
        plt.close(fig)  # avoid accumulating open figures

    # add pauses, make symmetric (zoom in, hold, then play back in reverse)
    filenames = [filenames[0]] * (pause_frames // 2) + filenames + [filenames[-1]] * (pause_frames // 2)
    filenames = filenames + filenames[::-1]

    # write gif
    with imageio.get_writer(os.path.join(out_dir, 'wave.gif'), mode='I', duration=1./frame_rate) as writer:
        for filename in filenames:
            image = imageio.imread(filename)
            writer.append_data(image)

	import os
	import sys

	import imageio
	import numpy as np
	import librosa as lr
	import matplotlib.pyplot as plt
	from tqdm import tqdm


	def plot_wav(y, sr, start=0, stop=None, bit_depth=5, upsample=2, downsample=2):
		"""Plot a segment of a waveform as a smooth curve plus a staircase curve.

		The segment ``y[start:stop]`` is drawn twice on the same axes: once
		resampled to ``sr * upsample`` as a thin line, and once resampled to
		``sr / downsample`` as a step function. A horizontal scale bar labelled
		with its duration is added, and for segments shorter than 100 samples a
		dashed amplitude/time grid is drawn as well.

		Parameters
		----------
		y : np.ndarray
			Audio samples (presumably 1-D with amplitudes in [-1, 1], given the
			fixed y-limits of +/-1.05 -- confirm against callers).
		sr : number
			Sampling rate of ``y``.
		start, stop : int or None
			Bounds of the segment in samples, with Python slice semantics
			(``None``, negative and out-of-range values are resolved against
			``len(y)``).
		bit_depth : int
			Sets the spacing of the horizontal grid lines to
			``1 / 2**(bit_depth - 1)``. The samples themselves are not quantized.
		upsample : number
			Factor by which the sampling rate is raised for the smooth curve.
		downsample : int
			Factor by which the sampling rate is lowered for the staircase
			curve; also the step between vertical grid lines.

		Returns
		-------
		matplotlib.figure.Figure
			The newly created figure (axes are switched off).

		Raises
		------
		ValueError
			If the selected segment is empty.
		"""
		# Resolve None / negative / out-of-range bounds once, so that the
		# arithmetic on ``stop - start`` below matches what is actually sliced.
		start, stop, _ = slice(start, stop).indices(len(y))
		if stop <= start:
			raise ValueError(f'empty segment: start={start}, stop={stop}')

		fig, ax = plt.subplots(1, 1, figsize=[13, 5])

		# Only build the time axis for the requested segment.
		t = lr.samples_to_time(np.arange(start, stop), sr=sr)
		y = y[start:stop]

		# show unquantized waveform
		# (sampling rates are passed by keyword: they are keyword-only in
		# librosa >= 0.10 and accepted by keyword in older releases too)
		new_sr = sr * upsample
		y_cont = lr.resample(y, orig_sr=sr, target_sr=new_sr)
		t_cont = lr.samples_to_time(np.arange(len(y_cont)), sr=new_sr) + t[0]
		ax.plot(t_cont, y_cont, '-', color='k', linewidth=0.7)

		# plot quantized waveform
		new_sr = sr / downsample
		y_ = lr.resample(y, orig_sr=sr, target_sr=new_sr)
		t_ = t[::downsample]
		# The resampler's output length may differ by one from the strided time
		# axis when the rate ratio is not exact; trim both to a common length.
		n_steps = min(len(t_), len(y_))
		t_, y_ = t_[:n_steps], y_[:n_steps]
		# Duplicate every point and shift by one to turn the samples into steps.
		t_ = np.repeat(t_, 2)[1:]
		y_ = np.repeat(y_, 2)[:-1]
		ax.plot(t_, y_, '-', color='k')

		# add "scale bar": the largest power of ten (in seconds) that fits
		bar_center = np.mean(ax.get_xlim())
		bar_len = 10**np.floor(np.log10(lr.samples_to_time(stop - start, sr=sr)))
		bar_start = bar_center - bar_len/2
		bar_stop = bar_center + bar_len/2
		ax.plot([bar_start, bar_stop], [-0.8, -0.8], 'k', linewidth=1.5)
		ax.text(bar_center, -0.7, format_small_time_interval(bar_len), size=14, horizontalalignment='center')

		# add grid (only when zoomed in; gets darker as the segment shrinks)
		if stop - start < 100:
			grid_color = [(stop - start) / 100] * 3

			grid_amplitude_step = 1/(2**(bit_depth-1))
			grid_amplitudes = np.arange(-1, 1+grid_amplitude_step, grid_amplitude_step)
			ax.hlines(grid_amplitudes, t[0], t[-1], color=grid_color, linewidth=0.5, linestyle='--')

			grid_samples = np.arange(start, stop, downsample)
			grid_times = lr.samples_to_time(grid_samples, sr=sr)
			ax.vlines(grid_times, -1, 1, color=grid_color, linewidth=0.5, linestyle='--')

		# turn off axes
		ax.set_ylim([-1.05, 1.05])
		ax.axis('off')

		return fig


	def format_small_time_interval(t):
		"""Format a duration in seconds with a unit suited to its magnitude.

		Durations of at least one second are shown in seconds, durations of at
		least one millisecond in milliseconds, and anything smaller in
		microseconds. The number is rendered with three significant digits.
		"""
		if t >= 1.0:
			return f'{t:.3g} s'
		elif t >= 1e-3:
			return f'{t * 1e3:.3g} ms'
		else:
			return f'{t * 1e6:.3g} μs'


	def half_cosine_window(n, min_=0.0, max_=1.0):
		"""Return the falling half of a raised-cosine curve, scaled to a range.

		The curve is sampled at phases ``i * pi / n`` for ``i = 0 .. n-1``, so
		it starts exactly at ``max_`` and decreases towards ``min_`` without
		reaching it (the end point at phase ``pi`` is excluded).

		Parameters
		----------
		n : int
			Number of points to generate.
		min_, max_ : float
			Value approached at the end of the window and value at its start.

		Returns
		-------
		np.ndarray
			Array with ``n`` values; empty when ``n <= 0``.
		"""
		if n <= 0:
			return np.zeros(0)

		# Build the phases from an integer range rather than
		# ``np.arange(0., np.pi, np.pi / n)``: with a floating-point step the
		# number of points depends on rounding and can come out as n + 1.
		phases = np.arange(n) * (np.pi / n)
		win = np.cos(phases) * 0.5 + 0.5
		win = (max_ - min_) * win + min_

		return win


	if __name__ == '__main__':

		# usage: python waveform.py AUDIO_PATH OUT_DIR
		if len(sys.argv) < 3:
			sys.exit(f'usage: {sys.argv[0]} AUDIO_PATH OUT_DIR')
		path = sys.argv[1]  # path to audio file, only tried 44100Hz
		out_dir = sys.argv[2]  # created if missing, will be spammed with png's
		os.makedirs(out_dir, exist_ok=True)

		# load audio at its native sampling rate and normalize the peak
		max_amplitude = 0.65
		y, sr = lr.load(path, sr=None)
		peak = np.max(np.abs(y))
		if peak > 0:  # leave an all-zero signal untouched instead of dividing by zero
			y = max_amplitude * y / peak

		# parameters (derived from the file's own sampling rate rather than a
		# hard-coded 44100, so the zoom covers the same durations at any rate)
		min_scale = 0.001 * sr  # half-width of the tightest zoom, in samples
		max_scale = 1 * sr  # half-width of the widest zoom, in samples
		center = 2 * sr  # center of segment in samples
		zoom_frames = 100
		frame_rate = 12
		pause_frames = 12

		# draw and save figures; the window runs from max_scale down towards
		# min_scale on a log axis, so the frames zoom in
		filenames = []
		scales = half_cosine_window(zoom_frames, min_=np.log10(min_scale), max_=np.log10(max_scale))
		for i, scale in enumerate(tqdm(scales)):
			start = int(center - 10**scale)
			stop = int(center + 10**scale)
			fig = plot_wav(y, sr, start=start, stop=stop, upsample=2, downsample=2)
			filename = os.path.join(out_dir, f'wave_{i + 1}.png')
			fig.savefig(filename, pad_inches=0.0, bbox_inches='tight')
			filenames.append(filename)
			plt.close(fig)  # avoid accumulating open figures

		# add pauses, make symmetric (zoom in, hold, then play back in reverse)
		filenames = [filenames[0]] * (pause_frames // 2) + filenames + [filenames[-1]] * (pause_frames // 2)
		filenames = filenames + filenames[::-1]

		# write gif
		with imageio.get_writer(os.path.join(out_dir, 'wave.gif'), mode='I', duration=1./frame_rate) as writer:
			for filename in filenames:
				image = imageio.imread(filename)
				writer.append_data(image)