Skip to content

Instantly share code, notes, and snippets.

@JasonBenn
Created February 14, 2020 23:50
Show Gist options
  • Save JasonBenn/2ef6c2cbee1afda873baec8e7010c075 to your computer and use it in GitHub Desktop.
Save JasonBenn/2ef6c2cbee1afda873baec8e7010c075 to your computer and use it in GitHub Desktop.
audio transcription poc
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "pip install adtlib librosa spleeter cython adtlib matplotlib",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Music IR from https://musicinformationretrieval.com/"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Download song"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import IPython\nimport IPython.display as ipd\nimport librosa\nimport librosa.display\nimport matplotlib.pyplot as plt\nimport numpy\nfrom ADTLib import ADT",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "song_name = \"give_you_up\"",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%time !youtube-dl --extract-audio --audio-format mp3 --keep-video https://www.youtube.com/watch?v=dQw4w9WgXcQ -o \"{song_name}.%(ext)s\"",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.HTML(f'<audio controls src=\"/files/{song_name}.mp3\"></audio>')",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Split song"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%time !spleeter separate -i {song_name}.mp3 -p spleeter:5stems -o .",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "piano_filename = f\"{song_name}/piano.wav\"\nbass_filename = f\"{song_name}/bass.wav\"\nother_filename = f\"{song_name}/other.wav\"\nvocals_filename = f\"{song_name}/vocals.wav\"\ndrums_filename = f\"{song_name}/drums.wav\"",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.HTML(f'<audio controls src=\"/files/{drums_filename}\"></audio>')",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "partial_song_filename = f\"{song_name}_drumless.mp3\"\n!rm -f {partial_song_filename}\n%time !ffmpeg -i {song_name}/piano.wav -i {song_name}/bass.wav -i {song_name}/other.wav -i {song_name}/vocals.wav -filter_complex amix=inputs=4:duration=longest {partial_song_filename}",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.HTML(f'<audio controls src=\"/files/{bass_filename}\"></audio>')",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.HTML(f'<audio controls src=\"/files/{piano_filename}\"></audio>')",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.HTML(f'<audio controls src=\"/files/{partial_song_filename}\"></audio>')",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Transcribe drums"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%time x, sr = librosa.load(drums_filename)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%time drum_onsets = ADT([drums_filename])[0]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "drum_onsets",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "clicks = librosa.clicks(times=drum_onsets['Kick'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "clicks = librosa.clicks(times=drum_onsets['Snare'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "clicks = librosa.clicks(times=drum_onsets['Hihat'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### Question: do we want to sample the kick, snare, and hi-hat sounds, and replay them when you play along on a MIDI drum kit?"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Transcribe vocals\n\nhttps://musicinformationretrieval.com/pitch_transcription_exercise.html\n\nMight help to also backtrack from onsets?\nhttps://musicinformationretrieval.com/onset_segmentation.html"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "%time x, sr = librosa.load(vocals_filename)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "sr, x",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.Audio(x, rate=sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from librosa import amplitude_to_db\nbins_per_octave = 36\ncqt = librosa.cqt(x, sr=sr, n_bins=300, bins_per_octave=bins_per_octave)\nlog_cqt = amplitude_to_db(cqt)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import librosa.display\nlibrosa.display.specshow(log_cqt, sr=sr, x_axis='time', y_axis='cqt_note', \n bins_per_octave=bins_per_octave)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import matplotlib.pyplot as plt",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "hop_length = 100\n# Pass the signal as y=...: newer librosa releases only accept it by keyword.\nonset_env = librosa.onset.onset_strength(y=x, sr=sr, hop_length=hop_length)\nplt.plot(onset_env)\nplt.xlim(0, len(onset_env))",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Detect onsets and report them as sample indices. The signal is passed as\n# y=... because newer librosa releases only accept it by keyword.\nonset_samples = librosa.onset.onset_detect(\n    y=x,\n    sr=sr,\n    units='samples',\n    hop_length=hop_length,\n    backtrack=False,\n    pre_max=20,\n    post_max=20,\n    pre_avg=100,\n    post_avg=100,\n    delta=0.2,\n    wait=0,\n)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "onset_samples",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "onset_boundaries = numpy.concatenate([[0], onset_samples, [len(x)]])",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n# waveplot was replaced by waveshow in newer librosa; use whichever exists.\nplot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot\nplot_waveform(x, sr=sr)\nplt.vlines(onset_times, -1, 1, color='r')",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):\n    \"\"\"Estimate the fundamental frequency of `segment` via autocorrelation.\n\n    Only lags between sr/fmax and sr/fmin samples are considered. Returns sr\n    divided by the best lag, or 0.0 when the segment is empty or the best lag\n    is 0 (no peak inside the allowed lag range).\n    \"\"\"\n    if len(segment) == 0:\n        return 0.0\n\n    # Compute autocorrelation of input segment.\n    r = librosa.autocorrelate(segment)\n\n    # Define lower and upper limits for the autocorrelation argmax.\n    i_min = sr/fmax\n    i_max = sr/fmin\n    r[:int(i_min)] = 0\n    r[int(i_max):] = 0\n\n    # Find the location of the maximum autocorrelation.\n    i = r.argmax()\n    if i == 0:\n        # Lag 0 carries no pitch; dividing by it would give an infinite f0\n        # and a NaN-filled sine downstream.\n        return 0.0\n    return float(sr)/i\n\n\ndef generate_sine(f0, sr, n_duration):\n    \"\"\"Return `n_duration` samples of a 0.2-amplitude sine of frequency `f0` at sample rate `sr`.\"\"\"\n    n = numpy.arange(n_duration)\n    return 0.2*numpy.sin(2*numpy.pi*f0*n/float(sr))\n\n\ndef estimate_pitch_and_generate_sine(x, onset_samples, i, sr):\n    \"\"\"Synthesize a sine for the slice of `x` between boundaries `i` and `i+1`.\n\n    The sine uses the pitch estimated for x[n0:n1] and has the same length as\n    that slice, where n0 and n1 are onset_samples[i] and onset_samples[i+1].\n    \"\"\"\n    n0 = onset_samples[i]\n    n1 = onset_samples[i+1]\n    f0 = estimate_pitch(x[n0:n1], sr)\n    return generate_sine(f0, sr, n1-n0)\n",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "y = numpy.concatenate([\n estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n for i in range(len(onset_boundaries)-1)\n])\n",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.Audio(y, rate=sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Transcribe bass"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Get sample"
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "bass_sample_filename = \"bass_sample.wav\"",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "!rm -f {bass_sample_filename}\n!ffmpeg -i {bass_filename} -ss 6 -t 4 {bass_sample_filename}",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.HTML(f'<audio controls src=\"/files/{bass_sample_filename}\"></audio>')",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "input: x, sr\noutput: "
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "x, sr = librosa.load(bass_sample_filename)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def get_onset_boundaries(x, sr):\n    \"\"\"Return the sample indices that split `x` at its detected onsets.\n\n    The result is 0, followed by the onset positions (in samples) reported by\n    librosa.onset.onset_detect, followed by len(x).\n    \"\"\"\n    hop_length = 100\n    # The signal is passed as y=... because newer librosa releases only accept\n    # it by keyword.\n    onset_samples = librosa.onset.onset_detect(\n        y=x,\n        sr=sr,\n        units='samples',\n        hop_length=hop_length,\n        backtrack=False,\n        pre_max=20,\n        post_max=20,\n        pre_avg=100,\n        post_avg=100,\n        delta=0.2,\n        wait=0,\n    )\n    return numpy.concatenate([[0], onset_samples, [len(x)]])",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "onset_boundaries = get_onset_boundaries(x, sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "def chart_onset_times(x, sr, onset_boundaries):\n    \"\"\"Plot the waveform of `x` with a red vertical line at each onset boundary.\n\n    `onset_boundaries` are sample indices; they are converted to seconds with\n    librosa.samples_to_time before plotting.\n    \"\"\"\n    onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n    # waveplot was replaced by waveshow in newer librosa; use whichever exists.\n    plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot\n    plot_waveform(x, sr=sr)\n    plt.vlines(onset_times, -1, 1, color='r')",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "chart_onset_times(x, sr, onset_boundaries)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "y = numpy.concatenate([\n estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n for i in range(len(onset_boundaries)-1)\n])",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "ipd.Audio(y, rate=sr)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.4",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "audio transcription poc",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment