Created
February 14, 2020 23:50
-
-
Save JasonBenn/2ef6c2cbee1afda873baec8e7010c075 to your computer and use it in GitHub Desktop.
audio transcription poc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%pip install adtlib librosa spleeter cython matplotlib", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Music IR from https://musicinformationretrieval.com/" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Download song" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import IPython\nimport IPython.display as ipd\n\nimport numpy\nimport librosa\n\nfrom ADTLib import ADT", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "song_name = \"give_you_up\"", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%time !youtube-dl --extract-audio --audio-format mp3 --keep-video https://www.youtube.com/watch?v=dQw4w9WgXcQ -o \"{song_name}.%(ext)s\"", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.HTML(f'<audio controls src=\"/files/{song_name}.mp3\"></audio>')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Split song" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%time !spleeter separate -i {song_name}.mp3 -p spleeter:5stems -o .", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "piano_filename = f\"{song_name}/piano.wav\"\nbass_filename = f\"{song_name}/bass.wav\"\nother_filename = f\"{song_name}/other.wav\"\nvocals_filename = f\"{song_name}/vocals.wav\"\ndrums_filename = f\"{song_name}/drums.wav\"", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.HTML(f'<audio controls src=\"/files/{drums_filename}\"></audio>')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "partial_song_filename = f\"{song_name}_drumless.mp3\"\n!rm -f {partial_song_filename}\n%time !ffmpeg -i {song_name}/piano.wav -i {song_name}/bass.wav -i {song_name}/other.wav -i {song_name}/vocals.wav -filter_complex amix=inputs=4:duration=longest {partial_song_filename}", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.HTML(f'<audio controls src=\"/files/{bass_filename}\"></audio>')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.HTML(f'<audio controls src=\"/files/{piano_filename}\"></audio>')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.HTML(f'<audio controls src=\"/files/{partial_song_filename}\"></audio>')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Transcribe drums" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%time x, sr = librosa.load(drums_filename)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%time drum_onsets = ADT([drums_filename])[0]", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "drum_onsets", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "clicks = librosa.clicks(times=drum_onsets['Kick'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "clicks = librosa.clicks(times=drum_onsets['Snare'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "clicks = librosa.clicks(times=drum_onsets['Hihat'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Question: do we want to sample the kick, snare, and hihat noise, and replay them when you play along on a MIDI drumkit?" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Transcribe vocals\n\nhttps://musicinformationretrieval.com/pitch_transcription_exercise.html\n\nMight help to also backtrack from onsets?\nhttps://musicinformationretrieval.com/onset_segmentation.html" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "%time x, sr = librosa.load(vocals_filename)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "sr, x", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.Audio(x, rate=sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from librosa import amplitude_to_db\nbins_per_octave = 36\ncqt = librosa.cqt(x, sr=sr, n_bins=300, bins_per_octave=bins_per_octave)\nlog_cqt = amplitude_to_db(cqt)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import librosa.display\nlibrosa.display.specshow(log_cqt, sr=sr, x_axis='time', y_axis='cqt_note', \n bins_per_octave=bins_per_octave)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import matplotlib.pyplot as plt", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "hop_length = 100\nonset_env = librosa.onset.onset_strength(x, sr=sr, hop_length=hop_length)\nplt.plot(onset_env)\nplt.xlim(0, len(onset_env))", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "onset_samples = librosa.onset.onset_detect(x,\n sr=sr, units='samples', \n hop_length=hop_length, \n backtrack=False,\n pre_max=20,\n post_max=20,\n pre_avg=100,\n post_avg=100,\n delta=0.2,\n wait=0)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "onset_samples", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "onset_boundaries = numpy.concatenate([[0], onset_samples, [len(x)]])", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\nlibrosa.display.waveplot(x, sr=sr)\nplt.vlines(onset_times, -1, 1, color='r')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):\n    \"\"\"Estimate the fundamental frequency (Hz) of an audio segment via autocorrelation.\n\n    Returns 0.0 for segments with no autocorrelation peak in [fmin, fmax]\n    (e.g. silence), which generate_sine renders as silence.\n    \"\"\"\n    # Compute autocorrelation of input segment.\n    r = librosa.autocorrelate(segment)\n\n    # Zero out lags outside the period range implied by [fmin, fmax].\n    i_min = sr/fmax\n    i_max = sr/fmin\n    r[:int(i_min)] = 0\n    r[int(i_max):] = 0\n\n    # The argmax lag is the period estimate in samples.\n    i = r.argmax()\n    if i == 0:\n        # All in-range lags were zeroed (silent/aperiodic segment):\n        # avoid ZeroDivisionError and treat as unvoiced.\n        return 0.0\n    return float(sr)/i\n\ndef generate_sine(f0, sr, n_duration):\n    \"\"\"Synthesize n_duration samples of a 0.2-amplitude sine wave at f0 Hz.\"\"\"\n    n = numpy.arange(n_duration)\n    return 0.2*numpy.sin(2*numpy.pi*f0*n/float(sr))\n\ndef estimate_pitch_and_generate_sine(x, onset_samples, i, sr):\n    \"\"\"Estimate the pitch of the i-th inter-onset segment of x and render it as a sine.\"\"\"\n    n0 = onset_samples[i]\n    n1 = onset_samples[i+1]\n    f0 = estimate_pitch(x[n0:n1], sr)\n    return generate_sine(f0, sr, n1-n0)\n", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "y = numpy.concatenate([\n estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n for i in range(len(onset_boundaries)-1)\n])\n", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.Audio(y, rate=sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "# Transcribe bass" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "## Get sample" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "bass_sample_filename = \"bass_sample.wav\"", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "!rm -f {bass_sample_filename}\n!ffmpeg -i {bass_filename} -ss 6 -t 4 {bass_sample_filename}", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.HTML(f'<audio controls src=\"/files/{bass_sample_filename}\"></audio>')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Input: `x`, `sr` (the loaded bass sample).\nOutput: onset boundaries for the sample and a synthesized sine-wave rendering of the estimated pitches.", | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "x, sr = librosa.load(bass_sample_filename)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def get_onset_boundaries(x, sr):\n    \"\"\"Detect note onsets in x and return sample boundaries padded with [0] and [len(x)].\"\"\"\n    # Note: onset_detect computes its own onset envelope internally,\n    # so there is no need to call onset_strength separately here.\n    hop_length = 100\n    onset_samples = librosa.onset.onset_detect(x,\n                                               sr=sr, units='samples',\n                                               hop_length=hop_length,\n                                               backtrack=False,\n                                               pre_max=20,\n                                               post_max=20,\n                                               pre_avg=100,\n                                               post_avg=100,\n                                               delta=0.2,\n                                               wait=0)\n    return numpy.concatenate([[0], onset_samples, [len(x)]])", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "onset_boundaries = get_onset_boundaries(x, sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def chart_onset_times(x, sr, onset_boundaries):\n onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n librosa.display.waveplot(x, sr=sr)\n plt.vlines(onset_times, -1, 1, color='r')", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "chart_onset_times(x, sr, onset_boundaries)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "y = numpy.concatenate([\n estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n for i in range(len(onset_boundaries)-1)\n])", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "ipd.Audio(y, rate=sr)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.4", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "audio transcription poc", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment