JasonBenn/audio_transcription.ipynb

## audio_transcription.ipynb
{
  "cells": [
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Install into the kernel's own environment; each package is listed once.\n%pip install adtlib librosa spleeter cython matplotlib",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Music IR from https://musicinformationretrieval.com/"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Download song"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import IPython\nimport IPython.display as ipd\nimport librosa\nimport librosa.display\nimport matplotlib.pyplot as plt\nimport numpy\nfrom ADTLib import ADT",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "song_name = \"give_you_up\"",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "%time !youtube-dl --extract-audio --audio-format mp3 --keep-video https://www.youtube.com/watch?v=dQw4w9WgXcQ -o \"{song_name}.%(ext)s\"",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.HTML(f'<audio controls src=\"/files/{song_name}.mp3\"></audio>')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Split song"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "%time !spleeter separate -i {song_name}.mp3 -p spleeter:5stems -o .",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "piano_filename = f\"{song_name}/piano.wav\"\nbass_filename = f\"{song_name}/bass.wav\"\nother_filename = f\"{song_name}/other.wav\"\nvocals_filename = f\"{song_name}/vocals.wav\"\ndrums_filename = f\"{song_name}/drums.wav\"",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.HTML(f'<audio controls src=\"/files/{drums_filename}\"></audio>')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "partial_song_filename = f\"{song_name}_drumless.mp3\"\n!rm -f {partial_song_filename}\n%time !ffmpeg -i {song_name}/piano.wav -i {song_name}/bass.wav -i {song_name}/other.wav -i {song_name}/vocals.wav -filter_complex amix=inputs=4:duration=longest {partial_song_filename}",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.HTML(f'<audio controls src=\"/files/{bass_filename}\"></audio>')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.HTML(f'<audio controls src=\"/files/{piano_filename}\"></audio>')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.HTML(f'<audio controls src=\"/files/{partial_song_filename}\"></audio>')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Transcribe drums"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "%time x, sr = librosa.load(drums_filename)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "%time drum_onsets = ADT([drums_filename])[0]",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "drum_onsets",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "clicks = librosa.clicks(times=drum_onsets['Kick'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "clicks = librosa.clicks(times=drum_onsets['Snare'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "clicks = librosa.clicks(times=drum_onsets['Hihat'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "### Question: do we want to sample the kick, snare, and hihat noise, and replay them when you play along on a MIDI drumkit?"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Transcribe vocals\n\nhttps://musicinformationretrieval.com/pitch_transcription_exercise.html\n\nMight help to also backtrack from onsets?\nhttps://musicinformationretrieval.com/onset_segmentation.html"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "%time x, sr = librosa.load(vocals_filename)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "sr, x",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.Audio(x, rate=sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "from librosa import amplitude_to_db\nbins_per_octave = 36\ncqt = librosa.cqt(x, sr=sr, n_bins=300, bins_per_octave=bins_per_octave)\nlog_cqt = amplitude_to_db(cqt)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import librosa.display\nlibrosa.display.specshow(log_cqt, sr=sr, x_axis='time', y_axis='cqt_note', \n                         bins_per_octave=bins_per_octave)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "import matplotlib.pyplot as plt",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "hop_length = 100\n# Pass the signal by keyword: librosa >= 0.10 made `y` keyword-only.\nonset_env = librosa.onset.onset_strength(y=x, sr=sr, hop_length=hop_length)\nplt.plot(onset_env)\nplt.xlim(0, len(onset_env))",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "# `y` is passed by keyword: librosa >= 0.10 no longer accepts it positionally.\nonset_samples = librosa.onset.onset_detect(\n    y=x, sr=sr, units='samples',\n    hop_length=hop_length,\n    backtrack=False,\n    pre_max=20, post_max=20,\n    pre_avg=100, post_avg=100,\n    delta=0.2,\n    wait=0,\n)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "onset_samples",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "onset_boundaries = numpy.concatenate([[0], onset_samples, [len(x)]])",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n# waveplot was removed in librosa 0.10; use waveshow when it is unavailable.\nplot_wave = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow\nplot_wave(x, sr=sr)\nplt.vlines(onset_times, -1, 1, color='r')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):\n    \"\"\"Estimate the fundamental frequency (Hz) of `segment` by autocorrelation.\n\n    The autocorrelation peak is searched only at lags between sr/fmax and\n    sr/fmin. Returns 0.0 when the segment is empty or no usable peak is\n    found, instead of dividing by a zero lag.\n    \"\"\"\n    if len(segment) == 0:\n        return 0.0\n\n    # Compute autocorrelation of input segment.\n    r = librosa.autocorrelate(segment)\n\n    # Zero out lags outside the admissible pitch range.\n    i_min = sr/fmax\n    i_max = sr/fmin\n    r[:int(i_min)] = 0\n    r[int(i_max):] = 0\n\n    # Find the location of the maximum autocorrelation.\n    i = r.argmax()\n    if i == 0:\n        # Lag 0 means no peak was found; sr/0 would yield inf and NaN audio.\n        return 0.0\n    return float(sr)/i\n\ndef generate_sine(f0, sr, n_duration):\n    \"\"\"Return `n_duration` samples of a 0.2-amplitude sine at `f0` Hz.\"\"\"\n    n = numpy.arange(n_duration)\n    return 0.2*numpy.sin(2*numpy.pi*f0*n/float(sr))\n\ndef estimate_pitch_and_generate_sine(x, onset_samples, i, sr):\n    \"\"\"Synthesize a sine at the estimated pitch of segment `i` of `x`.\n\n    The segment spans onset_samples[i] to onset_samples[i+1]; the returned\n    sine has the same number of samples.\n    \"\"\"\n    n0 = onset_samples[i]\n    n1 = onset_samples[i+1]\n    f0 = estimate_pitch(x[n0:n1], sr)\n    return generate_sine(f0, sr, n1-n0)\n",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = numpy.concatenate([\n    estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n    for i in range(len(onset_boundaries)-1)\n])\n",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.Audio(y, rate=sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "# Transcribe bass"
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "## Get sample"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "bass_sample_filename = \"bass_sample.wav\"",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "!rm -f {bass_sample_filename}\n!ffmpeg -i {bass_filename} -ss 6 -t 4 {bass_sample_filename}",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.HTML(f'<audio controls src=\"/files/{bass_sample_filename}\"></audio>')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "**Input:** `x`, `sr`\n\n**Output:** TODO (not yet specified)"
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "x, sr = librosa.load(bass_sample_filename)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "def get_onset_boundaries(x, sr, hop_length=100):\n    \"\"\"Return the sample indices that split `x` into onset-delimited segments.\n\n    Onsets come from librosa.onset.onset_detect (in samples); 0 and len(x)\n    are added so that consecutive boundaries bracket every segment.\n    Duplicate boundaries are dropped so no segment is empty.\n    \"\"\"\n    # `y` is passed by keyword: librosa >= 0.10 no longer accepts it positionally.\n    onset_samples = librosa.onset.onset_detect(\n        y=x, sr=sr, units='samples',\n        hop_length=hop_length,\n        backtrack=False,\n        pre_max=20, post_max=20,\n        pre_avg=100, post_avg=100,\n        delta=0.2,\n        wait=0,\n    )\n    # numpy.unique also sorts, which leaves the already ordered boundaries unchanged.\n    return numpy.unique(numpy.concatenate([[0], onset_samples, [len(x)]]))",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "onset_boundaries = get_onset_boundaries(x, sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "def chart_onset_times(x, sr, onset_boundaries):\n    \"\"\"Plot the waveform of `x` with a red vertical line at each boundary.\n\n    `onset_boundaries` are sample indices; they are converted to seconds\n    with librosa.samples_to_time before plotting.\n    \"\"\"\n    onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n    # waveplot was removed in librosa 0.10; use waveshow when it is unavailable.\n    plot_wave = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow\n    plot_wave(x, sr=sr)\n    plt.vlines(onset_times, -1, 1, color='r')",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "chart_onset_times(x, sr, onset_boundaries)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "y = numpy.concatenate([\n    estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n    for i in range(len(onset_boundaries)-1)\n])",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "ipd.Audio(y, rate=sr)",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true
      },
      "cell_type": "code",
      "source": "",
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.7.4",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "audio transcription poc",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}
	{
	"cells": [
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Install into the kernel's own environment; each package is listed once.\n%pip install adtlib librosa spleeter cython matplotlib",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Music IR from https://musicinformationretrieval.com/"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Download song"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import IPython\nimport IPython.display as ipd\nimport librosa\nimport librosa.display\nimport matplotlib.pyplot as plt\nimport numpy\nfrom ADTLib import ADT",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "song_name = \"give_you_up\"",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "%time !youtube-dl --extract-audio --audio-format mp3 --keep-video https://www.youtube.com/watch?v=dQw4w9WgXcQ -o \"{song_name}.%(ext)s\"",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.HTML(f'<audio controls src=\"/files/{song_name}.mp3\"></audio>')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Split song"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "%time !spleeter separate -i {song_name}.mp3 -p spleeter:5stems -o .",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "piano_filename = f\"{song_name}/piano.wav\"\nbass_filename = f\"{song_name}/bass.wav\"\nother_filename = f\"{song_name}/other.wav\"\nvocals_filename = f\"{song_name}/vocals.wav\"\ndrums_filename = f\"{song_name}/drums.wav\"",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.HTML(f'<audio controls src=\"/files/{drums_filename}\"></audio>')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "partial_song_filename = f\"{song_name}_drumless.mp3\"\n!rm -f {partial_song_filename}\n%time !ffmpeg -i {song_name}/piano.wav -i {song_name}/bass.wav -i {song_name}/other.wav -i {song_name}/vocals.wav -filter_complex amix=inputs=4:duration=longest {partial_song_filename}",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.HTML(f'<audio controls src=\"/files/{bass_filename}\"></audio>')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.HTML(f'<audio controls src=\"/files/{piano_filename}\"></audio>')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.HTML(f'<audio controls src=\"/files/{partial_song_filename}\"></audio>')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Transcribe drums"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "%time x, sr = librosa.load(drums_filename)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "%time drum_onsets = ADT([drums_filename])[0]",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "drum_onsets",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "clicks = librosa.clicks(times=drum_onsets['Kick'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "clicks = librosa.clicks(times=drum_onsets['Snare'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "clicks = librosa.clicks(times=drum_onsets['Hihat'], sr=sr, length=len(x))\nipd.Audio(x + clicks, rate=sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "### Question: do we want to sample the kick, snare, and hihat noise, and replay them when you play along on a MIDI drumkit?"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Transcribe vocals\n\nhttps://musicinformationretrieval.com/pitch_transcription_exercise.html\n\nMight help to also backtrack from onsets?\nhttps://musicinformationretrieval.com/onset_segmentation.html"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "%time x, sr = librosa.load(vocals_filename)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "sr, x",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.Audio(x, rate=sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "from librosa import amplitude_to_db\nbins_per_octave = 36\ncqt = librosa.cqt(x, sr=sr, n_bins=300, bins_per_octave=bins_per_octave)\nlog_cqt = amplitude_to_db(cqt)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import librosa.display\nlibrosa.display.specshow(log_cqt, sr=sr, x_axis='time', y_axis='cqt_note', \n bins_per_octave=bins_per_octave)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "import matplotlib.pyplot as plt",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "hop_length = 100\n# Pass the signal by keyword: librosa >= 0.10 made `y` keyword-only.\nonset_env = librosa.onset.onset_strength(y=x, sr=sr, hop_length=hop_length)\nplt.plot(onset_env)\nplt.xlim(0, len(onset_env))",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "# `y` is passed by keyword: librosa >= 0.10 no longer accepts it positionally.\nonset_samples = librosa.onset.onset_detect(\n    y=x, sr=sr, units='samples',\n    hop_length=hop_length,\n    backtrack=False,\n    pre_max=20, post_max=20,\n    pre_avg=100, post_avg=100,\n    delta=0.2,\n    wait=0,\n)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "onset_samples",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "onset_boundaries = numpy.concatenate([[0], onset_samples, [len(x)]])",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n# waveplot was removed in librosa 0.10; use waveshow when it is unavailable.\nplot_wave = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow\nplot_wave(x, sr=sr)\nplt.vlines(onset_times, -1, 1, color='r')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):\n    \"\"\"Estimate the fundamental frequency (Hz) of `segment` by autocorrelation.\n\n    The autocorrelation peak is searched only at lags between sr/fmax and\n    sr/fmin. Returns 0.0 when the segment is empty or no usable peak is\n    found, instead of dividing by a zero lag.\n    \"\"\"\n    if len(segment) == 0:\n        return 0.0\n\n    # Compute autocorrelation of input segment.\n    r = librosa.autocorrelate(segment)\n\n    # Zero out lags outside the admissible pitch range.\n    i_min = sr/fmax\n    i_max = sr/fmin\n    r[:int(i_min)] = 0\n    r[int(i_max):] = 0\n\n    # Find the location of the maximum autocorrelation.\n    i = r.argmax()\n    if i == 0:\n        # Lag 0 means no peak was found; sr/0 would yield inf and NaN audio.\n        return 0.0\n    return float(sr)/i\n\ndef generate_sine(f0, sr, n_duration):\n    \"\"\"Return `n_duration` samples of a 0.2-amplitude sine at `f0` Hz.\"\"\"\n    n = numpy.arange(n_duration)\n    return 0.2*numpy.sin(2*numpy.pi*f0*n/float(sr))\n\ndef estimate_pitch_and_generate_sine(x, onset_samples, i, sr):\n    \"\"\"Synthesize a sine at the estimated pitch of segment `i` of `x`.\n\n    The segment spans onset_samples[i] to onset_samples[i+1]; the returned\n    sine has the same number of samples.\n    \"\"\"\n    n0 = onset_samples[i]\n    n1 = onset_samples[i+1]\n    f0 = estimate_pitch(x[n0:n1], sr)\n    return generate_sine(f0, sr, n1-n0)\n",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "y = numpy.concatenate([\n estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n for i in range(len(onset_boundaries)-1)\n])\n",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.Audio(y, rate=sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "# Transcribe bass"
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "## Get sample"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "bass_sample_filename = \"bass_sample.wav\"",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "!rm -f {bass_sample_filename}\n!ffmpeg -i {bass_filename} -ss 6 -t 4 {bass_sample_filename}",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.HTML(f'<audio controls src=\"/files/{bass_sample_filename}\"></audio>')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "**Input:** `x`, `sr`\n\n**Output:** TODO (not yet specified)"
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "x, sr = librosa.load(bass_sample_filename)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "def get_onset_boundaries(x, sr, hop_length=100):\n    \"\"\"Return the sample indices that split `x` into onset-delimited segments.\n\n    Onsets come from librosa.onset.onset_detect (in samples); 0 and len(x)\n    are added so that consecutive boundaries bracket every segment.\n    Duplicate boundaries are dropped so no segment is empty.\n    \"\"\"\n    # `y` is passed by keyword: librosa >= 0.10 no longer accepts it positionally.\n    onset_samples = librosa.onset.onset_detect(\n        y=x, sr=sr, units='samples',\n        hop_length=hop_length,\n        backtrack=False,\n        pre_max=20, post_max=20,\n        pre_avg=100, post_avg=100,\n        delta=0.2,\n        wait=0,\n    )\n    # numpy.unique also sorts, which leaves the already ordered boundaries unchanged.\n    return numpy.unique(numpy.concatenate([[0], onset_samples, [len(x)]]))",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "onset_boundaries = get_onset_boundaries(x, sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "def chart_onset_times(x, sr, onset_boundaries):\n    \"\"\"Plot the waveform of `x` with a red vertical line at each boundary.\n\n    `onset_boundaries` are sample indices; they are converted to seconds\n    with librosa.samples_to_time before plotting.\n    \"\"\"\n    onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)\n    # waveplot was removed in librosa 0.10; use waveshow when it is unavailable.\n    plot_wave = getattr(librosa.display, 'waveplot', None) or librosa.display.waveshow\n    plot_wave(x, sr=sr)\n    plt.vlines(onset_times, -1, 1, color='r')",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "chart_onset_times(x, sr, onset_boundaries)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "y = numpy.concatenate([\n estimate_pitch_and_generate_sine(x, onset_boundaries, i, sr=sr)\n for i in range(len(onset_boundaries)-1)\n])",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "ipd.Audio(y, rate=sr)",
	"execution_count": null,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true
	},
	"cell_type": "code",
	"source": "",
	"execution_count": null,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"name": "python",
	"version": "3.7.4",
	"mimetype": "text/x-python",
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"pygments_lexer": "ipython3",
	"nbconvert_exporter": "python",
	"file_extension": ".py"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "audio transcription poc",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}