Skip to content

Instantly share code, notes, and snippets.

@RichardJohnn
Forked from kylemcdonald/split-transients.py
Created February 9, 2023 23:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RichardJohnn/02b32bcfe7041bb55d757cb4ea776257 to your computer and use it in GitHub Desktop.
Split an audio file into multiple files based on detected onsets from librosa.
#!/usr/bin/env python
import argparse
import soundfile
import librosa
import numpy as np
import os
from progressbar import ProgressBar, Percentage, Bar
# --- CLI setup -------------------------------------------------------------
parser = argparse.ArgumentParser(
    description='Split audio into multiple files and save analysis.')
# required=True gives a clear argparse error instead of librosa.load(None)
# blowing up later when -i is omitted.
parser.add_argument('-i', '--input', type=str, required=True)
parser.add_argument('-o', '--output', type=str, default='transients')
parser.add_argument('-s', '--sr', type=int, default=44100)
args = parser.parse_args()

# --- Load audio and detect onsets ------------------------------------------
print(f'Loading {args.input}')
y, sr = librosa.load(args.input, sr=args.sr)

print('Calculating CQT')
C = np.abs(librosa.cqt(y=y, sr=sr))

print('Extracting onsets')
# `y` is keyword-only in librosa >= 0.10; passing it positionally raises
# a TypeError there.  Keyword form is accepted by older releases as well.
o_env = librosa.onset.onset_strength(
    y=y, sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
def prepare(y, sr=22050):
    """Return *y* mono-ized, padded/trimmed to exactly 1 second, and peak-normalized.

    Parameters
    ----------
    y : np.ndarray
        Audio samples (mono or multi-channel).
    sr : int
        Sample rate; also the target length in samples (sr samples == 1 s).
    """
    y = librosa.to_mono(y)
    # `size` is keyword-only in librosa >= 0.10; the positional form
    # fix_length(y, sr) raises a TypeError there.
    y = librosa.util.fix_length(y, size=sr)  # 1 second of audio
    y = librosa.util.normalize(y)
    return y
def get_fingerprint(y, sr=22050):
    """Return a 1-D feature vector: the column-major flattened CQT of 1 s of *y*.

    The audio is first mono-ized, length-fixed to one second, and normalized
    via prepare().
    """
    y = prepare(y, sr)
    # Keyword `y=` keeps this working on librosa >= 0.10, where the audio
    # argument became keyword-only.
    cqt = librosa.cqt(y=y, sr=sr, hop_length=2048)
    return cqt.flatten('F')
def normalize(x):
    """Scale each column of *x* into [0, 1] and return a NEW array.

    Fixes two defects of the original:
    - it mutated the caller's array in place (x -= ..., x /= ...);
    - a constant column produced NaN/inf from division by zero.
    """
    x = np.array(x, dtype=float)  # copy so the caller's data is untouched
    x -= x.min(axis=0)
    peak = x.max(axis=0)
    # Constant columns have peak 0 after the shift; divide by 1 to leave
    # them at 0 instead of NaN.
    x /= np.where(peak == 0, 1.0, peak)
    return x
def basename(file):
    """Return the file name of *file* stripped of directory and extension."""
    stem, _ext = os.path.splitext(os.path.basename(file))
    return stem
vectors = []
words = []
filenames = []

# Segment boundaries in samples: each detected onset, plus the end of the
# signal so the final onset-to-end chunk is also written out.
onset_samples = list(librosa.frames_to_samples(onset_frames))
# BUG FIX: the original np.concatenate(onset_samples, len(y)) passed len(y)
# as the `axis` argument and raised; the intent is to append len(y) as the
# last boundary.
onset_samples = np.concatenate([onset_samples, [len(y)]]).astype(int)
starts = onset_samples[0:-1]
stops = onset_samples[1:]

analysis_folder = args.output
samples_folder = os.path.join(args.output, 'samples')
# BUG FIX: there is one fewer segment than boundaries; len(onset_samples)
# overstated the count by one (and made the progress bar never fill).
num_segments = len(starts)
print(f'Writing {num_segments} segments to {samples_folder}')
# exist_ok=True replaces the bare try/except pass, which also swallowed
# real errors such as permission failures.
os.makedirs(samples_folder, exist_ok=True)

pbar = ProgressBar(widgets=[Percentage(), Bar()], maxval=num_segments).start()
for i, (start, stop) in enumerate(zip(starts, stops)):
    audio = y[start:stop]
    filename = os.path.join(samples_folder, str(i) + '.wav')
    soundfile.write(filename, audio, sr)
    vector = get_fingerprint(audio, sr=sr)
    word = basename(filename)
    vectors.append(vector)
    words.append(word)
    filenames.append(filename)
    pbar.update(i + 1)
pbar.finish()

# Persist the analysis alongside the samples.
np.savetxt(os.path.join(analysis_folder, 'vectors'), vectors, fmt='%.5f', delimiter='\t')
np.savetxt(os.path.join(analysis_folder, 'words'), words, fmt='%s')
np.savetxt(os.path.join(analysis_folder, 'filenames.txt'), filenames, fmt='%s')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment