Skip to content

Instantly share code, notes, and snippets.

@avinashvarna
Created July 19, 2022 02:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save avinashvarna/d05668d7f720f249c843480826d6a30c to your computer and use it in GitHub Desktop.
Save avinashvarna/d05668d7f720f249c843480826d6a30c to your computer and use it in GitHub Desktop.
Segment audio file by amplitude
# -*- coding: utf-8 -*-
"""
Segment an audio file based on average amplitude
"""
import datetime
import numpy as np
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from pydub import AudioSegment
from skimage.filters import threshold_otsu
from scipy.signal import medfilt
from more_itertools import pairwise
def moving_average(a, n=3):
    """Return the simple moving average of *a* over windows of *n* samples.

    Only complete windows are averaged, so the result has
    ``len(a) - n + 1`` elements (empty when *a* has fewer than *n*
    elements). Element ``i`` of the result is the mean of ``a[i:i + n]``.

    Parameters
    ----------
    a : array_like
        Input values; flattened by ``np.cumsum`` if multi-dimensional.
    n : int, optional
        Window length in samples; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array of window means.

    Raises
    ------
    ValueError
        If *n* is smaller than 1.
    """
    # A non-positive window would pair up the wrong slices below and
    # silently produce meaningless values, so reject it up front.
    if n < 1:
        raise ValueError(f'window size n must be >= 1, got {n}')
    ret = np.cumsum(a, dtype=float)
    # The difference of running sums n apart is the sum of each window.
    ret[n:] = ret[n:] - ret[:-n]
    # The first n - 1 entries are partial sums of incomplete windows.
    return ret[n - 1:] / n
def cut_points(t):
    """Return the indices at which consecutive elements of *t* differ.

    Index ``i`` is reported when ``t[i] != t[i + 1]``, i.e. it is the
    position of the last element before each change.

    Parameters
    ----------
    t : array_like
        Sequence of states (for example a 0/1 mask). Any dtype that
        supports element-wise comparison is accepted.

    Returns
    -------
    numpy.ndarray
        Integer indices of the changes, in increasing order; empty when
        *t* has fewer than two elements or never changes.
    """
    t = np.asarray(t)
    # Compare neighbours with != rather than XOR so that float masks and
    # plain sequences work as well as bool/int arrays.
    changed = t[:-1] != t[1:]
    return np.nonzero(changed)[0]
def save_segments(audio, indices, window_width, orig_filename):
    """Export the audio between each pair of consecutive cut points.

    Segment ``i`` spans ``indices[i]`` to ``indices[i + 1]`` (in windows)
    and is written next to the original file as ``<basename>_<i>.<ext>``,
    using the original extension as the export format. Audio before the
    first cut point and after the last one is not exported.

    Parameters
    ----------
    audio : pydub.AudioSegment
        The audio to slice.
    indices : sequence of int
        Cut points, expressed as window indices.
    window_width : float
        Duration of one window in seconds.
    orig_filename : str
        Path of the source file; provides the output base name and format.
    """
    basename, ext = os.path.splitext(orig_filename)
    ext = ext.strip('.')
    # pydub does things in milliseconds
    ms_per_window = window_width * 1000
    # pairwise() yields a generator with no length, so tell tqdm how many
    # segments to expect; fall back to an open-ended bar for unsized input.
    try:
        n_segments = max(len(indices) - 1, 0)
    except TypeError:
        n_segments = None
    progress = tqdm(pairwise(indices), total=n_segments,
                    desc='Saving segments')
    for i, (start, stop) in enumerate(progress):
        segment = audio[start * ms_per_window:stop * ms_per_window]
        segment.export(f'{basename}_{i}.{ext}', format=ext)
def split_file_by_amplitude(filename, window_width, threshold_factor,
                            filter_size):
    """Split an audio file wherever its average amplitude crosses a threshold.

    The audio is divided into windows of *window_width* seconds, the mean
    absolute amplitude of each window is smoothed with a moving average,
    and the result is compared against a scaled Otsu threshold. The
    resulting loud/quiet mask is median-filtered, and every change in the
    mask becomes a cut point; the pieces between consecutive cut points
    are written out by ``save_segments``.

    Parameters
    ----------
    filename : str
        Path of the audio file to split.
    window_width : float
        Window duration in seconds.
    threshold_factor : float
        Multiplier applied to the automatically determined threshold.
    filter_size : int
        Length of the moving-average and median filters, in windows.
        ``scipy.signal.medfilt`` expects an odd kernel size.

    Raises
    ------
    ValueError
        If *window_width* is too small to contain a single audio frame.
    """
    # Load the file and convert to numpy array
    audio = AudioSegment.from_file(filename)
    samples = np.array(audio.get_array_of_samples())

    # pydub returns multi-channel audio as one flat, channel-interleaved
    # array, so a window of `window_width` seconds holds that many frames
    # times the number of channels. Ignoring the channel count would make
    # the windows too short and the cut times wrong for stereo files.
    width = int(audio.frame_rate * window_width) * audio.channels
    if width <= 0:
        raise ValueError(
            f'window_width={window_width!r} is shorter than one audio frame')

    # Zero-pad up to the next whole window. Rounding to the *nearest*
    # multiple could shrink the target length and make the pad negative.
    padding = -len(samples) % width
    samples = np.pad(samples, (0, padding))

    # Reshape into windows, one row per window
    s = samples.reshape((-1, width))

    # Compute the mean of the audio over each window and apply a moving average
    m = np.mean(np.abs(s), axis=1)
    m = moving_average(m, filter_size)
    # NOTE(review): the smoothed series is filter_size - 1 entries shorter
    # than the per-window series, so the cut indices below refer to the
    # smoothed series -- confirm whether an offset correction is wanted.

    # Determine a threshold to apply
    thresh = threshold_factor * threshold_otsu(m)
    t = m > thresh

    # Apply a median filter to remove any spurious points
    t1 = medfilt(t.astype(int), filter_size)
    indices = cut_points(t1)
    save_segments(audio, indices, window_width, filename)
if __name__ == "__main__":
    import sys

    _start = datetime.datetime.now()
    # File to split: the first command-line argument when given,
    # otherwise the original default file.
    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = "03-1-PeriyALwAr Tiumuzi-01-02.mp3"
    # window width in seconds
    window_width = 1
    # Factor to adjust the automatically found threshold by
    threshold_factor = 0.75
    # Size of filters applied to remove spurious points
    filter_size = 5
    split_file_by_amplitude(filename, window_width, threshold_factor,
                            filter_size)
    # Report the wall-clock time taken by the whole run
    _end = datetime.datetime.now()
    _delta = _end - _start
    print(f'Took {_delta} ({_delta.total_seconds()}s)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment