Skip to content

Instantly share code, notes, and snippets.

@zabir-nabil
Created June 27, 2021 15:56
Show Gist options
  • Save zabir-nabil/ffbe3b37c71b89da826b3c4463313568 to your computer and use it in GitHub Desktop.
Save zabir-nabil/ffbe3b37c71b89da826b3c4463313568 to your computer and use it in GitHub Desktop.
Silence removal from an audio file (with .sph to .wav conversion) and segmentation of the result into equal-duration chunks
# author: github.com/zabir-nabil
import os

import librosa
import numpy as np
import pydub
from pydub import AudioSegment
def segment_aud_eq(audio_segment, k):
    """Split ``audio_segment`` into consecutive chunks of exactly ``k`` units.

    Args:
        audio_segment: Any object supporting ``len()`` and slicing. For a
            pydub ``AudioSegment`` both are expressed in milliseconds.
        k: Chunk length in the same units as ``len(audio_segment)``
            (i.e. seconds * 1000 for an ``AudioSegment``). ``k == 0``
            raises ``ZeroDivisionError``.

    Returns:
        A list of ``len(audio_segment) // k`` slices, each ``k`` long. Any
        trailing remainder shorter than ``k`` is dropped so that every
        chunk has the same duration.
    """
    n_full = len(audio_segment) // k
    # Only whole chunks are produced, so (i + 1) * k never exceeds the length
    # and no clamping is needed. Clamping to len - 1 would shorten the final
    # chunk by one unit whenever the length is an exact multiple of k.
    return [audio_segment[i * k:(i + 1) * k] for i in range(n_full)]
def silence_remove_segment(filename, silence_thresh=-60., segment_size = 8.0, save = False, save_path = "", ret = True):
    """Strip silence from an audio file and cut the rest into equal segments.

    The file is read with ``librosa.load`` using its default arguments, so
    any format librosa can open (wav, sph, ...) is accepted. The samples are
    converted to 16-bit PCM, split on silence with pydub, re-joined, and cut
    into consecutive segments of ``segment_size`` seconds. A trailing
    remainder shorter than ``segment_size`` is discarded.

    Args:
        filename: Path of the audio file to load.
        silence_thresh: Threshold passed to ``pydub.silence.split_on_silence``
            as ``silence_thresh``.
        segment_size: Length of each output segment, in seconds.
        save: If true, export every segment as
            ``<save_path>/<stem>_<index>.wav``, where ``<stem>`` is the base
            name of ``filename`` up to its first dot.
        save_path: Directory prefix for the exported files.
        ret: If true, return the segments as an array.

    Returns:
        When ``ret`` is true, an ``np.int16`` array with one row of samples
        per segment (an empty array if no full segment remains); otherwise
        ``None``.
    """
    y, sr = librosa.load(filename)

    # Convert float samples to signed 16-bit PCM. Clip before casting: a
    # sample of exactly 1.0 scales to 32768, which does not fit in int16.
    int16_info = np.iinfo(np.int16)
    y = np.clip(y * (1 << 15), int16_info.min, int16_info.max).astype(np.int16)

    audio_segment = pydub.AudioSegment(
        y.tobytes(),
        frame_rate=sr,
        sample_width=y.dtype.itemsize,
        channels=1,
    )

    aud_segs = pydub.silence.split_on_silence(audio_segment, silence_thresh=silence_thresh)

    if aud_segs:
        # Join all non-silent chunks back into one continuous segment.
        all_seg = sum(aud_segs)
        # AudioSegment positions are in milliseconds, hence the factor 1000.
        eq_segs = segment_aud_eq(all_seg, int(segment_size * 1000))
    else:
        # Nothing to join: sum([]) would be the int 0, which cannot be
        # segmented, so report "no segments" instead.
        eq_segs = []

    if save:
        # Save each segment as a wav file named after the input file.
        stem = os.path.basename(filename).split('.')[0]
        for i, seg in enumerate(eq_segs):
            seg.export(os.path.join(save_path, f"{stem}_{i}.wav"), format="wav")

    if ret:
        return np.array([seg.get_array_of_samples() for seg in eq_segs], dtype = np.int16)
# usage
# Example run: remove silence from test.sph and cut it into segments using the
# default segment_size. With save = True the segments are also exported as wav
# files (save_path is left at its default, ""), and the returned int16 array
# is printed together with its shape.
sigs = silence_remove_segment("drive/MyDrive/test.sph", save = True)
print(sigs.shape)
print(sigs)
# The string literal below appears to be the recorded output of one such run:
# 7 segments of 176400 samples each.
"""
(7, 176400)
[[ 7 9 8 ... 62 3 -53]
[ -86 -85 -51 ... 131 23 -92]
[-194 -262 -286 ... 58 666 912]
...
[ 7 7 8 ... 1044 876 538]
[ 112 -303 -630 ... 748 619 481]
[ 360 273 223 ... -145 -625 -936]]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment