Skip to content

Instantly share code, notes, and snippets.

@ankurdhuriya
Created November 20, 2021 03:30
Show Gist options
  • Save ankurdhuriya/e536512ac7867658e79c5d2ca9322180 to your computer and use it in GitHub Desktop.
Save ankurdhuriya/e536512ac7867658e79c5d2ca9322180 to your computer and use it in GitHub Desktop.
resample wav files in paralled
import argparse
import librosa
import numpy as np
import os
import scipy
import scipy.io.wavfile
import sys
from glob import glob
from tqdm import tqdm
from joblib import Parallel, delayed
def check_directories(dir_input, dir_output):
if not os.path.exists(dir_input):
sys.exit('Error: Input directory does not exist: {}'.format(dir_input))
if not os.path.exists(dir_output):
sys.exit('Error: Output directory does not exist: {}'.format(dir_output))
abs_a = os.path.abspath(dir_input)
abs_b = os.path.abspath(dir_output)
if abs_a == abs_b:
sys.exit('Error: Paths are the same: {}'.format(abs_a))
def resample_file(input_filename, output_filename, sample_rate):
mono = True # librosa converts signal to mono by default, so I'm just surfacing this
audio, existing_rate = librosa.load(input_filename, sr=sample_rate, mono=mono)
audio /= 1.414 # Scale to [-1.0, 1.0]
audio *= 32767 # Scale to int16
audio = audio.astype(np.int16)
scipy.io.wavfile.write(output_filename, sample_rate, audio)
def downsample_wav_files(input_dir, output_dir, output_sample_rate):
check_directories(input_dir, output_dir)
inp_wav_paths = glob(input_dir+'/*.wav')
out_wav_paths = [os.path.join(output_dir, os.path.basename(p)) for p in inp_wav_paths]
_ = Parallel(n_jobs=-1)(delayed(resample_file)(i, o, output_sample_rate) for i, o in tqdm(zip(inp_wav_paths, out_wav_paths)))
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--input_dir', '-i', type=str, required=True)
parser.add_argument('--output_dir', '-o', type=str, required=True)
parser.add_argument('--output_sample_rate', '-s', type=int, required=True)
return parser.parse_args()
if __name__ == '__main__' :
args = parse_args()
downsample_wav_files(args.input_dir, args.output_dir, args.output_sample_rate)
print(f"\n\tCompleted")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment