Skip to content

Instantly share code, notes, and snippets.

@acetylSv
Created April 21, 2019 07:12
Show Gist options
  • Save acetylSv/1a233574234cb4188629c90b49bee1da to your computer and use it in GitHub Desktop.
Save acetylSv/1a233574234cb4188629c90b49bee1da to your computer and use it in GitHub Desktop.
Preprocessing script, using speakers p251 and p299 as an example.
import os, sys
import numpy as np
import h5py
from collections import defaultdict
from hyperparams import Hyperparams as hp
from utils import *
def parse_utterance_name(filename):
    """Split a file name such as ``p251_001.wav`` into ``('251', '001')``.

    The extension and the first character of the stem are dropped, and the
    remainder is split on ``_`` into a speaker id and an utterance id.

    Raises:
        ValueError: if the remainder does not consist of exactly two
            ``_``-separated parts.
    """
    stem = os.path.splitext(filename)[0]
    parts = stem[1:].split('_')
    if len(parts) != 2:
        raise ValueError(
            'unexpected file name %r; expected <prefix><speaker>_<utterance>.wav'
            % filename)
    return parts[0], parts[1]


def collect_utterances(dataset_path):
    """Return a sorted list of ``(path, speaker_id, uttr_id)`` for every wav file.

    Files without a ``.wav`` extension (matched case-insensitively) are
    skipped so that stray files in the tree do not abort the run. The list is
    sorted so both passes over the data see the files in the same,
    reproducible order.
    """
    found = []
    for dir_path, _, file_names in os.walk(dataset_path):
        for file_name in file_names:
            if not file_name.lower().endswith('.wav'):
                continue
            speaker_id, uttr_id = parse_utterance_name(file_name)
            found.append((os.path.join(dir_path, file_name), speaker_id, uttr_id))
    found.sort()
    return found


def extract_features(path):
    """Load one audio file and return ``(mc, logf0, ap)``.

    ``mc``, ``f0`` and ``ap`` are whatever ``get_MCEPs`` returns for the
    waveform loaded at ``hp.sr``; ``logf0`` is ``log(f0 + 1)``.
    NOTE(review): ``librosa`` and ``get_MCEPs`` are expected to come from
    ``from utils import *`` -- confirm against utils.py.
    """
    y, _ = librosa.load(path, sr=hp.sr, dtype=np.float64)
    mc, f0, ap = get_MCEPs(y)
    # +1 keeps the log finite where f0 is 0.
    return mc, np.log(f0 + 1.0), ap


def summarize_speaker(mcs, logf0s):
    """Compute normalization statistics from one speaker's utterances.

    Args:
        mcs: list of per-utterance ``mc`` arrays, concatenated along axis 0
            (assumes axis 0 indexes frames -- TODO confirm).
        logf0s: list of per-utterance ``logf0`` arrays.

    Returns:
        dict with ``mc_mean`` / ``mc_std`` (reduced over axis 0) and the
        scalar ``logf0_mean`` / ``logf0_std``.
    """
    all_mc = np.concatenate(mcs, axis=0)
    all_logf0 = np.concatenate(logf0s, axis=0)
    return {
        'mc_mean': np.mean(all_mc, axis=0),
        'mc_std': np.std(all_mc, axis=0),
        'logf0_mean': np.mean(all_logf0),
        'logf0_std': np.std(all_logf0),
    }


def compute_speaker_stats(utterances):
    """First pass: extract features and return ``{speaker_id: stats}``.

    Statistics are kept per speaker id, so any number of speakers is handled;
    with only speakers 251 and 299 present this matches the previous
    hard-coded two-speaker behaviour.
    """
    mcs = defaultdict(list)
    logf0s = defaultdict(list)
    for path, speaker_id, _ in utterances:
        print('processing %s' % path)
        mc, logf0, _ = extract_features(path)
        mcs[speaker_id].append(mc)
        logf0s[speaker_id].append(logf0)
    return {spk: summarize_speaker(mcs[spk], logf0s[spk]) for spk in mcs}


def write_dataset(h5py_path, utterances, stats):
    """Second pass: write normalized features and statistics to an HDF5 file.

    For every utterance the datasets ``normed_mc``, ``normed_logf0``, ``ap``,
    ``mc_mean``, ``mc_std``, ``logf0_mean`` and ``logf0_std`` are created under
    ``<speaker_id>/<uttr_id>/``, each as float64 with a leading axis of
    length 1. Features are re-extracted here rather than cached from the
    first pass, so memory use does not grow with the per-frame ``ap`` data.
    """
    with h5py.File(h5py_path, 'w') as f_h5:
        for path, speaker_id, uttr_id in utterances:
            spk = stats[speaker_id]
            mc, logf0, ap = extract_features(path)
            entries = (
                # features
                ('normed_mc', (mc - spk['mc_mean']) / spk['mc_std']),
                ('normed_logf0', (logf0 - spk['logf0_mean']) / spk['logf0_std']),
                ('ap', ap),
                # statistics (the speaker's values, repeated per utterance)
                ('mc_mean', spk['mc_mean']),
                ('mc_std', spk['mc_std']),
                ('logf0_mean', spk['logf0_mean']),
                ('logf0_std', spk['logf0_std']),
            )
            for name, value in entries:
                f_h5.create_dataset(
                    '{}/{}/{}'.format(speaker_id, uttr_id, name),
                    data=np.expand_dims(value, axis=0),
                    dtype=np.float64)


def main(argv=None):
    """Run preprocessing: ``script.py <output.h5> <dataset_dir>``."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: %s <output.h5> <dataset_dir>' % argv[0])
    h5py_path = argv[1]
    dataset_path = argv[2]

    utterances = collect_utterances(dataset_path)
    # extracting
    stats = compute_speaker_stats(utterances)
    # start to make h5py
    write_dataset(h5py_path, utterances, stats)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment