Skip to content

Instantly share code, notes, and snippets.

@kokeshing
Created November 1, 2019 11:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kokeshing/84734793c5a14775531639e29048def4 to your computer and use it in GitHub Desktop.
Save kokeshing/84734793c5a14775531639e29048def4 to your computer and use it in GitHub Desktop.
Test: mel spectrogram of waveform slices vs. slices of the full mel spectrogram
import numpy as np
import librosa
"""
https://github.com/r9y9/wavenet_vocoder/blob/a835a8eebfec5e049d93f629c4ee2f1daab56329/train.py#L460
https://github.com/Rayhane-mamah/Tacotron-2/blob/ab5cb08a931fc842d3892ebeb27c8b8734ddd4b8/wavenet_vocoder/feeder.py#L368
https://github.com/kokeshing/WaveNet-Estimator/blob/cc2ee6bb699cf977356b23e7513ba549cdfc874f/dataset.py#L38
"""
def load_wav(path, sampling_rate):
    """Load an audio file and return only its samples.

    Args:
        path: Location of the audio file; handed to librosa unchanged.
        sampling_rate: Forwarded to librosa as ``sr`` (the target sampling
            rate of the returned signal).

    Returns:
        The sample array, i.e. the first element of the
        ``(samples, sampling_rate)`` pair that librosa returns.
    """
    # The second element is the sampling rate, which the caller already knows.
    wav, _ = librosa.load(path, sr=sampling_rate)
    return wav
def get_mel_filter(sampling_rate, n_fft, num_mels, fmin=55, fmax=7600):
    """Build the mel filter bank used to project STFT magnitudes onto mel bands.

    Args:
        sampling_rate: Sampling rate of the audio the filter will be applied to.
        n_fft: FFT size of the STFT whose bins the filter bank will weight.
        num_mels: Number of mel bands to generate.
        fmin: Lowest frequency (Hz) covered by the filter bank.
        fmax: Highest frequency (Hz) covered by the filter bank.

    Returns:
        The filter matrix produced by ``librosa.filters.mel``.
    """
    # ``sr`` and ``n_fft`` are keyword-only in librosa >= 0.10; naming them
    # works on older releases too, whereas positional use raises TypeError
    # on newer ones.
    return librosa.filters.mel(
        sr=sampling_rate,
        n_fft=n_fft,
        n_mels=num_mels,
        fmin=fmin,
        fmax=fmax,
    )
def melspectrogram(wav, mel_filter, n_fft=2048, hop_size=275,
                   win_size=1100, min_level_db=-100, ref_level_db=20):
    """Compute a log-scaled mel spectrogram of a waveform.

    Args:
        wav: Audio samples to analyse.
        mel_filter: Filter bank applied to the STFT magnitudes; it must have
            been built for the same ``n_fft``.
        n_fft: FFT size passed to ``librosa.stft``.
        hop_size: Number of samples between successive STFT frames.
        win_size: Analysis window length in samples.
        min_level_db: Floor (in dB) applied before the reference offset.
        ref_level_db: Value (in dB) subtracted from every bin after the
            conversion to decibels.

    Returns:
        The mel spectrogram in decibels, offset by ``-ref_level_db``.
    """
    # pad_mode='constant' asks librosa to pad the signal edges with a
    # constant instead of its default padding mode.
    d = librosa.stft(y=wav, n_fft=n_fft, hop_length=hop_size,
                     win_length=win_size, pad_mode='constant')
    mel_sp = _linear_to_mel(np.abs(d), mel_filter)
    return _amp_to_db(mel_sp, min_level_db) - ref_level_db
def _linear_to_mel(spectogram, mel_filter):
return np.dot(mel_filter, spectogram)
def _amp_to_db(x, min_level_db):
min_level = np.exp(min_level_db / 20 * np.log(10))
return 20 * np.log10(np.maximum(min_level, x))
def main():
    """Compare mel frames of waveform slices against the full mel spectrogram.

    Loads ``test.wav``, computes its mel spectrogram once, then for each frame
    offset cuts the matching span of samples, recomputes the mel spectrogram
    for that span alone and asserts that its first ``max_time_frames`` frames
    equal the corresponding frames of the full spectrogram.

    Raises:
        AssertionError: If a sliced spectrogram differs from the full one.
    """
    sampling_rate = 22050
    n_fft = 2048
    hop_size = 275  # samples per mel frame; also used to align the waveform
    max_time_frames = 40
    max_steps = hop_size * max_time_frames

    mel_filter = get_mel_filter(sampling_rate, n_fft=n_fft, num_mels=80)

    all_wav = load_wav("test.wav", sampling_rate)
    all_mel_sp = melspectrogram(all_wav, mel_filter,
                                n_fft=n_fft, hop_size=hop_size)
    mel_sp_len = all_mel_sp.shape[1]

    # Zero-pad to the next multiple of the hop size (a whole extra hop when
    # the length is already a multiple), then keep exactly hop_size samples
    # per mel frame.
    num_samples = all_wav.shape[0]
    pad = (num_samples // hop_size + 1) * hop_size - num_samples
    all_wav = np.pad(all_wav, (0, pad), mode='constant', constant_values=0.0)
    all_wav = all_wav[:mel_sp_len * hop_size]

    # NOTE: range() stops before mel_sp_len - max_time_frames, so the final
    # possible window is not visited, and nothing is checked at all when the
    # file has max_time_frames frames or fewer.
    for mel_offset in range(mel_sp_len - max_time_frames):
        wav_offset = mel_offset * hop_size
        wav = all_wav[wav_offset:wav_offset + max_steps]
        print(mel_offset)

        sliced_mel_sp = melspectrogram(wav, mel_filter,
                                       n_fft=n_fft, hop_size=hop_size)
        expected = all_mel_sp[:, mel_offset:mel_offset + max_time_frames]
        assert np.allclose(sliced_mel_sp[:, :max_time_frames], expected), "not same mel sp"
# Run the comparison only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment