Created
November 1, 2019 11:57
-
-
Save kokeshing/84734793c5a14775531639e29048def4 to your computer and use it in GitHub Desktop.
test mel sp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import librosa | |
""" | |
References:
https://github.com/r9y9/wavenet_vocoder/blob/a835a8eebfec5e049d93f629c4ee2f1daab56329/train.py#L460
https://github.com/Rayhane-mamah/Tacotron-2/blob/ab5cb08a931fc842d3892ebeb27c8b8734ddd4b8/wavenet_vocoder/feeder.py#L368
https://github.com/kokeshing/WaveNet-Estimator/blob/cc2ee6bb699cf977356b23e7513ba549cdfc874f/dataset.py#L38
""" | |
def load_wav(path, sampling_rate):
    """Load an audio file and return only its samples.

    Args:
        path: Location of the audio file, forwarded to ``librosa.core.load``.
        sampling_rate: Sampling rate forwarded to librosa as ``sr``.

    Returns:
        The first element of the pair returned by ``librosa.core.load``
        (the audio samples); the second element is discarded.
    """
    samples, _ = librosa.core.load(path, sr=sampling_rate)
    return samples
def get_mel_filter(sampling_rate, n_fft, num_mels, fmin=55, fmax=7600):
    """Build the mel filter bank used to map linear spectrograms to mel bands.

    Args:
        sampling_rate: Sampling rate of the audio, passed to librosa as ``sr``.
        n_fft: FFT size the filter bank is built for.
        num_mels: Number of mel bands, passed to librosa as ``n_mels``.
        fmin: Lowest frequency covered by the filter bank.
        fmax: Highest frequency covered by the filter bank.

    Returns:
        The filter matrix produced by ``librosa.filters.mel``.
    """
    # Pass sr/n_fft by keyword: librosa >= 0.10 made the arguments of
    # filters.mel keyword-only, so the positional form raises TypeError
    # there. The keyword form is accepted by older releases as well.
    return librosa.filters.mel(
        sr=sampling_rate,
        n_fft=n_fft,
        n_mels=num_mels,
        fmin=fmin,
        fmax=fmax,
    )
def melspectrogram(wav, mel_filter, n_fft=2048, hop_size=275,
                   win_size=1100, min_level_db=-100, ref_level_db=20):
    """Compute a mel spectrogram of ``wav`` expressed in decibels.

    Args:
        wav: Waveform handed to ``librosa.stft`` as ``y``.
        mel_filter: Filter bank applied to the STFT magnitudes.
        n_fft: FFT size for the STFT.
        hop_size: Hop between STFT frames, passed as ``hop_length``.
        win_size: Analysis window size, passed as ``win_length``.
        min_level_db: Lower bound (in dB) applied before the log conversion.
        ref_level_db: Constant subtracted from the dB values.

    Returns:
        The mel spectrogram in dB, shifted down by ``ref_level_db``.
    """
    stft_matrix = librosa.stft(
        y=wav,
        n_fft=n_fft,
        hop_length=hop_size,
        win_length=win_size,
        pad_mode='constant',
    )
    magnitude = np.abs(stft_matrix)
    mel_amplitude = _linear_to_mel(magnitude, mel_filter)
    mel_db = _amp_to_db(mel_amplitude, min_level_db)
    return mel_db - ref_level_db
def _linear_to_mel(spectogram, mel_filter): | |
return np.dot(mel_filter, spectogram) | |
def _amp_to_db(x, min_level_db): | |
min_level = np.exp(min_level_db / 20 * np.log(10)) | |
return 20 * np.log10(np.maximum(min_level, x)) | |
def main(wav_path="test.wav", sampling_rate=22050, hop_size=275,
         max_time_frames=40):
    """Compare mel spectrograms of waveform windows against the full one.

    The mel spectrogram of the whole file is computed once. Then, for each
    frame offset, the matching slice of the waveform is transformed on its
    own and its first ``max_time_frames`` frames are compared with the
    corresponding columns of the full spectrogram.

    Args:
        wav_path: Audio file to load.
        sampling_rate: Sampling rate used for loading and for the filter bank.
        hop_size: STFT hop in samples; also the number of waveform samples
            assumed per mel frame when slicing.
        max_time_frames: Number of mel frames per compared window.

    Raises:
        AssertionError: If a window's mel spectrogram differs from the
            corresponding slice of the full spectrogram.
    """
    mel_filter = get_mel_filter(sampling_rate, n_fft=2048, num_mels=80)

    all_wav = load_wav(wav_path, sampling_rate)
    # hop_size is passed explicitly so the slicing arithmetic below cannot
    # drift away from the hop actually used by the STFT.
    all_mel_sp = melspectrogram(all_wav, mel_filter, hop_size=hop_size)
    mel_sp_len = all_mel_sp.shape[1]

    # Zero-pad to the next multiple of hop_size (a whole extra hop when the
    # length is already aligned), then trim so the waveform holds exactly
    # hop_size samples for every mel frame.
    num_samples = all_wav.shape[0]
    pad = (num_samples // hop_size + 1) * hop_size - num_samples
    all_wav = np.pad(all_wav, (0, pad), mode='constant', constant_values=0.0)
    all_wav = all_wav[:mel_sp_len * hop_size]

    max_steps = hop_size * max_time_frames

    for mel_offset in range(mel_sp_len - max_time_frames):
        wav_offset = mel_offset * hop_size
        wav = all_wav[wav_offset:wav_offset + max_steps]

        print(mel_offset)

        window_mel_sp = melspectrogram(
            wav, mel_filter, hop_size=hop_size)[:, :max_time_frames]
        expected_mel_sp = all_mel_sp[:, mel_offset:mel_offset + max_time_frames]
        # Raised explicitly (rather than via `assert`) so the check still
        # runs when Python is started with -O.
        if not np.allclose(window_mel_sp, expected_mel_sp):
            raise AssertionError("not same mel sp")
# Script entry point: run the comparison only when executed directly.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment