Skip to content

Instantly share code, notes, and snippets.

@mizoru
Created November 26, 2022 18:46
Show Gist options
  • Save mizoru/d4fc7596cd06d84f94e4809a1cb683a3 to your computer and use it in GitHub Desktop.
Save mizoru/d4fc7596cd06d84f94e4809a1cb683a3 to your computer and use it in GitHub Desktop.
get_data_to_buffer with energies
def get_data_to_buffer(train_config):
    """Load every training example into an in-memory list.

    For each (index, audio file) pair the function loads the precomputed
    mel array and duration array from disk, runs the transcript line
    through ``text_to_sequence``, derives an energy vector from the
    waveform's spectrogram, writes that energy to ``energies/<index>.npy``
    and appends the example to the buffer.

    Args:
        train_config: Configuration object. The attributes read here are
            ``data_path``, ``audio_path``, ``mel_ground_truth``,
            ``alignment_path`` and ``text_cleaners``.

    Returns:
        list[dict]: One dict per example with the keys ``"text"``,
        ``"duration"``, ``"mel_target"`` and ``"energy"``, each holding a
        ``torch.Tensor``.

    Raises:
        AssertionError: If the energy vector's length differs from the
            first dimension of the corresponding mel array.
    """
    buffer = []
    text = process_text(train_config.data_path)
    # Sorted so that the i-th audio file lines up with the i-th text line.
    # NOTE(review): this relies on file names sorting in transcript order
    # -- confirm against the dataset layout.
    audio_files = sorted(Path(train_config.audio_path).iterdir())
    # NOTE(review): presumably torchaudio.transforms.Spectrogram with
    # n_fft=512 -- confirm against the file's imports.
    spec = Spectrogram(512)

    # np.save does not create parent directories; make sure the output
    # folder exists before the first write.
    os.makedirs("energies", exist_ok=True)

    # Iteration stops at the shorter of the two inputs (same as zipping
    # them); computing the count up front lets tqdm display a total.
    n_items = min(len(text), len(audio_files))

    start = time.perf_counter()
    for i, file in tqdm(enumerate(audio_files[:n_items]), total=n_items):
        # Mel files are numbered from 1, alignment files from 0.
        mel_gt_name = os.path.join(
            train_config.mel_ground_truth, f"ljspeech-mel-{i + 1:05d}.npy")
        mel_gt_target = np.load(mel_gt_name)
        duration = np.load(os.path.join(
            train_config.alignment_path, f"{i}.npy"))

        # Drop the last character of the line.
        # NOTE(review): presumably a trailing newline -- verify against
        # process_text.
        character = np.array(
            text_to_sequence(text[i][:-1], train_config.text_cleaners))

        wav, sr = torchaudio.load(file)
        # L2 norm along dim 1 of the spectrogram, then the first entry of
        # the leading dimension.
        # NOTE(review): assumes spec(wav) is (channel, freq, time), giving
        # one energy value per frame of the first channel -- confirm.
        energy = torch.norm(spec(wav), p=2, dim=1)[0]
        assert energy.shape[0] == mel_gt_target.shape[0], (
            f"energy length {energy.shape[0]} != mel length "
            f"{mel_gt_target.shape[0]} for item {i} ({file})")
        np.save(f"energies/{i}", energy.numpy())

        buffer.append({
            "text": torch.from_numpy(character),
            "duration": torch.from_numpy(duration),
            "mel_target": torch.from_numpy(mel_gt_target),
            "energy": energy,
        })
    end = time.perf_counter()

    print(f"cost {end - start:.2f}s to load all data into buffer.")
    return buffer
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment