Skip to content

Instantly share code, notes, and snippets.

@thoraxe
Created April 13, 2023 18:24
Show Gist options
  • Save thoraxe/40e070a321f58c0904c8fe25d7fed261 to your computer and use it in GitHub Desktop.
Save thoraxe/40e070a321f58c0904c8fe25d7fed261 to your computer and use it in GitHub Desktop.
def build_path_to_transcript_dict_generic_ljspeech(root):
    """Map every existing wav file under *root* to its transcript.

    Reads ``<root>/metadata.csv``. Each non-blank line is expected to hold
    pipe-separated fields: the audio file's base name (without extension)
    first and its transcript second. Any further fields are ignored.

    Rows are dropped, without raising, when they contain no ``|`` separator
    or when ``<root>/wavs/<base name>.wav`` does not exist on disk.

    Args:
        root: Directory containing ``metadata.csv`` and a ``wavs`` folder.

    Returns:
        The result of passing the collected ``{wav_path: transcript}`` dict
        to ``limit_to_n``. That helper is defined elsewhere; presumably it
        caps the number of entries -- confirm against its definition.
    """
    path_to_transcript = {}
    with open(os.path.join(root, "metadata.csv"), "r", encoding="utf8") as file:
        for raw_line in file:
            # Drop only the line terminator so the transcript text itself
            # is kept exactly as written in the metadata file.
            line = raw_line.rstrip("\n")
            if not line.strip():
                continue
            fields = line.split("|")
            if len(fields) < 2:
                # No transcript column: the row cannot yield an entry, so
                # skip it the same way rows with a missing wav are skipped.
                continue
            wav_path = os.path.join(root, "wavs", fields[0] + ".wav")
            if os.path.exists(wav_path):
                path_to_transcript[wav_path] = fields[1]
    return limit_to_n(path_to_transcript)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment