Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tikhonova/c6425796a0935853377984bb82d46d27 to your computer and use it in GitHub Desktop.
Save tikhonova/c6425796a0935853377984bb82d46d27 to your computer and use it in GitHub Desktop.
''' Make metadata.csv and filelists via https://jaimeleal.github.io/how-to-speech-synthesis '''
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
# Build metadata.csv plus the train/test/val filelists from the transcript files.
filepath = 'E:/AlanWatts/dataset/transcripts2/'
# os.listdir returns entries in arbitrary order; sorting keeps the seeded
# train/test/val split below reproducible from run to run.
files = sorted(os.listdir(filepath))
rows = []
for file in files:
    filename = os.path.join(filepath, file)
    # The context manager closes each handle instead of leaking it.
    with open(filename, "r", encoding="utf-8") as transcript_file:
        file_contents = transcript_file.readlines()
    # The filelists are line-oriented (`<path>|<transcript>`), so fold a
    # multi-line transcript (and its trailing newline) onto a single line.
    transcript = ' '.join(line.strip() for line in file_contents if line.strip())
    # splitext removes the extension whatever its length; the previous
    # `file[:-4]` slice silently assumed a three-character extension.
    rows.append([os.path.splitext(file)[0], transcript])
df = pd.DataFrame(rows, columns=["name", "transcript"])

# Display options only affect how frames are printed while debugging.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

df["wav_path"] = df["name"].apply("E:/AlanWatts/dataset/split_audio2/{}.wav".format)

# Add new columns.
# See the Tacotron2 documentation for the `<audio file path>|<transcript>` format:
# https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2#getting-the-data
df["metadata"] = df["name"] + "|" + df["transcript"]
df["wav_text"] = df["wav_path"] + "|" + df["transcript"]

# Split files into training, testing, and validation.
# NOTE(review): the second split takes 5% of the 10% hold-out, so `val` is
# roughly 0.5% of all rows -- confirm that this ratio is intended.
train, test = train_test_split(df, test_size=0.1, random_state=1)
test, val = train_test_split(test, test_size=0.05, random_state=1)

metadata = df["metadata"]
audio_text_test_filelist = test["wav_text"]
audio_text_train_filelist = train["wav_text"]
audio_text_val_filelist = val["wav_text"]

# NOTE(review): to_csv writes a "metadata" header row and quotes values that
# contain commas or quotes -- confirm the consumer of metadata.csv expects that.
metadata.to_csv("E:/AlanWatts/dataset/metadata.csv", index=False)

# np.savetxt does not create missing directories.
os.makedirs("E:/AlanWatts/dataset/filelists", exist_ok=True)
# The transcripts were read as UTF-8, so write the filelists as UTF-8 too;
# without `encoding` the output depends on the platform default and can
# fail on characters that default cannot represent.
np.savetxt("E:/AlanWatts/dataset/filelists/audio_text_test_filelist.txt",
           audio_text_test_filelist.values, fmt="%s", encoding="utf-8")
np.savetxt("E:/AlanWatts/dataset/filelists/audio_text_train_filelist.txt",
           audio_text_train_filelist.values, fmt="%s", encoding="utf-8")
np.savetxt("E:/AlanWatts/dataset/filelists/audio_text_val_filelist.txt",
           audio_text_val_filelist.values, fmt="%s", encoding="utf-8")
''' Meta filelist for Waveglow '''
import os
import pandas as pd
# Collect the full path of every entry in the split-audio directory and
# append the paths, one per line, to the WaveGlow filelist.
filepath = 'E:/AlanWatts/dataset/split_audio2/'
files = os.listdir(filepath)
# Paths are formed by plain concatenation, so `filepath` must keep its
# trailing slash.
rows = [filepath + entry for entry in files]
df = pd.DataFrame(rows)
# mode='a' appends to an existing waveglow.txt rather than overwriting it.
df.to_csv(
    "E:/AlanWatts/dataset/waveglow.txt",
    index=False,
    header=False,
    sep='\t',
    mode='a',
)
# via https://github.com/tikhonova/what_would_alan_watts_say/blob/master/speech_synthesis/5_audio_transcript_cleanup_filelists_setup.py
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment