Last active
November 15, 2023 00:45
-
-
Save paschott/d660289ac34fa7d4934bca8c3ea055ab to your computer and use it in GitHub Desktop.
Text to Speech for creating a practice spelling bee audio file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# requires gtts
# requires pydub
# requires ffmpeg to be installed on the system
from gtts import gTTS | |
from pydub import AudioSegment | |
import os | |
def generate_word_audio(word, output_folder='audio'):
    """Synthesize *word* with gTTS and save it as ``<output_folder>/<word>.mp3``.

    Args:
        word: Text to speak; it is also used verbatim as the file stem.
        output_folder: Directory that receives the MP3; created if missing.
    """
    # exist_ok=True replaces the separate exists() test, which could race
    # with another process creating the folder between check and create.
    os.makedirs(output_folder, exist_ok=True)
    print(word)  # progress output: one line per synthesized word
    tts = gTTS(text=word, lang='en-us')
    tts.save(os.path.join(output_folder, f"{word}.mp3"))
def generate_silence(duration_ms):
    """Return a silent ``AudioSegment`` lasting *duration_ms* milliseconds."""
    pause = AudioSegment.silent(duration=duration_ms)
    return pause
def read_words(words, output_folder='audio', pause_ms=5000,
               output_name="Spelling_Grades_45.mp3"):
    """Build one practice MP3 in which every word is spoken twice.

    Each word is synthesized via ``generate_word_audio`` and laid out as
    word, pause, word. Consecutive words are separated by one more pause;
    no pause follows the final word.

    Args:
        words: Iterable of words to speak, in order.
        output_folder: Directory for the per-word clips and the combined file.
        pause_ms: Length in milliseconds of every pause (default 5000).
        output_name: File name of the combined MP3 inside ``output_folder``.
    """
    # Blank entries (e.g. an empty line in the word list) contain nothing to
    # speak, so drop them instead of handing them to the synthesizer.
    words = [word.strip() for word in words if word.strip()]

    # The pause is identical everywhere, so build it once and reuse it.
    pause = generate_silence(pause_ms)

    audio_segments = []
    for i, word in enumerate(words):
        generate_word_audio(word, output_folder)
        # Decode the clip once and reuse the segment for the repetition.
        clip = AudioSegment.from_mp3(os.path.join(output_folder, f"{word}.mp3"))
        audio_segments.extend([clip, pause, clip])
        # Separate this word from the next one; nothing trails the last word.
        if i < len(words) - 1:
            audio_segments.append(pause)

    combined = AudioSegment.silent(duration=0)
    for segment in audio_segments:
        combined += segment
    combined.export(os.path.join(output_folder, output_name), format="mp3")
def main(words_path='C:\\temp\\Spelling\\Spelling_Grades_45.txt'):
    """Read the word list (one word per line) and generate the practice audio.

    Args:
        words_path: Text file holding the spelling words; defaults to the
            location the script was originally hard-coded to.
    """
    # utf-8-sig decodes plain ASCII/UTF-8 and also strips a leading BOM,
    # which would otherwise be glued onto the first word of the list.
    with open(words_path, 'r', encoding='utf-8-sig') as file:
        words = file.read().splitlines()
    read_words(words)
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
solitude | |
nearsighted | |
agreeable | |
potential | |
remissible | |
lecture | |
lynch | |
foundation | |
nowhere | |
audience | |
theft | |
quick-freeze | |
specialist | |
charcoal | |
sculptor | |
abolish | |
intuition | |
optical | |
oldster | |
zipper | |
nutcracker | |
emissary | |
rabies | |
heredity | |
tuition | |
unitary | |
dormant | |
casualty | |
blameworthy | |
rambled | |
barbecue | |
illuminate | |
liquidate | |
piddling | |
dumbwaiter | |
latecomer | |
adequate | |
infectious | |
endeavor | |
qualifier | |
herbivore | |
teenybopper | |
jasmine | |
efficiency | |
onionskin | |
shrubbery | |
megapixel | |
identity | |
procedure | |
originally |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Definitely a work in progress. Uses Python v3.12, gtts, pydub, and ffmpeg to generate an MP3 that speaks the words in a specific cadence: each word is spoken twice, 5 seconds apart, before moving on to the next word.
gtts doesn't allow for any customization other than the language/culture used. You get whatever language Google's TTS engine uses for that culture code.