Skip to content

Instantly share code, notes, and snippets.

@nonchris
Last active July 11, 2024 10:36
Show Gist options
  • Save nonchris/d987ed199a307b2fc7fd1d9d63097965 to your computer and use it in GitHub Desktop.
Save nonchris/d987ed199a307b2fc7fd1d9d63097965 to your computer and use it in GitHub Desktop.
Simple Text to Speech GUI for OpenAI's API written in Python
"""
This is a very unclean, quick hack for educational purposes.
It allows you to interact with OpenAI's TTS backend via the API.
You can choose between all their voices.
Note: This code has a few known bugs/ issues:
* It uses a deprecated function for writing the audio
* The filename field for the user is just ignored.
* The API key is hardcoded in the software and can be extracted easily
* You can't paste using right click (ctrl+v works though)
* You can't change the language in the ui
...and probably a few more.
The UI part was mostly AI generated and isn't audited so far.
It just works well enough, for now.
You can use it if you like to, but keep these limitations in mind.
I had less than 25 minutes to build this.
And the key we're using was handed to everyone anyway so there was no security issue
(it had a hard limit and was invalidated afterwards)
"""
# to get started, execute these commands in your terminal or inside your IDE:
# python3 -m venv venv (note: the command for python may vary on windows, it could be py or python too)
# source venv/bin/activate (note: you might need activate.bat or activate.ps1 on windows)
# python3 -m pip install openai
# python3 app.py
# build as .exe for windows:
# first replace the API key placeholder (the OPENAI_API_KEY assignment right below the imports) with your actual token
# pip install pyinstaller~=5.13
# pyinstaller .\text-to-speech-gui-openai.py --onefile --name text-zu-sprache.exe
import datetime as dt
import os
from os.path import expanduser
import subprocess
from pathlib import Path
import tkinter as tk
import tkinter.font as tkfont
from tkinter import filedialog
from tkinter import ttk
from openai import OpenAI
import ctypes
# Put the API key into this process's environment. "YOUR TOKEN" is a
# placeholder that must be replaced with a real key before running/building.
# NOTE: this assignment unconditionally overwrites any OPENAI_API_KEY that was
# already set in the environment.
os.environ["OPENAI_API_KEY"] = "YOUR TOKEN"
# Module-level client shared by call_voice(). No key is passed explicitly, so
# it presumably picks up OPENAI_API_KEY from the environment set above --
# which is why the assignment has to come first.
client = OpenAI()
def call_voice(text: str, output_path: Path, voice: str):
    """Synthesize *text* with OpenAI's ``tts-1`` model and save the audio.

    Args:
        text: The text to be spoken.
        output_path: Destination file for the generated audio.
        voice: Name of the voice to use (e.g. ``"alloy"``).

    Raises:
        Whatever the OpenAI client raises for API/network failures, and
        ``OSError`` if the output file cannot be written. Callers are
        expected to handle these.
    """
    # Calling ``stream_to_file`` on the plain (non-streaming) response is
    # deprecated in the openai library; the streaming-response context manager
    # is the supported way to write the audio to disk and also makes sure the
    # HTTP response is closed afterwards.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice=voice,
        input=text,
    ) as response:
        response.stream_to_file(output_path)
class App:
    """Tkinter front end for generating speech files with ``call_voice``.

    The window offers an optional file name, a voice selection and a text box.
    Generated files are written to ``~/audio``. Constructing an instance
    builds the UI and immediately enters the Tk main loop, so ``App()`` only
    returns once the window has been closed.
    """

    def __init__(self):
        """Create the main window, build the widgets and run the main loop."""
        # Path of the most recently generated file; None until a run succeeds.
        self.file_path = None
        self.window = tk.Tk()
        if os.name == "nt":
            # Best effort only: the shcore DLL / function is not available on
            # every Windows version, and missing DPI awareness merely makes the
            # UI blurry -- it must not prevent the app from starting.
            try:
                ctypes.windll.shcore.SetProcessDpiAwareness(1)
            except (AttributeError, OSError):
                pass
        self.window.title("Text-zu-Sprache Generator")
        self.custom_font = tkfont.Font(size=11)
        self.window.geometry("700x600")
        self.init_ui()
        self.window.mainloop()

    def init_ui(self):
        """Create and pack all widgets, top to bottom."""
        self.whitespace()

        # Optional output file name.
        tk.Label(self.window, text="Dateiname:", font=self.custom_font).pack()
        self.output_file_entry = tk.Entry(self.window, font=self.custom_font)
        self.output_file_entry.pack()
        tk.Label(
            self.window,
            text="(Optional. Standard ist speech_0.mp3, speech_1.mp3 usw.)",
            font=self.custom_font,
        ).pack()
        self.whitespace(2)

        # Voice selection, pre-set to "alloy".
        tk.Label(self.window, text="Stimme:", font=self.custom_font).pack()
        self.voice_var = tk.StringVar(value="alloy")
        self.voice_dropdown = ttk.Combobox(
            self.window,
            textvariable=self.voice_var,
            values=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
            font=self.custom_font,
        )
        self.voice_dropdown.pack()
        self.whitespace(2)

        # Text to be spoken; this widget takes up the remaining space.
        tk.Label(self.window, text="Text:", font=self.custom_font).pack()
        self.text_entry = tk.Text(
            self.window, font=self.custom_font, wrap=tk.WORD, height=10
        )
        self.text_entry.pack(expand=True, fill=tk.BOTH)
        self.whitespace(2)

        # Status line for success and error messages.
        self.result_label = tk.Label(self.window, text="", font=self.custom_font)
        self.result_label.pack()

        button_frame = tk.Frame(self.window)
        button_frame.pack()
        tk.Button(
            button_frame,
            text="Generieren",
            command=self.generate_speech,
            font=self.custom_font,
        ).pack(side=tk.LEFT)
        tk.Button(
            button_frame,
            text="Ordner öffnen",
            command=self.open_folder,
            font=self.custom_font,
        ).pack(side=tk.LEFT)

    def generate_speech(self):
        """Generate an MP3 from the entered text and report the outcome.

        The file is stored in ``~/audio``. If the file name field is filled in,
        that name is used (see ``_resolve_output_path``); otherwise the first
        free ``speech_<n>.mp3`` is chosen. Existing files are never
        overwritten. Any failure is shown in the status label.
        """
        text = self.text_entry.get("1.0", tk.END).strip()
        if not text:
            self._show_status("Fehler: Text darf nicht leer sein.")
            return
        voice = self.voice_var.get()
        output_dir = Path(expanduser("~")) / "audio"

        # UI boundary: every failure (directory creation, API call, writing
        # the file) is surfaced to the user instead of being lost in a
        # Tk callback traceback.
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
            file = self._resolve_output_path(
                output_dir, self.output_file_entry.get()
            )
            print(file)
            start = dt.datetime.now()
            print(f"Starting to generate at {start}")
            call_voice(text, file, voice)
            now = dt.datetime.now()
            print(
                f"Done generating at {now}, "
                f"this took: {(now - start).total_seconds()} seconds"
            )
        except Exception as e:
            self._show_status(f"Fehler: {e}")
            return

        self.file_path = file
        self._show_status(f"Erfolgreich! Datei gespeichert unter:\n{file}")

    def open_folder(self):
        """Open the folder of the last generated file in the file manager."""
        if not self.file_path:
            self._show_status("Fehler: Keine Datei vorhanden.")
            return
        folder = self.file_path.parent
        try:
            if os.name == "nt":
                os.startfile(folder)
            elif os.name == "posix":
                # macOS is POSIX too but ships ``open`` instead of ``xdg-open``.
                opener = "open" if os.uname().sysname == "Darwin" else "xdg-open"
                subprocess.call([opener, str(folder)])
            else:
                self._show_status("Fehler: Plattform nicht unterstützt.")
        except OSError as e:
            # E.g. the opener executable is not installed.
            self._show_status(f"Fehler: {e}")

    def whitespace(self, rows=1):
        """Insert ``rows`` empty labels as vertical spacing."""
        for _ in range(rows):
            tk.Label(self.window, text="", font=self.custom_font).pack()

    def _show_status(self, message):
        """Display ``message`` in the status label below the text box."""
        self.result_label.config(text=message, font=self.custom_font)

    @staticmethod
    def _resolve_output_path(directory, requested_name):
        """Return a path inside ``directory`` that does not exist yet.

        Args:
            directory: Target folder as a ``Path``.
            requested_name: Raw content of the file name field; may be empty.

        Returns:
            ``speech_<n>.mp3`` with the lowest free ``n`` (starting at 0) when
            no usable name was given. Otherwise the requested name, reduced to
            its final path component so it cannot leave ``directory``, with
            ``.mp3`` appended when missing, and ``_<n>`` inserted before the
            suffix if the name is already taken.
        """
        name = Path(requested_name.strip()).name
        if name in ("", ".", ".."):
            stem = "speech"
            candidate = directory / "speech_0.mp3"
        else:
            if not name.lower().endswith(".mp3"):
                name += ".mp3"
            stem = name[:-4]
            candidate = directory / name

        index = 0
        while candidate.exists():
            index += 1
            candidate = directory / f"{stem}_{index}.mp3"
        return candidate
# Script entry point: constructing App builds the window and runs the Tk main
# loop, so this call returns only after the window has been closed.
if __name__ == "__main__":
    App()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment