nonchris/text-to-speech-gui-openai.py

## text-to-speech-gui-openai.py
"""
This is a very unclean, quick hack for educational purposes.
It allows you to interact with OpenAI's TTS backend via the API.
You can choose between all their voices.

Note: This code has a few known bugs/issues:
* It uses a deprecated function for writing the audio
* The filename field for the user is just ignored.
* The API key is hardcoded in the software and can be extracted easily
* You can't paste using right click (ctrl+v works though)
* You can't change the language in the ui
...and probably a few more.
The UI part was mostly AI generated and isn't audited so far.
It just works well enough, for now.

You can use it if you like, but keep these limitations in mind.
I had less than 25 minutes to build this.
And the key we're using was handed to everyone anyway so there was no security issue
(it had a hard limit and was invalidated afterwards)
"""

# to get started, execute these commands in your terminal or inside your IDE:
# python3 -m venv venv (note: the command for python may vary on windows, it could be py or python too)
# source venv/bin/activate (note: you might need activate.bat or activate.ps1 on windows)
# python3 -m pip install openai
# python3 text-to-speech-gui-openai.py

# build as .exe for windows:
# first replace the "YOUR TOKEN" placeholder (the OPENAI_API_KEY assignment right below the imports) with your actual token
# pip install pyinstaller~=5.13
# pyinstaller .\text-to-speech-gui-openai.py --onefile --name text-zu-sprache.exe

import datetime as dt
import os
from os.path import expanduser
import subprocess
from pathlib import Path
import tkinter as tk
import tkinter.font as tkfont
from tkinter import filedialog
from tkinter import ttk
from openai import OpenAI
import ctypes

# Keep a key that is already exported in the environment; only fall back to the
# hardcoded placeholder (replace it before building an .exe) when none is set.
# A plain assignment here would overwrite a real key with the placeholder.
os.environ.setdefault("OPENAI_API_KEY", "YOUR TOKEN")

# Module-wide API client. It is created without arguments, so it presumably
# picks the key up from OPENAI_API_KEY - which is why the variable is set first.
client = OpenAI()

def call_voice(text: str, output_path: Path, voice: str) -> None:
    """Synthesise *text* with the ``tts-1`` model and save the audio to *output_path*.

    Args:
        text: The text to be spoken.
        output_path: File the returned audio is written to.
        voice: Name of the voice, passed through to the API unchanged.

    Nothing is caught here: errors raised by the OpenAI client or while
    writing the file propagate to the caller.
    """
    # Ask for a streaming response. Calling ``stream_to_file`` on the plain
    # (non-streaming) response object is deprecated in the openai package.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice=voice,
        input=text,
    ) as response:
        response.stream_to_file(output_path)


class App:
    """Tkinter window that sends the entered text to ``call_voice`` and stores the MP3.

    Instantiating the class builds the window and then blocks in the Tk main
    loop until the window is closed. All user-facing strings are German.
    """

    def __init__(self):
        """Create the window, build all widgets and start the Tk main loop."""
        # Path of the most recently generated file; stays None until a generation succeeds.
        self.file_path = None
        self.window = tk.Tk()
        if os.name == "nt":
            try:
                # NOTE(review): 1 presumably selects system-DPI awareness so the
                # window is not rendered blurry on scaled displays - confirm.
                ctypes.windll.shcore.SetProcessDpiAwareness(1)
            except (AttributeError, OSError):
                # Best effort: if shcore.dll or the function cannot be loaded,
                # start the UI anyway instead of crashing at start-up.
                pass

        self.window.title("Text-zu-Sprache Generator")
        self.custom_font = tkfont.Font(size=11)
        self.window.geometry("700x600")

        self.init_ui()
        self.window.mainloop()

    def init_ui(self):
        """Create and pack all widgets top to bottom in the main window."""
        self.whitespace()

        # Optional output file name.
        tk.Label(self.window, text="Dateiname:", font=self.custom_font).pack()
        self.output_file_entry = tk.Entry(self.window, font=self.custom_font)
        self.output_file_entry.pack()
        tk.Label(self.window, text="(Optional. Standard ist speech_0.mp3, speech_1.mp3 usw.)",
                 font=self.custom_font).pack()

        self.whitespace(2)

        # Voice selection, pre-set to "alloy".
        tk.Label(self.window, text="Stimme:", font=self.custom_font).pack()
        self.voice_var = tk.StringVar(value="alloy")
        self.voice_dropdown = ttk.Combobox(self.window, textvariable=self.voice_var,
                                           values=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                                           font=self.custom_font)
        self.voice_dropdown.pack()

        self.whitespace(2)

        # Multi-line input for the text to speak; it takes up the spare window space.
        tk.Label(self.window, text="Text:", font=self.custom_font).pack()
        self.text_entry = tk.Text(self.window, font=self.custom_font, wrap=tk.WORD, height=10)
        self.text_entry.pack(expand=True, fill=tk.BOTH)

        self.whitespace(2)

        # Status line for success and error messages.
        self.result_label = tk.Label(self.window, text="", font=self.custom_font)
        self.result_label.pack()

        button_frame = tk.Frame(self.window)
        button_frame.pack()

        tk.Button(button_frame, text="Generieren", command=self.generate_speech, font=self.custom_font).pack(
            side=tk.LEFT)
        tk.Button(button_frame, text="Ordner öffnen", command=self.open_folder, font=self.custom_font).pack(
            side=tk.LEFT)

    @staticmethod
    def _pick_output_path(directory: Path, requested_name: str = "") -> Path:
        """Return a path inside *directory* that does not exist yet.

        Args:
            directory: Folder the file is going to be written to.
            requested_name: File name typed by the user; may be empty.

        Returns:
            ``speech_0.mp3``, ``speech_1.mp3``, ... when no usable name was
            given. Otherwise ``<name>.mp3``, falling back to ``<name>_1.mp3``,
            ``<name>_2.mp3``, ... when that file already exists.
        """
        # Keep only the last path component so the input cannot point outside *directory*.
        stem = Path(requested_name.strip()).name
        if stem.lower().endswith(".mp3"):
            stem = stem[:-4]

        if stem in ("", ".", ".."):
            stem = "speech"
            candidate = directory / "speech_0.mp3"
        else:
            candidate = directory / f"{stem}.mp3"

        counter = 0
        while candidate.exists():
            counter += 1
            candidate = directory / f"{stem}_{counter}.mp3"
        return candidate

    def _show_status(self, message: str) -> None:
        """Display *message* in the status line below the text field."""
        self.result_label.config(text=message)

    def generate_speech(self):
        """Generate speech for the entered text and report the outcome in the status line.

        The audio is written to the ``audio`` folder in the user's home
        directory. Every failure (folder creation, API call, file write) is
        shown in the status line instead of being raised.
        """
        text = self.text_entry.get("1.0", tk.END).strip()
        if not text:
            self._show_status("Fehler: Text darf nicht leer sein.")
            return
        voice = self.voice_var.get()

        try:
            output_dir = Path(expanduser("~")) / "audio"
            output_dir.mkdir(parents=True, exist_ok=True)
            target = self._pick_output_path(output_dir, self.output_file_entry.get())

            start = dt.datetime.now()
            print(f"Starting to generate at {start}")
            call_voice(text, target, voice)
            now = dt.datetime.now()
            print(f"Done generating at {now}, this took: {(now - start).total_seconds()} seconds")
        except Exception as e:
            # UI boundary: surface any failure to the user rather than losing it in the Tk callback.
            self._show_status(f"Fehler: {e}")
        else:
            self.file_path = target
            self._show_status(f"Erfolgreich! Datei gespeichert unter:\n{target}")

    def open_folder(self):
        """Open the folder of the last generated file in the system file manager."""
        if not self.file_path:
            self._show_status("Fehler: Keine Datei vorhanden.")
            return

        folder = self.file_path.parent
        try:
            if os.name == "nt":
                os.startfile(folder)
            elif os.name == "posix":
                # macOS also reports "posix" but ships `open` instead of `xdg-open`.
                opener = "open" if os.uname().sysname == "Darwin" else "xdg-open"
                subprocess.call([opener, folder])
            else:
                self._show_status("Fehler: Plattform nicht unterstützt.")
        except OSError as e:
            # Raised e.g. when the opener executable is not installed.
            self._show_status(f"Fehler: Ordner konnte nicht geöffnet werden: {e}")

    def whitespace(self, rows=1):
        """Pack *rows* empty labels into the window as vertical spacing."""
        for _ in range(rows):
            tk.Label(self.window, text="", font=self.custom_font).pack()


# Script entry point: constructing App opens the window and blocks until it is closed.
if __name__ == "__main__":
    App()
	"""
	This is a very unclean, quick hack for educational purposes.
	It allows you to interact with OpenAI's TTS backend via the API.
	You can choose between all their voices.

	Note: This code has a few known bugs/issues:
	* It uses a deprecated function for writing the audio
	* The filename field for the user is just ignored.
	* The API key is hardcoded in the software and can be extracted easily
	* You can't paste using right click (ctrl+v works though)
	* You can't change the language in the ui
	...and probably a few more.
	The UI part was mostly AI generated and isn't audited so far.
	It just works well enough, for now.

	You can use it if you like, but keep these limitations in mind.
	I had less than 25 minutes to build this.
	And the key we're using was handed to everyone anyway so there was no security issue
	(it had a hard limit and was invalidated afterwards)
	"""

	# to get started, execute these commands in your terminal or inside your IDE:
	# python3 -m venv venv (note: the command for python may vary on windows, it could be py or python too)
	# source venv/bin/activate (note: you might need activate.bat or activate.ps1 on windows)
	# python3 -m pip install openai
	# python3 text-to-speech-gui-openai.py

	# build as .exe for windows:
	# first replace the "YOUR TOKEN" placeholder (the OPENAI_API_KEY assignment right below the imports) with your actual token
	# pip install pyinstaller~=5.13
	# pyinstaller .\text-to-speech-gui-openai.py --onefile --name text-zu-sprache.exe

	import datetime as dt
	import os
	from os.path import expanduser
	import subprocess
	from pathlib import Path
	import tkinter as tk
	import tkinter.font as tkfont
	from tkinter import filedialog
	from tkinter import ttk
	from openai import OpenAI
	import ctypes

	# Keep a key that is already exported in the environment; only fall back to the
	# hardcoded placeholder (replace it before building an .exe) when none is set.
	# A plain assignment here would overwrite a real key with the placeholder.
	os.environ.setdefault("OPENAI_API_KEY", "YOUR TOKEN")

	# Module-wide API client. It is created without arguments, so it presumably
	# picks the key up from OPENAI_API_KEY - which is why the variable is set first.
	client = OpenAI()

	def call_voice(text: str, output_path: Path, voice: str) -> None:
	    """Synthesise *text* with the ``tts-1`` model and save the audio to *output_path*.

	    Args:
	        text: The text to be spoken.
	        output_path: File the returned audio is written to.
	        voice: Name of the voice, passed through to the API unchanged.

	    Nothing is caught here: errors raised by the OpenAI client or while
	    writing the file propagate to the caller.
	    """
	    # Ask for a streaming response. Calling ``stream_to_file`` on the plain
	    # (non-streaming) response object is deprecated in the openai package.
	    with client.audio.speech.with_streaming_response.create(
	        model="tts-1",
	        voice=voice,
	        input=text,
	    ) as response:
	        response.stream_to_file(output_path)


	class App:
	    """Tkinter window that sends the entered text to ``call_voice`` and stores the MP3.

	    Instantiating the class builds the window and then blocks in the Tk main
	    loop until the window is closed. All user-facing strings are German.
	    """

	    def __init__(self):
	        """Create the window, build all widgets and start the Tk main loop."""
	        # Path of the most recently generated file; stays None until a generation succeeds.
	        self.file_path = None
	        self.window = tk.Tk()
	        if os.name == "nt":
	            try:
	                # NOTE(review): 1 presumably selects system-DPI awareness so the
	                # window is not rendered blurry on scaled displays - confirm.
	                ctypes.windll.shcore.SetProcessDpiAwareness(1)
	            except (AttributeError, OSError):
	                # Best effort: if shcore.dll or the function cannot be loaded,
	                # start the UI anyway instead of crashing at start-up.
	                pass

	        self.window.title("Text-zu-Sprache Generator")
	        self.custom_font = tkfont.Font(size=11)
	        self.window.geometry("700x600")

	        self.init_ui()
	        self.window.mainloop()

	    def init_ui(self):
	        """Create and pack all widgets top to bottom in the main window."""
	        self.whitespace()

	        # Optional output file name.
	        tk.Label(self.window, text="Dateiname:", font=self.custom_font).pack()
	        self.output_file_entry = tk.Entry(self.window, font=self.custom_font)
	        self.output_file_entry.pack()
	        tk.Label(self.window, text="(Optional. Standard ist speech_0.mp3, speech_1.mp3 usw.)",
	                 font=self.custom_font).pack()

	        self.whitespace(2)

	        # Voice selection, pre-set to "alloy".
	        tk.Label(self.window, text="Stimme:", font=self.custom_font).pack()
	        self.voice_var = tk.StringVar(value="alloy")
	        self.voice_dropdown = ttk.Combobox(self.window, textvariable=self.voice_var,
	                                           values=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
	                                           font=self.custom_font)
	        self.voice_dropdown.pack()

	        self.whitespace(2)

	        # Multi-line input for the text to speak; it takes up the spare window space.
	        tk.Label(self.window, text="Text:", font=self.custom_font).pack()
	        self.text_entry = tk.Text(self.window, font=self.custom_font, wrap=tk.WORD, height=10)
	        self.text_entry.pack(expand=True, fill=tk.BOTH)

	        self.whitespace(2)

	        # Status line for success and error messages.
	        self.result_label = tk.Label(self.window, text="", font=self.custom_font)
	        self.result_label.pack()

	        button_frame = tk.Frame(self.window)
	        button_frame.pack()

	        tk.Button(button_frame, text="Generieren", command=self.generate_speech, font=self.custom_font).pack(
	            side=tk.LEFT)
	        tk.Button(button_frame, text="Ordner öffnen", command=self.open_folder, font=self.custom_font).pack(
	            side=tk.LEFT)

	    @staticmethod
	    def _pick_output_path(directory: Path, requested_name: str = "") -> Path:
	        """Return a path inside *directory* that does not exist yet.

	        Args:
	            directory: Folder the file is going to be written to.
	            requested_name: File name typed by the user; may be empty.

	        Returns:
	            ``speech_0.mp3``, ``speech_1.mp3``, ... when no usable name was
	            given. Otherwise ``<name>.mp3``, falling back to ``<name>_1.mp3``,
	            ``<name>_2.mp3``, ... when that file already exists.
	        """
	        # Keep only the last path component so the input cannot point outside *directory*.
	        stem = Path(requested_name.strip()).name
	        if stem.lower().endswith(".mp3"):
	            stem = stem[:-4]

	        if stem in ("", ".", ".."):
	            stem = "speech"
	            candidate = directory / "speech_0.mp3"
	        else:
	            candidate = directory / f"{stem}.mp3"

	        counter = 0
	        while candidate.exists():
	            counter += 1
	            candidate = directory / f"{stem}_{counter}.mp3"
	        return candidate

	    def _show_status(self, message: str) -> None:
	        """Display *message* in the status line below the text field."""
	        self.result_label.config(text=message)

	    def generate_speech(self):
	        """Generate speech for the entered text and report the outcome in the status line.

	        The audio is written to the ``audio`` folder in the user's home
	        directory. Every failure (folder creation, API call, file write) is
	        shown in the status line instead of being raised.
	        """
	        text = self.text_entry.get("1.0", tk.END).strip()
	        if not text:
	            self._show_status("Fehler: Text darf nicht leer sein.")
	            return
	        voice = self.voice_var.get()

	        try:
	            output_dir = Path(expanduser("~")) / "audio"
	            output_dir.mkdir(parents=True, exist_ok=True)
	            target = self._pick_output_path(output_dir, self.output_file_entry.get())

	            start = dt.datetime.now()
	            print(f"Starting to generate at {start}")
	            call_voice(text, target, voice)
	            now = dt.datetime.now()
	            print(f"Done generating at {now}, this took: {(now - start).total_seconds()} seconds")
	        except Exception as e:
	            # UI boundary: surface any failure to the user rather than losing it in the Tk callback.
	            self._show_status(f"Fehler: {e}")
	        else:
	            self.file_path = target
	            self._show_status(f"Erfolgreich! Datei gespeichert unter:\n{target}")

	    def open_folder(self):
	        """Open the folder of the last generated file in the system file manager."""
	        if not self.file_path:
	            self._show_status("Fehler: Keine Datei vorhanden.")
	            return

	        folder = self.file_path.parent
	        try:
	            if os.name == "nt":
	                os.startfile(folder)
	            elif os.name == "posix":
	                # macOS also reports "posix" but ships `open` instead of `xdg-open`.
	                opener = "open" if os.uname().sysname == "Darwin" else "xdg-open"
	                subprocess.call([opener, folder])
	            else:
	                self._show_status("Fehler: Plattform nicht unterstützt.")
	        except OSError as e:
	            # Raised e.g. when the opener executable is not installed.
	            self._show_status(f"Fehler: Ordner konnte nicht geöffnet werden: {e}")

	    def whitespace(self, rows=1):
	        """Pack *rows* empty labels into the window as vertical spacing."""
	        for _ in range(rows):
	            tk.Label(self.window, text="", font=self.custom_font).pack()


	# Script entry point: constructing App opens the window and blocks until it is closed.
	if __name__ == '__main__':
	    App()