|
""" |
|
Author: Clement TIFFON |
|
Date: 14-06-2023 |
|
Description: This is a simple GUI for the OpenAI Text-to-Speech API. This is an example of a Streamlit app that uses the OpenAI API to convert text to speech. The app allows users to enter text in a chat input field, which is then converted to speech using the OpenAI API. The app displays the chat history and the corresponding audio files. The app uses the OpenAI Python client library to interact with the OpenAI API. The app also uses the Streamlit chat input and audio components to create a chat interface for the user. The app is a simple example of how to use the OpenAI API with Streamlit to create a text-to-speech application. |
|
|
|
to run the app, you need to install the following libraries: |
|
- streamlit |
|
- openai |
|
- numpy |
|
|
|
You can install the libraries using the following command: |
|
pip install streamlit openai numpy |
|
|
|
After that, you can run the app like this: |
|
- be sure to be in the same directory as the file (using the cd command) |
|
- use the appropriate Python environment with the libraries installed |
|
- run the app using the following command in the terminal : |
|
>>> streamlit run simple_tts_gui.py |
|
|
|
the app will open in your default browser and you can use it to convert text to speech using the OpenAI API. |
|
|
|
IMPORTANT: the OpenAI API is a paid service. Generating about 10 minutes of audio costs approximately 0.5€ ($0.6), so be careful with your usage of the API. And /!\ don't share your API key /!\ |
|
|
|
PUT USAGE LIMITS OF THE API in your account to avoid any bad surprise. I'm not responsible for any misuse of the API. this is more an example of how to use the API with Streamlit. |
|
|
|
for more information about the OpenAI API, you can visit the official website: https://platform.openai.com/docs/guides/text-to-speech |
|
and for more information about the Streamlit library, you can visit the official website: https://streamlit.io/ |
|
""" |
|
import streamlit as st |
|
from pathlib import Path |
|
from openai import OpenAI |
|
from datetime import datetime |
|
import os |
|
import logging |
|
|
|
# Work from the folder that contains this script, so the relative log file
# and the audio directory are resolved next to it.
script_dir = Path(__file__).resolve().parent
os.chdir(script_dir)

# Write INFO-and-above records to chat_log.log, each prefixed with a timestamp.
logging.basicConfig(
    filename='chat_log.log',
    level=logging.INFO,
    format='%(asctime)s - %(message)s',
)

# Folder that receives the generated speech files.
Path_audio_files = Path.cwd().joinpath("audio_files")

# Record the start-up context.
logging.info("Starting the app")
logging.info(f"Current working directory: {Path.cwd()}")
logging.info(f"Path to audio files directory: {Path_audio_files}")

# parents=True creates any missing parent directories; exist_ok=True avoids an
# error when the directory is already there.
Path_audio_files.mkdir(parents=True, exist_ok=True)
|
|
|
# Page header: the app title followed by a one-sentence description.
APP_TITLE = "Simple TTS GUI"
APP_DESCRIPTION = "This is a simple GUI for the OpenAI Text-to-Speech API."

st.title(APP_TITLE)
st.write(APP_DESCRIPTION)
|
|
|
import numpy as np |
|
# st.session_state is a dictionary-like store that persists across reruns of
# the script. Make sure both history lists exist before they are used:
#   chat_input  -> the texts entered by the user
#   audio_files -> the audio generated for each text
for history_key in ("chat_input", "audio_files"):
    if history_key not in st.session_state:
        st.session_state[history_key] = []

# Input field where the user enters the text to convert.
prompt = st.chat_input("Enter text to convert to speech:")
|
|
|
# Initialize the OpenAI client.
# The key is taken from the OPENAI_API_KEY environment variable so that it
# does not have to be written into (and accidentally shared with) this file.
# When the variable is unset or empty, the "..." placeholder is used instead:
# replace it with your key from https://platform.openai.com/account/api-keys
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY") or "...")

# Counter used to number the generated audio files; a new session starts at 1.
if "file_count" not in st.session_state:
    st.session_state.file_count = 1
|
if prompt:
    # openai is already a dependency of this file; OpenAIError is the base
    # class of the errors raised by its client.
    from openai import OpenAIError

    logging.info(f"User input: {prompt}")  # Log the user input

    try:
        with st.spinner("Converting text to speech..."):
            # Name the file after the running counter and the current time.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"A{st.session_state.file_count}_{timestamp}.mp3"
            speech_file_path = Path.cwd() / "audio_files" / filename

            # Ask the OpenAI API to convert the text to speech.
            response = client.audio.speech.create(
                model="tts-1",
                voice="onyx",
                input=prompt,
            )

            # Save the audio to disk, then load the bytes for the player.
            response.stream_to_file(speech_file_path)
            audio_file = speech_file_path.read_bytes()
    except (OpenAIError, OSError) as err:
        # Nothing is added to the history on failure, so the chat_input and
        # audio_files lists keep the same length and later reruns can still
        # pair them up.
        logging.exception("Text-to-speech conversion failed")
        st.error(f"Text-to-speech conversion failed: {err}")
    else:
        logging.info(f"AI response : making audio file {filename}")  # Log the AI response

        # Show a player for the new audio in the sidebar.
        with st.sidebar:
            st.audio(audio_file)

        # Commit the prompt and its audio together, only once both exist.
        st.session_state.chat_input.append(prompt)
        st.session_state.audio_files.append(audio_file)
        st.session_state.file_count += 1
|
# Display chat history: each user text followed by the audio generated for it.
# zip() pairs the two lists and stops at the shorter one, so a text without a
# matching audio entry is skipped instead of raising an IndexError.
for user_text, audio_bytes in zip(st.session_state.chat_input,
                                  st.session_state.audio_files):
    with st.chat_message('user'):
        st.markdown(user_text)
    # Robot-face emoji as the avatar (the original literal was mis-encoded).
    with st.chat_message('ai', avatar="🤖"):
        st.audio(audio_bytes)