human (Oct 27, 2024, 09:16 PM)
Make the following modifications:
- Add a smaller "system prompt" textarea above the prompt textarea - if provided this should be included in the fetch() call as a
{"role": "system", "content": "content here "} item at the start of the messages array
- When the prompt has completed, show a big textarea at the bottom containing the raw JSON returned from the API, pretty-printed - and a "Copy to clipboard" button below that which, when clicked, changes text to Copied! for 1.5 s
paste.txt
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenAI Text-to-Speech</title>
<style>
/* Page layout: one centered column, at most 800px wide. */
body {
  font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
  max-width: 800px;
  margin: 20px auto;
  padding: 0 20px;
  line-height: 1.6;
}
/* Blue call-out box holding the usage hint. */
.info {
  background: #e8f4ff;
  padding: 15px;
  border-radius: 4px;
  margin: 20px 0;
  border-left: 4px solid #0066cc;
}
/* Vertical stack of the form controls. */
.input-group {
  display: flex;
  flex-direction: column;
  gap: 12px;
  margin-bottom: 20px;
}
textarea {
  /* Keep padding and border inside the 100% width; with the default
     content-box sizing the textarea is 26px wider than its container. */
  box-sizing: border-box;
  width: 100%;
  min-height: 150px;
  padding: 12px;
  font-size: 16px;
  border: 1px solid #ccc;
  border-radius: 4px;
  resize: vertical;
}
select {
  padding: 8px 12px;
  font-size: 16px;
  border: 1px solid #ccc;
  border-radius: 4px;
  max-width: 200px;
}
button {
  padding: 8px 16px;
  font-size: 16px;
  background: #0066cc;
  color: white;
  border: none;
  border-radius: 4px;
  cursor: pointer;
  max-width: 200px;
}
button:disabled {
  background: #cccccc;
}
button:hover:not(:disabled) {
  background: #0055aa;
}
/* Error banner text. */
.error {
  color: #cc0000;
  margin: 10px 0;
}
/* Result area: audio player, download button and transcript. */
.player-container {
  margin: 20px 0;
}
audio {
  width: 100%;
  margin: 10px 0;
}
.transcript {
  background: #f5f5f5;
  padding: 15px;
  border-radius: 4px;
  margin: 10px 0;
}
.loading {
  color: #666;
  font-style: italic;
}
</style>
</head>
<body><h1>Prompt gpt-4o-audio-preview</h1>
<!-- Usage hint shown under the page heading. -->
<div class="info">
Enter a prompt below and execute against <code>gpt-4o-audio-preview</code> to hear the results.
</div>
<!-- Inputs: prompt text, voice choice, and the submit button. -->
<div class="input-group">
<textarea id="promptInput" placeholder="Enter your text here..." aria-label="Input text"></textarea>
<select id="voiceSelect" aria-label="Voice selection">
<option value="alloy">Alloy</option>
<option value="echo">Echo</option>
<option value="fable">Fable</option>
<option value="onyx">Onyx</option>
<option value="nova">Nova</option>
<option value="shimmer">Shimmer</option>
</select>
<button id="submitBtn">Generate Speech</button>
</div>
<!-- Error banner; starts hidden and is revealed by the script's showError(). -->
<div id="error" class="error" style="display: none;"></div>
<!-- Result area; starts hidden and is revealed by the script after a successful request. -->
<div id="playerContainer" class="player-container" style="display: none;">
<audio id="audioPlayer" controls></audio>
<button id="downloadBtn">Download Audio</button>
<div id="transcript" class="transcript"></div>
</div>
<script>
// Cache references to the page elements the script reads from and writes to.
const promptInput = document.querySelector('#promptInput');
const voiceSelect = document.querySelector('#voiceSelect');
const submitBtn = document.querySelector('#submitBtn');
const errorDiv = document.querySelector('#error');
const playerContainer = document.querySelector('#playerContainer');
const audioPlayer = document.querySelector('#audioPlayer');
const downloadBtn = document.querySelector('#downloadBtn');
const transcriptDiv = document.querySelector('#transcript');
/**
 * Show an error message to the user and hide the audio player.
 *
 * @param {string} message - Text to place in the error banner.
 */
function showError(message) {
  // Hide any earlier result so the error is the only outcome on screen.
  playerContainer.style.display = 'none';

  const banner = errorDiv;
  banner.textContent = message;
  banner.style.display = 'block';
}
/** Hide the error banner; its text content is left untouched. */
function clearError() {
  const { style: bannerStyle } = errorDiv;
  bannerStyle.display = 'none';
}
/**
 * Return the OpenAI API key, asking the user for it the first time.
 *
 * The key is cached in localStorage under "openai_api_key". Surrounding
 * whitespace is stripped both when the key is entered and when it is read
 * back, so a key pasted with a trailing space or newline is not stored
 * verbatim and reused in the Authorization header on every later request.
 *
 * @returns {string|null} The key, or null when the user cancelled the
 *   prompt or entered only whitespace.
 */
function getAPIKey() {
  const storageKey = 'openai_api_key';

  const storedKey = localStorage.getItem(storageKey)?.trim();
  if (storedKey) {
    return storedKey;
  }

  // prompt() returns null when the dialog is cancelled.
  const enteredKey = prompt('Please enter your OpenAI API Key:')?.trim();
  if (!enteredKey) {
    return null;
  }

  localStorage.setItem(storageKey, enteredKey);
  return enteredKey;
}
/**
 * Decode a base64 string into a Blob of the given MIME type.
 *
 * @param {string} base64 - Base64-encoded binary data.
 * @param {string} mimeType - MIME type to tag the Blob with.
 * @returns {Blob} The decoded bytes wrapped in a Blob.
 */
function base64ToBlob(base64, mimeType) {
  // atob() yields a "binary string": one character per decoded byte.
  const bytes = Uint8Array.from(atob(base64), (ch) => ch.charCodeAt(0));
  return new Blob([bytes], { type: mimeType });
}

/**
 * Send the prompt to the chat completions endpoint and present the result.
 *
 * Requests text plus WAV audio from gpt-4o-audio-preview using the selected
 * voice. On success the audio is loaded into the player, the transcript is
 * shown and the download button is wired to the new audio. On failure the
 * error is logged and shown via showError(). The submit button is disabled
 * for the duration of the request and always restored afterwards.
 *
 * @returns {Promise<void>} Resolves when the UI has been updated; failures
 *   are reported in the page rather than rejected.
 */
async function submitToAPI() {
  const apiKey = getAPIKey();
  if (!apiKey) {
    alert('API Key is required.');
    return;
  }

  submitBtn.textContent = 'Processing...';
  submitBtn.disabled = true;

  const payload = {
    model: 'gpt-4o-audio-preview',
    modalities: ['text', 'audio'],
    audio: {
      voice: voiceSelect.value,
      format: 'wav',
    },
    messages: [
      {
        role: 'user',
        content: promptInput.value,
      },
    ],
  };

  try {
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify(payload),
    });

    // An error body is not guaranteed to be JSON; a parse failure must not
    // mask the HTTP failure that is reported below.
    let data = null;
    try {
      data = await response.json();
    } catch (parseError) {
      console.error('Response body was not valid JSON:', parseError);
    }

    if (!response.ok) {
      throw new Error(data?.error?.message || 'API request failed');
    }

    // Fail with a readable message instead of a TypeError when the response
    // carries no audio.
    const audio = data?.choices?.[0]?.message?.audio;
    if (!audio?.data) {
      throw new Error('API response did not contain audio data');
    }

    const audioUrl = URL.createObjectURL(base64ToBlob(audio.data, 'audio/wav'));
    const previousUrl = audioPlayer.src;

    // Update UI
    audioPlayer.src = audioUrl;
    transcriptDiv.textContent = audio.transcript ?? '';
    playerContainer.style.display = 'block';
    clearError();

    // Set up download button
    downloadBtn.onclick = () => {
      const link = document.createElement('a');
      link.href = audioUrl;
      link.download = 'speech.wav';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    };

    // Nothing references the previous blob URL any more; release it so
    // repeated runs do not keep every earlier audio clip in memory.
    if (previousUrl && previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl);
    }
  } catch (error) {
    console.error('Error:', error);
    showError(error.message || 'An error occurred');
  } finally {
    submitBtn.textContent = 'Generate Speech';
    submitBtn.disabled = false;
  }
}
// Handle form submission
submitBtn.addEventListener('click', submitToAPI);

// Ctrl+Enter (Cmd+Enter on macOS) submits from the prompt box. `keydown` is
// used because `keypress` is deprecated. The disabled check stops the
// shortcut from starting a second request while one is already in flight;
// the button alone only blocks mouse clicks.
promptInput.addEventListener('keydown', (e) => {
  const isSubmitShortcut = e.key === 'Enter' && (e.ctrlKey || e.metaKey);
  if (!isSubmitShortcut || submitBtn.disabled) {
    return;
  }
  e.preventDefault();
  submitToAPI();
});
</script>
</body>
</html>
assistant (Oct 27, 2024, 09:16 PM)
I'll help modify the HTML code to add these features. I'll create a React component with the changes since that will make it easier to manage the state and pretty-print the JSON.
import React, { useState, useCallback } from 'react';
const TextToSpeechInterface = () => {
const [systemPrompt, setSystemPrompt] = useState('');
const [prompt, setPrompt] = useState('');
const [voice, setVoice] = useState('alloy');
const [error, setError] = useState('');
const [isProcessing, setIsProcessing] = useState(false);
const [audioUrl, setAudioUrl] = useState('');
const [transcript, setTranscript] = useState('');
const [rawResponse, setRawResponse] = useState('');
human (Oct 27, 2024, 09:17 PM)
do not use react, use html and javascript
assistant (Oct 27, 2024, 09:17 PM)
I apologize for going with React. You're right - let's modify the existing HTML and JavaScript code instead.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenAI Text-to-Speech</title>
<style>
/* Page layout: one centered column, at most 800px wide. */
body {
  font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
  max-width: 800px;
  margin: 20px auto;
  padding: 0 20px;
  line-height: 1.6;
}
/* Blue call-out box holding the usage hint. */
.info {
  background: #e8f4ff;
  padding: 15px;
  border-radius: 4px;
  margin: 20px 0;
  border-left: 4px solid #0066cc;
}
/* Vertical stack of the form controls. */
.input-group {
  display: flex;
  flex-direction: column;
  gap: 12px;
  margin-bottom: 20px;
}
textarea {
  /* Keep padding and border inside the 100% width; with the default
     content-box sizing the textarea is 26px wider than its container. */
  box-sizing: border-box;
  width: 100%;
  padding: 12px;
  font-size: 16px;
  border: 1px solid #ccc;
  border-radius: 4px;
  resize: vertical;
  font-family: inherit;
}
/* Per-textarea heights: small system prompt, larger user prompt and JSON view. */
#systemPrompt {
  min-height: 60px;
}
#promptInput {
  min-height: 150px;
}
#responseJson {
  min-height: 200px;
  font-family: monospace;
  background: #f5f5f5;
}
select {
  padding: 8px 12px;
  font-size: 16px;
  border: 1px solid #ccc;
  border-radius: 4px;
  max-width: 200px;
}
button {
  padding: 8px 16px;
  font-size: 16px;
  background: #0066cc;
  color: white;
  border: none;
  border-radius: 4px;
  cursor: pointer;
  max-width: 200px;
}
button:disabled {
  background: #cccccc;
}
button:hover:not(:disabled) {
  background: #0055aa;
}
/* Error banner text. */
.error {
  color: #cc0000;
  margin: 10px 0;
}
/* Result area: audio player, download button and transcript. */
.player-container {
  margin: 20px 0;
}
audio {
  width: 100%;
  margin: 10px 0;
}
.transcript {
  background: #f5f5f5;
  padding: 15px;
  border-radius: 4px;
  margin: 10px 0;
}
.loading {
  color: #666;
  font-style: italic;
}
/* Raw JSON response section and its green copy button. */
.json-container {
  margin-top: 20px;
}
.copy-button {
  margin-top: 8px;
  background: #4CAF50;
}
.copy-button:hover:not(:disabled) {
  background: #45a049;
}
</style>
</head>
<body>
<h1>Prompt gpt-4o-audio-preview</h1>
<!-- Usage hint shown under the page heading. -->
<div class="info">
Enter a prompt below and execute against <code>gpt-4o-audio-preview</code> to hear the results.
</div>
<!-- Inputs: optional system prompt, user prompt, voice choice, and the submit button. -->
<div class="input-group">
<label for="systemPrompt">System Prompt (optional):</label>
<textarea id="systemPrompt" placeholder="Enter system prompt here..." aria-label="System prompt"></textarea>
<label for="promptInput">User Prompt:</label>
<textarea id="promptInput" placeholder="Enter your text here..." aria-label="Input text"></textarea>
<select id="voiceSelect" aria-label="Voice selection">
<option value="alloy">Alloy</option>
<option value="echo">Echo</option>
<option value="fable">Fable</option>
<option value="onyx">Onyx</option>
<option value="nova">Nova</option>
<option value="shimmer">Shimmer</option>
</select>
<button id="submitBtn">Generate Speech</button>
</div>
<!-- Error banner; starts hidden and is revealed by the script's showError(). -->
<div id="error" class="error" style="display: none;"></div>
<!-- Result area; starts hidden and is revealed by the script after a successful request. -->
<div id="playerContainer" class="player-container" style="display: none;">
<audio id="audioPlayer" controls></audio>
<button id="downloadBtn">Download Audio</button>
<div id="transcript" class="transcript"></div>
</div>
<!-- Raw API response; starts hidden and is revealed by the script once a response has been received. -->
<div id="jsonContainer" class="json-container" style="display: none;">
<h3>API Response:</h3>
<!-- aria-label gives this control an accessible name, like the other form controls on the page. -->
<textarea id="responseJson" readonly aria-label="Raw API response JSON"></textarea>
<button id="copyJsonBtn" class="copy-button">Copy to clipboard</button>
</div>
<script>
// Cache references to the page elements the script reads from and writes to.
const promptInput = document.querySelector('#promptInput');
const systemPrompt = document.querySelector('#systemPrompt');
const voiceSelect = document.querySelector('#voiceSelect');
const submitBtn = document.querySelector('#submitBtn');
const errorDiv = document.querySelector('#error');
const playerContainer = document.querySelector('#playerContainer');
const audioPlayer = document.querySelector('#audioPlayer');
const downloadBtn = document.querySelector('#downloadBtn');
const transcriptDiv = document.querySelector('#transcript');
const jsonContainer = document.querySelector('#jsonContainer');
const responseJson = document.querySelector('#responseJson');
const copyJsonBtn = document.querySelector('#copyJsonBtn');
/**
 * Show an error message and hide both result sections (the audio player
 * and the raw JSON view).
 *
 * @param {string} message - Text to place in the error banner.
 */
function showError(message) {
  // Hide any earlier results so the error is the only outcome on screen.
  for (const section of [playerContainer, jsonContainer]) {
    section.style.display = 'none';
  }

  errorDiv.textContent = message;
  errorDiv.style.display = 'block';
}
/** Hide the error banner; its text content is left untouched. */
function clearError() {
  Object.assign(errorDiv.style, { display: 'none' });
}
/**
 * Return the OpenAI API key, asking the user for it the first time.
 *
 * The key is cached in localStorage under "openai_api_key". Surrounding
 * whitespace is stripped both when the key is entered and when it is read
 * back, so a key pasted with a trailing space or newline is not stored
 * verbatim and reused in the Authorization header on every later request.
 *
 * @returns {string|null} The key, or null when the user cancelled the
 *   prompt or entered only whitespace.
 */
function getAPIKey() {
  const storageKey = 'openai_api_key';

  const storedKey = localStorage.getItem(storageKey)?.trim();
  if (storedKey) {
    return storedKey;
  }

  // prompt() returns null when the dialog is cancelled.
  const enteredKey = prompt('Please enter your OpenAI API Key:')?.trim();
  if (!enteredKey) {
    return null;
  }

  localStorage.setItem(storageKey, enteredKey);
  return enteredKey;
}
// Copy the displayed JSON to the clipboard and flash a status label on the
// button for 1.5 s.
copyJsonBtn.addEventListener('click', async () => {
  // Capture the label and disable the button before awaiting. If this is
  // done only after the clipboard write resolves, a second quick click can
  // record "Copied!" as the idle label and leave the button stuck on it.
  const idleLabel = copyJsonBtn.textContent;
  copyJsonBtn.disabled = true;

  let statusLabel = 'Copied!';
  try {
    await navigator.clipboard.writeText(responseJson.value);
  } catch (err) {
    console.error('Failed to copy text:', err);
    // Tell the user the copy failed, and select the text so it can still
    // be copied manually.
    statusLabel = 'Copy failed';
    responseJson.select();
  }

  copyJsonBtn.textContent = statusLabel;
  setTimeout(() => {
    copyJsonBtn.textContent = idleLabel;
    copyJsonBtn.disabled = false;
  }, 1500);
});
/**
 * Decode a base64 string into a Blob of the given MIME type.
 *
 * @param {string} base64 - Base64-encoded binary data.
 * @param {string} mimeType - MIME type to tag the Blob with.
 * @returns {Blob} The decoded bytes wrapped in a Blob.
 */
function base64ToBlob(base64, mimeType) {
  // atob() yields a "binary string": one character per decoded byte.
  const bytes = Uint8Array.from(atob(base64), (ch) => ch.charCodeAt(0));
  return new Blob([bytes], { type: mimeType });
}

/**
 * Send the prompts to the chat completions endpoint and present the result.
 *
 * Requests text plus WAV audio from gpt-4o-audio-preview using the selected
 * voice. A system message is placed first in the messages array only when
 * the system prompt textarea is not blank. On success the pretty-printed
 * raw JSON is shown, the audio is loaded into the player, the transcript is
 * shown and the download button is wired to the new audio. On failure the
 * error is logged and shown via showError(). The submit button is disabled
 * for the duration of the request and always restored afterwards.
 *
 * @returns {Promise<void>} Resolves when the UI has been updated; failures
 *   are reported in the page rather than rejected.
 */
async function submitToAPI() {
  const apiKey = getAPIKey();
  if (!apiKey) {
    alert('API Key is required.');
    return;
  }

  submitBtn.textContent = 'Processing...';
  submitBtn.disabled = true;

  const messages = [];
  if (systemPrompt.value.trim()) {
    messages.push({ role: 'system', content: systemPrompt.value });
  }
  messages.push({ role: 'user', content: promptInput.value });

  const payload = {
    model: 'gpt-4o-audio-preview',
    modalities: ['text', 'audio'],
    audio: {
      voice: voiceSelect.value,
      format: 'wav',
    },
    messages,
  };

  try {
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify(payload),
    });

    // An error body is not guaranteed to be JSON; a parse failure must not
    // mask the HTTP failure that is reported below.
    let data = null;
    try {
      data = await response.json();
    } catch (parseError) {
      console.error('Response body was not valid JSON:', parseError);
    }

    if (!response.ok) {
      throw new Error(data?.error?.message || 'API request failed');
    }

    // Display raw JSON response
    responseJson.value = JSON.stringify(data, null, 2);
    jsonContainer.style.display = 'block';

    // Fail with a readable message instead of a TypeError when the response
    // carries no audio.
    const audio = data?.choices?.[0]?.message?.audio;
    if (!audio?.data) {
      throw new Error('API response did not contain audio data');
    }

    const audioUrl = URL.createObjectURL(base64ToBlob(audio.data, 'audio/wav'));
    const previousUrl = audioPlayer.src;

    // Update UI
    audioPlayer.src = audioUrl;
    transcriptDiv.textContent = audio.transcript ?? '';
    playerContainer.style.display = 'block';
    clearError();

    // Set up download button
    downloadBtn.onclick = () => {
      const link = document.createElement('a');
      link.href = audioUrl;
      link.download = 'speech.wav';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
    };

    // Nothing references the previous blob URL any more; release it so
    // repeated runs do not keep every earlier audio clip in memory.
    if (previousUrl && previousUrl.startsWith('blob:')) {
      URL.revokeObjectURL(previousUrl);
    }
  } catch (error) {
    console.error('Error:', error);
    showError(error.message || 'An error occurred');
  } finally {
    submitBtn.textContent = 'Generate Speech';
    submitBtn.disabled = false;
  }
}
// Handle form submission
submitBtn.addEventListener('click', submitToAPI);

// Ctrl+Enter (Cmd+Enter on macOS) submits from either prompt textarea.
// `keydown` is used because `keypress` is deprecated. The disabled check
// stops the shortcut from starting a second request while one is already
// in flight; the button alone only blocks mouse clicks.
for (const field of [systemPrompt, promptInput]) {
  field.addEventListener('keydown', (e) => {
    const isSubmitShortcut = e.key === 'Enter' && (e.ctrlKey || e.metaKey);
    if (!isSubmitShortcut || submitBtn.disabled) {
      return;
    }
    e.preventDefault();
    submitToAPI();
  });
}
</script>
</body>
</html>
I've made the following changes to the code:
- Added a system prompt textarea above the main prompt textarea
- Modified the `submitToAPI()` function to include the system prompt in the messages array when it's not empty
- Added a new section at the bottom to display the raw JSON response in a textarea
- Added a "Copy to clipboard" button that changes to "Copied!" for 1.5 seconds when clicked
- Styled the new elements to match the existing design
- Made the JSON response textarea use a monospace font and light gray background for better readability
The new features work as follows:
- The system prompt is optional - if left empty, it won't be included in the API call
- The raw JSON response is displayed in a formatted, readable way after a successful API call
- The copy button uses the clipboard API to copy the JSON and provides visual feedback
- All the original functionality (audio playback, download, etc.) remains unchanged
The interface is now more complete for development and debugging purposes while maintaining its user-friendly design.