human (Dec 18, 2024, 07:25 AM)
I need the visible counters to be shown twice: once for the most recent interaction, and a second time as a running total of the counts for each token type and the total cost so far.
paste.txt
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenAI WebRTC Audio Session</title>
<style>
* {
box-sizing: border-box;
}
body {
font-family: Helvetica, Arial, sans-serif;
margin: 0;
padding: 20px;
background: #f5f5f5;
}
.container {
max-width: 800px;
margin: 0 auto;
}
.audio-indicator {
display: inline-block;
width: 20px;
height: 20px;
border-radius: 50%;
background: #ccc;
margin-right: 10px;
vertical-align: middle;
}
.audio-indicator.active {
background: #4CAF50;
animation: pulse 1s infinite;
}
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
.controls {
margin: 20px 0;
}
.form-group {
margin-bottom: 15px;
}
label {
display: block;
margin-bottom: 5px;
font-weight: bold;
}
input, select {
width: 100%;
padding: 8px;
font-size: 16px;
border: 1px solid #ddd;
border-radius: 4px;
}
button {
background: #007bff;
color: white;
border: none;
padding: 10px 20px;
font-size: 16px;
border-radius: 4px;
cursor: pointer;
}
button:disabled {
background: #ccc;
cursor: not-allowed;
}
.status {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
}
.error {
background: #fee;
color: #c00;
}
.success {
background: #efe;
color: #0a0;
}
.events-container {
margin-top: 20px;
background: #fff;
border-radius: 4px;
padding: 15px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.event-entry {
font-family: monospace;
white-space: pre;
padding: 10px;
margin: 10px 0;
background: #f8f9fa;
border-radius: 4px;
border-left: 4px solid #007bff;
}
.event-timestamp {
color: #666;
font-size: 0.9em;
margin-bottom: 5px;
}
.cost-container {
margin: 20px 0;
padding: 15px;
background: #fff;
border-radius: 4px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.stats-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 20px;
margin-top: 15px;
}
.stats-section {
padding: 10px;
background: #f8f9fa;
border-radius: 4px;
}
.stats-section h3 {
margin: 0 0 10px 0;
font-size: 1.1em;
color: #495057;
}
.stats-section div {
margin: 5px 0;
font-size: 0.95em;
}
.stats-section span {
font-weight: bold;
color: #007bff;
}
</style>
</head>
<body>
<!-- Page content. Element ids are looked up by the inline script and must stay stable. -->
<main class="container">
  <h1>
    <!-- Decorative activity dot; the script toggles its "active" class. -->
    <span id="audioIndicator" class="audio-indicator" aria-hidden="true"></span>
    OpenAI WebRTC Audio Session
  </h1>
  <div class="controls">
    <div class="form-group">
      <label for="tokenInput">OpenAI API Token</label>
      <input type="password" id="tokenInput" autocomplete="off">
    </div>
    <div class="form-group">
      <label for="voiceSelect">Voice</label>
      <select id="voiceSelect">
        <option value="ash">Ash</option>
        <option value="ballad">Ballad</option>
        <option value="coral">Coral</option>
        <option value="sage">Sage</option>
        <option value="verse">Verse</option>
      </select>
    </div>
    <button type="button" id="startButton">Start Session</button>
  </div>
  <!-- Live region: the script writes progress and error text here. -->
  <div id="status" class="status" role="status"></div>
  <section class="cost-container">
    <h2>Session Stats</h2>
    <div class="stats-grid">
      <div class="stats-section">
        <h3>Input tokens</h3>
        <div>Audio: <span id="audioInputTokens">0</span></div>
        <div>Text: <span id="textInputTokens">0</span></div>
        <div>Cached: <span id="cachedTokens">0</span></div>
      </div>
      <div class="stats-section">
        <h3>Output tokens</h3>
        <div>Audio: <span id="audioOutputTokens">0</span></div>
        <div>Text: <span id="textOutputTokens">0</span></div>
      </div>
      <div class="stats-section">
        <h3>Costs</h3>
        <div>Input Cost: <span id="inputCost">$0.00</span></div>
        <div>Output Cost: <span id="outputCost">$0.00</span></div>
        <div>Total Cost: <span id="totalCost">$0.00</span></div>
      </div>
    </div>
  </section>
  <section class="events-container">
    <h2>Session Events</h2>
    <div id="events"></div>
  </section>
</main>
<script type="module">
/**
 * Open a WebRTC connection to the OpenAI Realtime endpoint and wire up the
 * "oai-events" data channel: usage from each `response.done` event is written
 * to the stats panel and every received event is appended to the event log.
 *
 * @param {MediaStream} inStream - Captured stream; its first track is sent to the peer.
 * @param {string} token - API token, sent as a Bearer credential.
 * @param {string} voice - Voice identifier, sent as the `voice` query parameter.
 * @returns {Promise<RTCPeerConnection>} The connection, once the remote answer is applied.
 * @throws {Error} If the SDP exchange returns a non-2xx status or negotiation
 *   fails; the peer connection is closed before the error propagates.
 */
async function createRealtimeSession(inStream, token, voice) {
  const pc = new RTCPeerConnection()

  // Handle incoming audio
  pc.ontrack = e => {
    const audio = new Audio()
    audio.srcObject = e.streams[0]
    // play() returns a promise that rejects when playback is blocked; report
    // it instead of leaving an unhandled rejection.
    audio.play().catch(err => console.error('Audio playback failed:', err))
  }
  pc.addTrack(inStream.getTracks()[0])

  // Create data channel for events
  const dc = pc.createDataChannel("oai-events");
  dc.addEventListener("message", (e) => {
    let eventData;
    try {
      eventData = JSON.parse(e.data);
    } catch (err) {
      console.error('Error parsing event data:', err);
      return;
    }

    try {
      // Update costs if this is a response.done event with usage info
      const usage = eventData &&
        eventData.type === 'response.done' &&
        eventData.response &&
        eventData.response.usage;
      if (usage) {
        // Any of the detail objects may be absent; treat missing counts as 0
        // so a partial payload cannot abort the update.
        const inputDetails = usage.input_token_details || {};
        const outputDetails = usage.output_token_details || {};
        const cachedDetails = inputDetails.cached_tokens_details || {};
        const cachedInputTokens = inputDetails.cached_tokens || 0;
        const audioOutputTokens = outputDetails.audio_tokens || 0;
        const textOutputTokens = outputDetails.text_tokens || 0;
        // Calculate tokens minus cached tokens
        const audioInputTokens = (inputDetails.audio_tokens || 0) - (cachedDetails.audio_tokens || 0);
        const textInputTokens = (inputDetails.text_tokens || 0) - (cachedDetails.text_tokens || 0);
        const costs = calculateCosts({audioInputTokens, textInputTokens, cachedInputTokens, audioOutputTokens, textOutputTokens});
        // Update display elements
        document.getElementById('audioInputTokens').textContent = audioInputTokens.toLocaleString();
        document.getElementById('textInputTokens').textContent = textInputTokens.toLocaleString();
        document.getElementById('cachedTokens').textContent = cachedInputTokens.toLocaleString();
        document.getElementById('audioOutputTokens').textContent = audioOutputTokens.toLocaleString();
        document.getElementById('textOutputTokens').textContent = textOutputTokens.toLocaleString();
        // Update costs
        document.getElementById('inputCost').textContent = `$${costs.inputCost.toFixed(4)}`;
        document.getElementById('outputCost').textContent = `$${costs.outputCost.toFixed(4)}`;
        document.getElementById('totalCost').textContent = `$${costs.totalCost.toFixed(4)}`;
      }
    } catch (err) {
      console.error('Error updating usage stats:', err);
    }

    // Log the event even when the stats update above failed.
    addEventToLog(eventData);
  });

  try {
    const offer = await pc.createOffer()
    await pc.setLocalDescription(offer)
    const headers = {
      Authorization: `Bearer ${token}`,
      'Content-Type': 'application/sdp'
    }
    const opts = {
      method: 'POST',
      body: offer.sdp,
      headers
    }
    const model = 'gpt-4o-realtime-preview-2024-12-17'
    const resp = await fetch(`https://api.openai.com/v1/realtime?model=${model}&voice=${encodeURIComponent(voice)}`, opts)
    const answerSdp = await resp.text()
    // A failed request carries an error body, not SDP; surface it directly
    // rather than letting setRemoteDescription fail on it.
    if (!resp.ok) {
      throw new Error(`Realtime API request failed (${resp.status}): ${answerSdp}`)
    }
    await pc.setRemoteDescription({
      type: 'answer',
      sdp: answerSdp
    })
  } catch (err) {
    // The caller never receives pc on failure, so release it here.
    pc.close()
    throw err
  }
  return pc
}
/**
 * Convert per-category token counts into dollar costs.
 *
 * @param {Object} counts - audioInputTokens, textInputTokens, cachedInputTokens,
 *   audioOutputTokens and textOutputTokens.
 * @returns {{inputCost: number, outputCost: number, totalCost: number}}
 */
function calculateCosts({audioInputTokens, textInputTokens, cachedInputTokens, audioOutputTokens, textOutputTokens}) {
  // Dollars per single token (value x 1M gives the per-million rate)
  const rate = {
    audioInput: 0.00004,    // $40/1M tokens
    audioOutput: 0.00008,   // $80/1M tokens
    cachedInput: 0.0000025, // $2.50/1M tokens
    textInput: 0.0000025,   // $2.50/1M tokens
    textOutput: 0.00001     // $10.00/1M tokens
  };

  const audioIn = audioInputTokens * rate.audioInput;
  const cachedIn = cachedInputTokens * rate.cachedInput;
  const textIn = textInputTokens * rate.textInput;
  const audioOut = audioOutputTokens * rate.audioOutput;
  const textOut = textOutputTokens * rate.textOutput;

  const inputCost = audioIn + cachedIn + textIn;
  const outputCost = audioOut + textOut;
  // Summed term by term, in this order, so rounding matches the per-term sums.
  const totalCost = audioIn + cachedIn + textIn + audioOut + textOut;

  return { inputCost, outputCost, totalCost };
}
/**
 * Write the three cost figures into the Costs panel as dollar amounts with
 * four decimal places.
 *
 * @param {{inputCost: number, outputCost: number, totalCost: number}} costs
 */
function updateCostDisplay(costs) {
  // Prefix with "$" so the text matches the "$0.00" placeholders in the markup.
  document.getElementById('inputCost').textContent = `$${costs.inputCost.toFixed(4)}`;
  document.getElementById('outputCost').textContent = `$${costs.outputCost.toFixed(4)}`;
  document.getElementById('totalCost').textContent = `$${costs.totalCost.toFixed(4)}`;
}
/**
 * Put a timestamped, pretty-printed JSON rendering of an event at the top of
 * the #events list, then trim the list to at most 50 entries.
 *
 * @param {*} eventData - Value to render with JSON.stringify.
 */
function addEventToLog(eventData) {
  const log = document.getElementById('events');
  const entry = document.createElement('div');
  entry.className = 'event-entry';

  const stamp = document.createElement('div');
  stamp.className = 'event-timestamp';
  stamp.textContent = new Date().toISOString();

  const body = document.createElement('div');
  body.textContent = JSON.stringify(eventData, null, 2);

  entry.append(stamp, body);

  // Newest entry goes first
  log.prepend(entry);

  // Drop the oldest entries so the DOM does not grow without bound
  while (log.children.length > 50) {
    log.removeChild(log.lastChild);
  }
}
// References to the page controls used throughout this script
const [startButton, tokenInput, voiceSelect, status, audioIndicator] =
  ['startButton', 'tokenInput', 'voiceSelect', 'status', 'audioIndicator']
    .map(id => document.getElementById(id))

// Resources held by a running session; stopSession resets each to null
let peerConnection = null,
  audioContext = null,
  audioStream = null

// Restore a previously saved API key when the document has loaded
document.addEventListener('DOMContentLoaded', () => {
  const savedToken = localStorage.getItem('openai_api_key')
  if (!savedToken) return
  tokenInput.value = savedToken
})
// Audio visualization
/**
 * Feed the stream into an analyser node and, on every animation frame, mark
 * the indicator "active" while the mean frequency-bin level is above 30.
 * Stores the created AudioContext in the module-level `audioContext`.
 *
 * @param {MediaStream} stream - Stream to analyse.
 */
function setupAudioVisualization(stream) {
  audioContext = new AudioContext()
  const micSource = audioContext.createMediaStreamSource(stream)
  const analyser = audioContext.createAnalyser()
  analyser.fftSize = 256
  micSource.connect(analyser)

  const binCount = analyser.frequencyBinCount
  const levels = new Uint8Array(binCount)

  const tick = () => {
    // Stop rescheduling once the module-level context has been cleared
    if (!audioContext) return
    analyser.getByteFrequencyData(levels)
    let sum = 0
    for (const level of levels) sum += level
    audioIndicator.classList.toggle('active', sum / binCount > 30)
    requestAnimationFrame(tick)
  }
  tick()
}
/**
 * Start a session: save the API key, request the microphone, start the
 * activity indicator, then open the realtime connection. Progress and errors
 * are written to the status element. On failure, anything already acquired is
 * released through stopSession().
 */
async function startSession() {
  // peerConnection stays null until createRealtimeSession resolves, so the
  // click handler would treat a second click as another "start". Disable the
  // button while setup is in flight to prevent a parallel session that would
  // leak the first stream and connection.
  startButton.disabled = true
  try {
    // Save API key to localStorage
    localStorage.setItem('openai_api_key', tokenInput.value)
    status.className = 'status'
    status.textContent = 'Requesting microphone access...'
    audioStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: false
    })
    setupAudioVisualization(audioStream)
    status.textContent = 'Establishing connection...'
    peerConnection = await createRealtimeSession(
      audioStream,
      tokenInput.value,
      voiceSelect.value
    )
    status.className = 'status success'
    status.textContent = 'Session established successfully!'
    startButton.textContent = 'Stop Session'
  } catch (err) {
    status.className = 'status error'
    status.textContent = `Error: ${err.message}`
    console.error('Session error:', err)
    stopSession()
  } finally {
    startButton.disabled = false
  }
}
/**
 * Release whatever the current session holds (peer connection, audio context,
 * microphone tracks), clear the module-level references, and return the
 * indicator and button to their idle state. Does nothing for resources that
 * are already null.
 */
function stopSession() {
  // Close the WebRTC connection
  if (peerConnection) {
    peerConnection.close()
    peerConnection = null
  }

  // Close the analyser's audio context; the indicator loop checks this variable
  if (audioContext) {
    audioContext.close()
    audioContext = null
  }

  // Stop every captured track
  if (audioStream) {
    for (const track of audioStream.getTracks()) {
      track.stop()
    }
    audioStream = null
  }

  audioIndicator.classList.remove('active')
  startButton.textContent = 'Start Session'
}
// One button toggles the session: stop if one is running, otherwise validate and start
startButton.addEventListener('click', () => {
  if (peerConnection) {
    stopSession()
    return
  }

  if (!tokenInput.value) {
    status.className = 'status error'
    status.textContent = 'Please enter an API token'
    return
  }

  startSession()
})

// Cleanup on page unload
window.addEventListener('beforeunload', stopSession)
</script>
</body>
</html>
assistant (Dec 18, 2024, 07:26 AM)
I'll help modify the HTML to show both the current interaction stats and accumulated totals. I'll update the stats grid to show two sections side by side.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OpenAI WebRTC Audio Session</title>
<style>
* {
box-sizing: border-box;
}
body {
font-family: Helvetica, Arial, sans-serif;
margin: 0;
padding: 20px;
background: #f5f5f5;
}
.container {
max-width: 1200px;
margin: 0 auto;
}
.audio-indicator {
display: inline-block;
width: 20px;
height: 20px;
border-radius: 50%;
background: #ccc;
margin-right: 10px;
vertical-align: middle;
}
.audio-indicator.active {
background: #4CAF50;
animation: pulse 1s infinite;
}
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
.controls {
margin: 20px 0;
}
.form-group {
margin-bottom: 15px;
}
label {
display: block;
margin-bottom: 5px;
font-weight: bold;
}
input, select {
width: 100%;
padding: 8px;
font-size: 16px;
border: 1px solid #ddd;
border-radius: 4px;
}
button {
background: #007bff;
color: white;
border: none;
padding: 10px 20px;
font-size: 16px;
border-radius: 4px;
cursor: pointer;
}
button:disabled {
background: #ccc;
cursor: not-allowed;
}
.status {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
}
.error {
background: #fee;
color: #c00;
}
.success {
background: #efe;
color: #0a0;
}
.stats-container {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin: 20px 0;
}
.stats-box {
background: #fff;
border-radius: 4px;
padding: 20px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.stats-box h2 {
margin: 0 0 15px 0;
padding-bottom: 10px;
border-bottom: 1px solid #eee;
}
.stats-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 20px;
}
.stats-section {
padding: 10px;
background: #f8f9fa;
border-radius: 4px;
}
.stats-section h3 {
margin: 0 0 10px 0;
font-size: 1.1em;
color: #495057;
}
.stats-section div {
margin: 5px 0;
font-size: 0.95em;
}
.stats-section span {
font-weight: bold;
color: #007bff;
}
.events-container {
margin-top: 20px;
background: #fff;
border-radius: 4px;
padding: 15px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}
.event-entry {
font-family: monospace;
white-space: pre;
padding: 10px;
margin: 10px 0;
background: #f8f9fa;
border-radius: 4px;
border-left: 4px solid #007bff;
}
.event-timestamp {
color: #666;
font-size: 0.9em;
margin-bottom: 5px;
}
</style>
</head>
<body>
<!-- Page content. Element ids are looked up by the inline script and must stay stable. -->
<main class="container">
  <h1>
    <!-- Decorative activity dot, hidden from assistive technology. -->
    <span id="audioIndicator" class="audio-indicator" aria-hidden="true"></span>
    OpenAI WebRTC Audio Session
  </h1>
  <div class="controls">
    <div class="form-group">
      <label for="tokenInput">OpenAI API Token</label>
      <input type="password" id="tokenInput" autocomplete="off">
    </div>
    <div class="form-group">
      <label for="voiceSelect">Voice</label>
      <select id="voiceSelect">
        <option value="ash">Ash</option>
        <option value="ballad">Ballad</option>
        <option value="coral">Coral</option>
        <option value="sage">Sage</option>
        <option value="verse">Verse</option>
      </select>
    </div>
    <button type="button" id="startButton">Start Session</button>
  </div>
  <!-- Live region for status messages. -->
  <div id="status" class="status" role="status"></div>
  <div class="stats-container">
    <!-- Figures from the most recent response.done event. -->
    <section class="stats-box">
      <h2>Current Interaction</h2>
      <div class="stats-grid">
        <div class="stats-section">
          <h3>Input tokens</h3>
          <div>Audio: <span id="currentAudioInputTokens">0</span></div>
          <div>Text: <span id="currentTextInputTokens">0</span></div>
          <div>Cached: <span id="currentCachedTokens">0</span></div>
        </div>
        <div class="stats-section">
          <h3>Output tokens</h3>
          <div>Audio: <span id="currentAudioOutputTokens">0</span></div>
          <div>Text: <span id="currentTextOutputTokens">0</span></div>
        </div>
        <div class="stats-section">
          <h3>Costs</h3>
          <div>Input Cost: <span id="currentInputCost">$0.00</span></div>
          <div>Output Cost: <span id="currentOutputCost">$0.00</span></div>
          <div>Total Cost: <span id="currentTotalCost">$0.00</span></div>
        </div>
      </div>
    </section>
    <!-- Running totals accumulated by the script. -->
    <section class="stats-box">
      <h2>Session Total</h2>
      <div class="stats-grid">
        <div class="stats-section">
          <h3>Input tokens</h3>
          <div>Audio: <span id="totalAudioInputTokens">0</span></div>
          <div>Text: <span id="totalTextInputTokens">0</span></div>
          <div>Cached: <span id="totalCachedTokens">0</span></div>
        </div>
        <div class="stats-section">
          <h3>Output tokens</h3>
          <div>Audio: <span id="totalAudioOutputTokens">0</span></div>
          <div>Text: <span id="totalTextOutputTokens">0</span></div>
        </div>
        <div class="stats-section">
          <h3>Costs</h3>
          <div>Input Cost: <span id="totalInputCost">$0.00</span></div>
          <div>Output Cost: <span id="totalOutputCost">$0.00</span></div>
          <div>Total Cost: <span id="totalTotalCost">$0.00</span></div>
        </div>
      </div>
    </section>
  </div>
  <section class="events-container">
    <h2>Session Events</h2>
    <div id="events"></div>
  </section>
</main>
<script type="module">
// Track cumulative totals
// Running sums of the token counts per category and of the three cost
// figures. All start at zero; updateSessionTotals adds each interaction's
// stats and costs to these fields in place.
let sessionTotals = {
audioInputTokens: 0,
textInputTokens: 0,
cachedInputTokens: 0,
audioOutputTokens: 0,
textOutputTokens: 0,
inputCost: 0,
outputCost: 0,
totalCost: 0
}
/**
 * Open a WebRTC connection to the OpenAI Realtime endpoint and wire up the
 * "oai-events" data channel: usage from each `response.done` event updates
 * both the current-interaction panel and the session totals, and every
 * received event is appended to the event log.
 *
 * @param {MediaStream} inStream - Captured stream; its first track is sent to the peer.
 * @param {string} token - API token, sent as a Bearer credential.
 * @param {string} voice - Voice identifier, sent as the `voice` query parameter.
 * @returns {Promise<RTCPeerConnection>} The connection, once the remote answer is applied.
 * @throws {Error} If the SDP exchange returns a non-2xx status or negotiation
 *   fails; the peer connection is closed before the error propagates.
 */
async function createRealtimeSession(inStream, token, voice) {
  const pc = new RTCPeerConnection()

  // Play incoming audio from the remote peer
  pc.ontrack = e => {
    const audio = new Audio()
    audio.srcObject = e.streams[0]
    // play() returns a promise that rejects when playback is blocked; report
    // it instead of leaving an unhandled rejection.
    audio.play().catch(err => console.error('Audio playback failed:', err))
  }
  pc.addTrack(inStream.getTracks()[0])

  const dc = pc.createDataChannel("oai-events")
  dc.addEventListener("message", (e) => {
    let eventData
    try {
      eventData = JSON.parse(e.data)
    } catch (err) {
      console.error('Error parsing event data:', err)
      return
    }

    try {
      const usage = eventData &&
        eventData.type === 'response.done' &&
        eventData.response &&
        eventData.response.usage
      if (usage) {
        // Any of the detail objects may be absent; treat missing counts as 0
        // so a partial payload cannot abort the update.
        const inputDetails = usage.input_token_details || {}
        const outputDetails = usage.output_token_details || {}
        const cachedDetails = inputDetails.cached_tokens_details || {}
        const currentStats = {
          // Non-cached input = reported input minus the cached share
          audioInputTokens: (inputDetails.audio_tokens || 0) - (cachedDetails.audio_tokens || 0),
          textInputTokens: (inputDetails.text_tokens || 0) - (cachedDetails.text_tokens || 0),
          cachedInputTokens: inputDetails.cached_tokens || 0,
          audioOutputTokens: outputDetails.audio_tokens || 0,
          textOutputTokens: outputDetails.text_tokens || 0
        }
        const costs = calculateCosts(currentStats)
        // Update current interaction display
        updateCurrentStats(currentStats, costs)
        // Update session totals
        updateSessionTotals(currentStats, costs)
      }
    } catch (err) {
      console.error('Error updating usage stats:', err)
    }

    // Log the event even when the stats update above failed.
    addEventToLog(eventData)
  })

  try {
    const offer = await pc.createOffer()
    await pc.setLocalDescription(offer)
    const headers = {
      Authorization: `Bearer ${token}`,
      'Content-Type': 'application/sdp'
    }
    const opts = {
      method: 'POST',
      body: offer.sdp,
      headers
    }
    const model = 'gpt-4o-realtime-preview-2024-12-17'
    const resp = await fetch(`https://api.openai.com/v1/realtime?model=${model}&voice=${encodeURIComponent(voice)}`, opts)
    const answerSdp = await resp.text()
    // A failed request carries an error body, not SDP; surface it directly
    // rather than letting setRemoteDescription fail on it.
    if (!resp.ok) {
      throw new Error(`Realtime API request failed (${resp.status}): ${answerSdp}`)
    }
    await pc.setRemoteDescription({
      type: 'answer',
      sdp: answerSdp
    })
  } catch (err) {
    // The caller never receives pc on failure, so release it here.
    pc.close()
    throw err
  }
  return pc
}
/**
 * Price a set of token counts.
 *
 * @param {Object} counts - audioInputTokens, textInputTokens, cachedInputTokens,
 *   audioOutputTokens and textOutputTokens.
 * @returns {{inputCost: number, outputCost: number, totalCost: number}}
 *   Input subtotal, output subtotal, and their combined total.
 */
function calculateCosts({audioInputTokens, textInputTokens, cachedInputTokens, audioOutputTokens, textOutputTokens}) {
  // Price of one token in each category
  const PRICE = {
    audioInput: 0.00004,
    audioOutput: 0.00008,
    cachedInput: 0.0000025,
    textInput: 0.0000025,
    textOutput: 0.00001
  }

  const audioIn = audioInputTokens * PRICE.audioInput
  const cachedIn = cachedInputTokens * PRICE.cachedInput
  const textIn = textInputTokens * PRICE.textInput
  const audioOut = audioOutputTokens * PRICE.audioOutput
  const textOut = textOutputTokens * PRICE.textOutput

  const inputCost = audioIn + cachedIn + textIn
  const outputCost = audioOut + textOut
  // Added term by term, in this order, to keep floating-point results stable
  const totalCost = audioIn + cachedIn + textIn + audioOut + textOut

  return { inputCost, outputCost, totalCost }
}
/**
 * Render one interaction's figures into the "current*" elements: token counts
 * via toLocaleString(), costs as "$" plus four decimal places.
 *
 * @param {Object} stats - Token counts per category.
 * @param {{inputCost: number, outputCost: number, totalCost: number}} costs
 */
function updateCurrentStats(stats, costs) {
  // [element id, stats key], in the order the fields are written
  const tokenFields = [
    ['currentAudioInputTokens', 'audioInputTokens'],
    ['currentTextInputTokens', 'textInputTokens'],
    ['currentCachedTokens', 'cachedInputTokens'],
    ['currentAudioOutputTokens', 'audioOutputTokens'],
    ['currentTextOutputTokens', 'textOutputTokens']
  ]
  for (const [id, key] of tokenFields) {
    document.getElementById(id).textContent = stats[key].toLocaleString()
  }

  // [element id, costs key]
  const costFields = [
    ['currentInputCost', 'inputCost'],
    ['currentOutputCost', 'outputCost'],
    ['currentTotalCost', 'totalCost']
  ]
  for (const [id, key] of costFields) {
    document.getElementById(id).textContent = `$${costs[key].toFixed(4)}`
  }
}
/**
 * Add one interaction's token counts and costs to the module-level
 * `sessionTotals`, then render the updated totals into the "total*" elements.
 *
 * @param {Object} currentStats - Token counts per category for this interaction.
 * @param {{inputCost: number, outputCost: number, totalCost: number}} costs
 */
function updateSessionTotals(currentStats, costs) {
  // Accumulate token counts, then costs
  const tokenKeys = ['audioInputTokens', 'textInputTokens', 'cachedInputTokens', 'audioOutputTokens', 'textOutputTokens']
  for (const key of tokenKeys) {
    sessionTotals[key] += currentStats[key]
  }
  const costKeys = ['inputCost', 'outputCost', 'totalCost']
  for (const key of costKeys) {
    sessionTotals[key] += costs[key]
  }

  // Render: [element id, sessionTotals key]
  const tokenFields = [
    ['totalAudioInputTokens', 'audioInputTokens'],
    ['totalTextInputTokens', 'textInputTokens'],
    ['totalCachedTokens', 'cachedInputTokens'],
    ['totalAudioOutputTokens', 'audioOutputTokens'],
    ['totalTextOutputTokens', 'textOutputTokens']
  ]
  for (const [id, key] of tokenFields) {
    document.getElementById(id).textContent = sessionTotals[key].toLocaleString()
  }

  const costFields = [
    ['totalInputCost', 'inputCost'],
    ['totalOutputCost', 'outputCost'],
    ['totalTotalCost', 'totalCost']
  ]
  for (const [id, key] of costFields) {
    document.getElementById(id).textContent = `$${sessionTotals[key].toFixed(4)}`
  }
}
function addEventToLog(eventData) {
const eventsContainer = document.getElementById('events')
const eventEntry = document.createElement('div')
eventEntry.className = 'event-entry'
const timestamp = document.createElement('div')
timestamp.className = 'event-timestamp'
timestamp.textContent = new Date().toISOString()
const content = document.createElement('div')
content.textContent = JSON.stringify(eventData, null, 2)
eventEntry.appendChild(timestamp)
eventEntry.appendChild(content)