# Modifying Audio Recording Tool

## human (Oct 17, 2024, 12:18 PM)

We are going to modify this tool. Say you understand.

paste.txt
```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenAI Audio</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
            margin: 0;
            background-color: #f0f0f0;
        }
        .container {
            text-align: center;
            background-color: white;
            padding: 2rem;
            border-radius: 8px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            max-width: 600px;
            width: 100%;
        }
        button {
            font-size: 1rem;
            padding: 0.5rem 1rem;
            margin: 0.5rem;
            cursor: pointer;
        }
        #timer {
            font-size: 1.5rem;
            margin: 1rem 0;
        }
        #audioPlayback, #prompt {
            margin-top: 1rem;
            width: 100%;
        }
        #prompt {
            height: 100px;
            resize: vertical;
        }
        #apiResponse {
            margin-top: 1rem;
            text-align: left;
            white-space: pre-wrap;
            background-color: #f8f8f8;
            padding: 1rem;
            border-radius: 4px;
            overflow-x: auto;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>OpenAI Audio</h1>
        <button id="recordButton">Start Recording</button>
        <div id="timer">00:00</div>
        <audio id="audioPlayback" controls></audio>
        <textarea id="prompt" placeholder="Enter your prompt here"></textarea>
        <button id="submitButton">Submit to API</button>
        <div id="apiResponse"></div>
    </div>

    <script>
        let audioContext;
        let recorder;
        let audioChunks = [];
        let startTime;
        let timerInterval;
        let audioBlob;
        let isRecording = false;
        const recordButton = document.getElementById('recordButton');
        const timer = document.getElementById('timer');
        const audioPlayback = document.getElementById('audioPlayback');
        const promptTextarea = document.getElementById('prompt');
        const submitButton = document.getElementById('submitButton');
        const apiResponse = document.getElementById('apiResponse');

        recordButton.addEventListener('click', toggleRecording);
        submitButton.addEventListener('click', submitToAPI);

        async function toggleRecording() {
            if (!isRecording) {
                await startRecording();
            } else {
                stopRecording();
            }
        }

        async function startRecording() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                const source = audioContext.createMediaStreamSource(stream);
                const processor = audioContext.createScriptProcessor(1024, 1, 1);

                source.connect(processor);
                processor.connect(audioContext.destination);

                audioChunks = [];

                processor.onaudioprocess = (e) => {
                    const inputData = e.inputBuffer.getChannelData(0);
                    audioChunks.push(new Float32Array(inputData));
                };

                isRecording = true;
                startTime = Date.now();
                updateTimer();
                timerInterval = setInterval(updateTimer, 1000);
                recordButton.textContent = 'Stop Recording';
            } catch (error) {
                console.error('Error starting recording:', error);
                alert('Error starting recording. Please make sure you have given permission to use the microphone.');
            }
        }

        function stopRecording() {
            if (audioContext) {
                audioContext.close();
                audioContext = null;
            }
            clearInterval(timerInterval);
            recordButton.textContent = 'Start Recording';
            isRecording = false;

            // Convert to WAV
            const wavBlob = createWavBlob(audioChunks);
            audioBlob = wavBlob;

            const audioUrl = URL.createObjectURL(audioBlob);
            audioPlayback.src = audioUrl;
        }

        function updateTimer() {
            const elapsed = Math.floor((Date.now() - startTime) / 1000);
            const minutes = Math.floor(elapsed / 60).toString().padStart(2, '0');
            const seconds = (elapsed % 60).toString().padStart(2, '0');
            timer.textContent = `${minutes}:${seconds}`;
        }

        function createWavBlob(audioChunks) {
            const sampleRate = 44100; // NOTE: assumes 44.1 kHz capture; the actual AudioContext rate (audioContext.sampleRate) may differ, e.g. 48 kHz
            const numChannels = 1;
            const bitsPerSample = 16;
            const bytesPerSample = bitsPerSample / 8;
            const blockAlign = numChannels * bytesPerSample;

            const buffer = mergeAudioBuffers(audioChunks);
            const dataLength = buffer.length * bytesPerSample;
            const wavDataLength = 36 + dataLength;

            const headerBuffer = new ArrayBuffer(44);
            const view = new DataView(headerBuffer);

            // Standard 44-byte RIFF/WAVE header for 16-bit mono PCM
            writeString(view, 0, 'RIFF');                       // ChunkID
            view.setUint32(4, wavDataLength, true);             // ChunkSize (file size minus 8)
            writeString(view, 8, 'WAVE');                       // Format
            writeString(view, 12, 'fmt ');                      // Subchunk1ID
            view.setUint32(16, 16, true);                       // Subchunk1Size (16 for PCM)
            view.setUint16(20, 1, true);                        // AudioFormat (1 = linear PCM)
            view.setUint16(22, numChannels, true);              // NumChannels
            view.setUint32(24, sampleRate, true);               // SampleRate
            view.setUint32(28, sampleRate * blockAlign, true);  // ByteRate
            view.setUint16(32, blockAlign, true);               // BlockAlign
            view.setUint16(34, bitsPerSample, true);            // BitsPerSample
            writeString(view, 36, 'data');                      // Subchunk2ID
            view.setUint32(40, dataLength, true);               // Subchunk2Size (PCM data bytes)

            const wavBuffer = new Int16Array((headerBuffer.byteLength + dataLength) / 2); // Int16Array length is in 16-bit samples, not bytes
            wavBuffer.set(new Int16Array(headerBuffer));
            wavBuffer.set(convertToInt16(buffer), headerBuffer.byteLength / 2);

            return new Blob([wavBuffer], { type: 'audio/wav' });
        }

        function writeString(view, offset, string) {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        }

        function mergeAudioBuffers(buffers) {
            let totalLength = 0;
            for (let buffer of buffers) {
                totalLength += buffer.length;
            }
            const result = new Float32Array(totalLength);
            let offset = 0;
            for (let buffer of buffers) {
                result.set(buffer, offset);
                offset += buffer.length;
            }
            return result;
        }

        function convertToInt16(float32Array) {
            const int16Array = new Int16Array(float32Array.length);
            for (let i = 0; i < float32Array.length; i++) {
                const s = Math.max(-1, Math.min(1, float32Array[i]));
                int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
            }
            return int16Array;
        }

        async function submitToAPI() {
            if (!audioBlob) {
                alert('Please record audio first.');
                return;
            }

            const apiKey = getAPIKey();
            if (!apiKey) {
                alert('API Key is required.');
                return;
            }

            const base64Audio = await blobToBase64(audioBlob);
            const prompt = promptTextarea.value;

            const payload = {
                model: "gpt-4o-audio-preview",
                modalities: ["text"],
                messages: [
                    {
                        role: "user",
                        content: [
                            {type: "text", text: prompt},
                            {
                                type: "input_audio",
                                input_audio: {
                                    data: base64Audio,
                                    format: "wav"
                                }
                            }
                        ]
                    }
                ]
            };

            try {
                const response = await fetch('https://api.openai.com/v1/chat/completions', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Authorization': `Bearer ${apiKey}`
                    },
                    body: JSON.stringify(payload)
                });

                const data = await response.json();
                apiResponse.textContent = JSON.stringify(data, null, 2);
            } catch (error) {
                console.error('Error:', error);
                apiResponse.textContent = `Error: ${error.message}`;
            }
        }

        function getAPIKey() {
            let apiKey = localStorage.getItem('openai_api_key');
            if (!apiKey) {
                apiKey = prompt('Please enter your OpenAI API Key:');
                if (apiKey) {
                    localStorage.setItem('openai_api_key', apiKey);
                }
            }
            return apiKey;
        }

        function blobToBase64(blob) {
            return new Promise((resolve, reject) => {
                const reader = new FileReader();
                reader.onloadend = () => resolve(reader.result.split(',')[1]);
                reader.onerror = reject;
                reader.readAsDataURL(blob);
            });
        }
    </script>
</body>
</html>
```

## assistant (Oct 17, 2024, 12:18 PM)

I understand. We'll be modifying the tool described in the HTML and JavaScript code you've provided. This appears to be a web application for recording audio, submitting it to the OpenAI API along with a text prompt, and displaying the API response. What specific modifications would you like to make?

## human (Oct 17, 2024, 12:19 PM)

First, make it so clicking the submit button changes the text on that button to "Processing prompt..." while it is running, and disables the button. The JSON returned looks like this:

```json
{
  "id": "chatcmpl-AJQCDfMBi53K4mVLF0CZ6ZoHdhB2M",
  "object": "chat.completion",
  "created": 1729192293,
  "model": "gpt-4o-audio-preview-2024-10-01",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Stuff upon stuff, yes  \nPiled high, a mountain of things  \nLife in abundance",
        "refusal": null
      },
      "finish_reason": "stop",
      "internal_metrics": []
    }
  ],
  "usage": {
    "prompt_tokens": 43,
    "completion_tokens": 18,
    "total_tokens": 61,
    "prompt_tokens_details": {
      "cached_tokens": 0,
      "cached_tokens_internal": 0,
      "text_tokens": 17,
      "image_tokens": 0,
      "audio_tokens": 26
    },
    "completion_tokens_details": {
      "reasoning_tokens": 0,
      "text_tokens": 18,
      "audio_tokens": 0
    }
  },
  "system_fingerprint": "fp_6e2d124157"
}
```

Show a more readable version of this. In particular, show the content fields from the choices messages that come back, joined together and rendered with markdown. Then show the number of input text tokens and input audio_tokens, and calculate their total cost in cents - using $100.00 / 1M tokens for audio input tokens and $2.50 / 1M for text input tokens. Show the JSON below that.
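
For reference, here is the arithmetic those rates imply for the sample response above (17 text input tokens and 26 audio input tokens) - a minimal standalone sketch, not part of the tool's code:

```javascript
// Sanity check of the requested pricing, using the sample usage above.
const textTokens = 17;  // usage.prompt_tokens_details.text_tokens
const audioTokens = 26; // usage.prompt_tokens_details.audio_tokens

// Rates: $2.50 per 1M text input tokens, $100.00 per 1M audio input tokens.
const textDollars = (textTokens * 2.50) / 1_000_000;     // 0.0000425 dollars
const audioDollars = (audioTokens * 100.00) / 1_000_000; // 0.0026 dollars

console.log((textDollars + audioDollars) * 100); // ≈ 0.26425 cents
```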

## assistant (Oct 17, 2024, 12:19 PM)

I understand your requirements. I'll modify the code to implement these changes. Here's the updated version of the JavaScript part that handles the API submission and response display:

Updated OpenAI Audio Tool JavaScript

```javascript
// ... (previous code remains the same)

async function submitToAPI() {
    if (!audioBlob) {
        alert('Please record audio first.');
        return;
    }

    const apiKey = getAPIKey();
    if (!apiKey) {
        alert('API Key is required.');
        return;
    }

    submitButton.textContent = 'Processing prompt...';
    submitButton.disabled = true;

    const base64Audio = await blobToBase64(audioBlob);
    const prompt = promptTextarea.value;

    const payload = {
        model: "gpt-4o-audio-preview",
        modalities: ["text"],
        messages: [
            {
                role: "user",
                content: [
                    {type: "text", text: prompt},
                    {
                        type: "input_audio",
                        input_audio: {
                            data: base64Audio,
                            format: "wav"
                        }
                    }
                ]
            }
        ]
    };

    try {
        const response = await fetch('https://api.openai.com/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${apiKey}`
            },
            body: JSON.stringify(payload)
        });

        const data = await response.json();
        displayFormattedResponse(data);
    } catch (error) {
        console.error('Error:', error);
        apiResponse.textContent = `Error: ${error.message}`;
    } finally {
        submitButton.textContent = 'Submit to API';
        submitButton.disabled = false;
    }
}

function displayFormattedResponse(data) {
    let formattedResponse = '';

    // Display content from choices
    if (data.choices && data.choices.length > 0) {
        const contents = data.choices.map(choice => choice.message.content).join('\n\n');
        formattedResponse += `### Response Content\n\n${contents}\n\n`;
    }

    // Calculate and display token usage and cost
    if (data.usage) {
        const textTokens = data.usage.prompt_tokens_details.text_tokens || 0;
        const audioTokens = data.usage.prompt_tokens_details.audio_tokens || 0;
        const textCost = (textTokens * 2.50) / 1000000;
        const audioCost = (audioTokens * 100.00) / 1000000;
        const totalCost = textCost + audioCost;

        formattedResponse += `### Token Usage and Cost\n\n`;
        formattedResponse += `- Text input tokens: ${textTokens}\n`;
        formattedResponse += `- Audio input tokens: ${audioTokens}\n`;
        formattedResponse += `- Total cost: ${totalCost.toFixed(4)} cents\n\n`;
    }

    // Display full JSON response
    formattedResponse += `### Full JSON Response\n\n\`\`\`json\n${JSON.stringify(data, null, 2)}\n\`\`\``;

    // Use a library like marked.js to render markdown, or implement a simple markdown renderer
    apiResponse.innerHTML = renderMarkdown(formattedResponse);
}

function renderMarkdown(markdown) {
    // This is a very basic markdown renderer. For a full solution, consider using a library like marked.js
    // Handle fenced code blocks before converting newlines to <br>,
    // otherwise the fence regex (which matches \n) can never match.
    return markdown
        .replace(/`{3}(\w+)?\n([\s\S]*?)\n`{3}/g, '<pre><code>$2</code></pre>')
        .replace(/### (.*)/g, '<h3>$1</h3>')
        .replace(/\n/g, '<br>');
}

// ... (rest of the code remains the same)
```

This updated code does the following:

  1. Changes the submit button text to "Processing prompt..." and disables it while the API request is in progress.
  2. Adds a new `displayFormattedResponse` function that formats the API response in a more readable way:
     - It displays the content from the choices' messages.
     - It calculates and shows the number of input text tokens and audio tokens, along with their total cost in cents.
     - It displays the full JSON response below the formatted information.
  3. Includes a basic `renderMarkdown` function to render the formatted response with some simple markdown-like styling.

To fully implement this, you'll need to make sure the HTML structure can accommodate the new formatting. You might want to consider adding a CSS file or inline styles to improve the presentation of the formatted response.

Also, for a more robust markdown rendering solution, consider incorporating a library like marked.js. This example uses a very basic markdown renderer for simplicity.

Would you like me to provide any additional modifications or explanations?

## human (Oct 17, 2024, 12:23 PM)

Use `import { marked } from "https://esm.run/marked";` for the markdown render

Output the JSON directly into a `<pre>` tag - do not use markdown for that bit

Output the entire updated HTML

## assistant (Oct 17, 2024, 12:23 PM)

Certainly. I'll incorporate the marked library for markdown rendering and update the HTML structure to accommodate these changes. Here's the complete updated HTML file:

Complete OpenAI Audio Tool HTML

```html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenAI Audio</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
            margin: 0;
            background-color: #f0f0f0;
        }
        .container {
            text-align: center;
            background-color: white;
            padding: 2rem;
            border-radius: 8px;
            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
            max-width: 800px;
            width: 100%;
        }
        button {
            font-size: 1rem;
            padding: 0.5rem 1rem;
            margin: 0.5rem;
            cursor: pointer;
        }
        #timer {
            font-size: 1.5rem;
            margin: 1rem 0;
        }
        #audioPlayback, #prompt {
            margin-top: 1rem;
            width: 100%;
        }
        #prompt {
            height: 100px;
            resize: vertical;
        }
        #apiResponse {
            margin-top: 1rem;
            text-align: left;
            overflow-x: auto;
        }
        #formattedResponse {
            background-color: #f8f8f8;
            padding: 1rem;
            border-radius: 4px;
            margin-bottom: 1rem;
        }
        #jsonResponse {
            background-color: #f8f8f8;
            padding: 1rem;
            border-radius: 4px;
            overflow-x: auto;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>OpenAI Audio</h1>
        <button id="recordButton">Start Recording</button>
        <div id="timer">00:00</div>
        <audio id="audioPlayback" controls></audio>
        <textarea id="prompt" placeholder="Enter your prompt here"></textarea>
        <button id="submitButton">Submit to API</button>
        <div id="apiResponse">
            <div id="formattedResponse"></div>
            <pre id="jsonResponse"></pre>
        </div>
    </div>

    <script type="module">
        import { marked } from "https://esm.run/marked";

        let audioContext;
        let recorder;
        let audioChunks = [];
        let startTime;
        let timerInterval;
        let audioBlob;
        let isRecording = false;
        const recordButton = document.getElementById('recordButton');
        const timer = document.getElementById('timer');
        const audioPlayback = document.getElementById('audioPlayback');
        const promptTextarea = document.getElementById('prompt');
        const submitButton = document.getElementById('submitButton');
        const formattedResponse = document.getElementById('formattedResponse');
        const jsonResponse = document.getElementById('jsonResponse');

        recordButton.addEventListener('click', toggleRecording);
        submitButton.addEventListener('click', submitToAPI);

        async function toggleRecording() {
            if (!isRecording) {
                await startRecording();
            } else {
                stopRecording();
            }
        }

        async function startRecording() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                audioContext = new (window.AudioContext || window.webkitAudioContext)();
                const source = audioContext.createMediaStreamSource(stream);
                const processor = audioContext.createScriptProcessor(1024, 1, 1);

                source.connect(processor);
                processor.connect(audioContext.destination);

                audioChunks = [];

                processor.onaudioprocess = (e) => {
                    const inputData = e.inputBuffer.getChannelData(0);
                    audioChunks.push(new Float32Array(inputData));
                };

                isRecording = true;
                startTime = Date.now();
                updateTimer();
                timerInterval = setInterval(updateTimer, 1000);
                recordButton.textContent = 'Stop Recording';
            } catch (error) {
                console.error('Error starting recording:', error);
                alert('Error starting recording. Please make sure you have given permission to use the microphone.');
            }
        }

        function stopRecording() {
            if (audioContext) {
                audioContext.close();
                audioContext = null;
            }
            clearInterval(timerInterval);
            recordButton.textContent = 'Start Recording';
            isRecording = false;

            // Convert to WAV
            const wavBlob = createWavBlob(audioChunks);
            audioBlob = wavBlob;

            const audioUrl = URL.createObjectURL(audioBlob);
            audioPlayback.src = audioUrl;
        }

        function updateTimer() {
            const elapsed = Math.floor((Date.now() - startTime) / 1000);
            const minutes = Math.floor(elapsed / 60).toString().padStart(2, '0');
            const seconds = (elapsed % 60).toString().padStart(2, '0');
            timer.textContent = `${minutes}:${seconds}`;
        }

        function createWavBlob(audioChunks) {
            const sampleRate = 44100; // NOTE: assumes 44.1 kHz capture; the actual AudioContext rate (audioContext.sampleRate) may differ, e.g. 48 kHz
            const numChannels = 1;
            const bitsPerSample = 16;
            const bytesPerSample = bitsPerSample / 8;
            const blockAlign = numChannels * bytesPerSample;

            const buffer = mergeAudioBuffers(audioChunks);
            const dataLength = buffer.length * bytesPerSample;
            const wavDataLength = 36 + dataLength;

            const headerBuffer = new ArrayBuffer(44);
            const view = new DataView(headerBuffer);

            // Standard 44-byte RIFF/WAVE header for 16-bit mono PCM
            writeString(view, 0, 'RIFF');                       // ChunkID
            view.setUint32(4, wavDataLength, true);             // ChunkSize (file size minus 8)
            writeString(view, 8, 'WAVE');                       // Format
            writeString(view, 12, 'fmt ');                      // Subchunk1ID
            view.setUint32(16, 16, true);                       // Subchunk1Size (16 for PCM)
            view.setUint16(20, 1, true);                        // AudioFormat (1 = linear PCM)
            view.setUint16(22, numChannels, true);              // NumChannels
            view.setUint32(24, sampleRate, true);               // SampleRate
            view.setUint32(28, sampleRate * blockAlign, true);  // ByteRate
            view.setUint16(32, blockAlign, true);               // BlockAlign
            view.setUint16(34, bitsPerSample, true);            // BitsPerSample
            writeString(view, 36, 'data');                      // Subchunk2ID
            view.setUint32(40, dataLength, true);               // Subchunk2Size (PCM data bytes)

            const wavBuffer = new Int16Array((headerBuffer.byteLength + dataLength) / 2); // Int16Array length is in 16-bit samples, not bytes
            wavBuffer.set(new Int16Array(headerBuffer));
            wavBuffer.set(convertToInt16(buffer), headerBuffer.byteLength / 2);

            return new Blob([wavBuffer], { type: 'audio/wav' });
        }

        function writeString(view, offset, string) {
            for (let i = 0; i < string.length; i++) {
                view.setUint8(offset + i, string.charCodeAt(i));
            }
        }

        function mergeAudioBuffers(buffers) {
            let totalLength = 0;
            for (let buffer of buffers) {
                totalLength += buffer.length;
            }
            const result = new Float32Array(totalLength);
            let offset = 0;
            for (let buffer of buffers) {
                result.set(buffer, offset);
                offset += buffer.length;
            }
            return result;
        }

        function convertToInt16(float32Array) {
            const int16Array = new Int16Array(float32Array.length);
            for (let i = 0; i < float32Array.length; i++) {
                const s = Math.max(-1, Math.min(1, float32Array[i]));
                int16Array[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
            }
            return int16Array;
        }

        async function submitToAPI() {
            if (!audioBlob) {
                alert('Please record audio first.');
                return;
            }

            const apiKey = getAPIKey();
            if (!apiKey) {
                alert('API Key is required.');
                return;
            }

            submitButton.textContent = 'Processing prompt...';
            submitButton.disabled = true;

            const base64Audio = await blobToBase64(audioBlob);
            const prompt = promptTextarea.value;

            const payload = {
                model: "gpt-4o-audio-preview",
                modalities: ["text"],
                messages: [
                    {
                        role: "user",
                        content: [
                            {type: "text", text: prompt},
                            {
                                type: "input_audio",
                                input_audio: {
                                    data: base64Audio,
                                    format: "wav"
                                }
                            }
                        ]
                    }
                ]
            };

            try {
                const response = await fetch('https://api.openai.com/v1/chat/completions', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Authorization': `Bearer ${apiKey}`
                    },
                    body: JSON.stringify(payload)
                });

                const data = await response.json();
                displayFormattedResponse(data);
            } catch (error) {
                console.error('Error:', error);
                formattedResponse.innerHTML = marked(`Error: ${error.message}`);
                jsonResponse.textContent = '';
            } finally {
                submitButton.textContent = 'Submit to API';
                submitButton.disabled = false;
            }
        }

        function displayFormattedResponse(data) {
            let mdResponse = '';

            // Display content from choices
            if (data.choices && data.choices.length > 0) {
                const contents = data.choices.map(choice => choice.message.content).join('\n\n');
                mdResponse += `### Response Content\n\n${contents}\n\n`;
            }

            // Calculate and display token usage and cost
            if (data.usage) {
                const textTokens = data.usage.prompt_tokens_details.text_tokens || 0;
                const audioTokens = data.usage.prompt_tokens_details.audio_tokens || 0;
                const textCost = (textTokens * 2.50) / 1000000;
                const audioCost = (audioTokens * 100.00) / 1000000;
                const totalCost = textCost + audioCost;

                mdResponse += `### Token Usage and Cost\n\n`;
                mdResponse += `- Text input tokens: ${textTokens}\n`;
                mdResponse += `- Audio input tokens: ${audioTokens}\n`;
                mdResponse += `- Total cost: ${totalCost.toFixed(4)} cents\n\n`;
            }

            // Render markdown response
            formattedResponse.innerHTML = marked(mdResponse);

            // Display full JSON response
            jsonResponse.textContent = JSON.stringify(data, null, 2);
        }

        function getAPIKey() {
            let apiKey = localStorage.getItem('openai_api_key');
            if (!apiKey) {
                apiKey = prompt('Please enter your OpenAI API Key:');
                if (apiKey) {
                    localStorage.setItem('openai_api_key', apiKey);
                }
            }
            return apiKey;
        }

        function blobToBase64(blob) {
            return new Promise((resolve, reject) => {
                const reader = new FileReader();
                reader.onloadend = () => resolve(reader.result.split(',')[1]);
                reader.onerror = reject;
                reader.readAsDataURL(blob);
            });
        }
    </script>
</body>
</html>
```

This updated HTML file incorporates all the requested changes:

  1. The submit button now changes its text to "Processing prompt..." and is disabled while the API request is in progress.
  2. The `marked` library is imported from a CDN for markdown rendering.
  3. The API response is now split into two parts:
     - A formatted response (rendered as markdown) in the `formattedResponse` div.
     - The raw JSON output in a `<pre>` tag with the id `jsonResponse`.
  4. The `displayFormattedResponse` function now:
     - Renders the content from the choices' messages using markdown.
     - Calculates and shows the number of input text tokens and audio tokens, along with their total cost in cents.
     - Displays the full JSON response in the `jsonResponse` `<pre>` tag without using markdown.

The styling has been slightly adjusted to accommodate the new structure, with separate styling for the formatted response and the JSON output.

This HTML file is now a complete, self-contained web application that can record audio, submit it to the OpenAI API along with a text prompt, and display the response in a more readable format, including token usage and cost information.

## human (Oct 17, 2024, 12:27 PM)

You output the total cost in dollars but I want it in cents, fix that

## assistant (Oct 17, 2024, 12:27 PM)

You're absolutely right, and I apologize for that oversight. I'll modify the code to display the total cost in cents as requested. Here's the corrected part of the displayFormattedResponse function:

Updated displayFormattedResponse function

```javascript
function displayFormattedResponse(data) {
    let mdResponse = '';

    // Display content from choices
    if (data.choices && data.choices.length > 0) {
        const contents = data.choices.map(choice => choice.message.content).join('\n\n');
        mdResponse += `### Response Content\n\n${contents}\n\n`;
    }

    // Calculate and display token usage and cost
    if (data.usage) {
        const textTokens = data.usage.prompt_tokens_details.text_tokens || 0;
        const audioTokens = data.usage.prompt_tokens_details.audio_tokens || 0;
        const textCost = (textTokens * 2.50) / 10000; // Cost in cents
        const audioCost = (audioTokens * 100.00) / 10000; // Cost in cents
        const totalCost = textCost + audioCost;

        mdResponse += `### Token Usage and Cost\n\n`;
        mdResponse += `- Text input tokens: ${textTokens}\n`;
        mdResponse += `- Audio input tokens: ${audioTokens}\n`;
        mdResponse += `- Total cost: ${totalCost.toFixed(4)} cents\n\n`;
    }

    // Render markdown response
    formattedResponse.innerHTML = marked(mdResponse);

    // Display full JSON response
    jsonResponse.textContent = JSON.stringify(data, null, 2);
}
```

In this updated version:

  1. The calculations for textCost and audioCost now divide by 10,000 instead of 1,000,000, which folds the dollars-to-cents conversion (multiplying by 100) into the per-million-token rate, so each cost comes out directly in cents.
  2. The totalCost is now correctly in cents.
  3. The output string, which already said "cents", is now accurate.

This change ensures that the total cost is displayed in cents as requested. The rest of the HTML file remains the same. You can replace the displayFormattedResponse function in the full HTML with this updated version to implement this correction.
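
As a quick check, plugging the sample usage from earlier (17 text input tokens, 26 audio input tokens) into the corrected formula:

```javascript
// Dividing by 10,000 folds the dollars-to-cents conversion (x100)
// into the per-million-token divisor: 1,000,000 / 100 = 10,000.
const textCost = (17 * 2.50) / 10000;    // 0.00425 cents
const audioCost = (26 * 100.00) / 10000; // 0.26 cents
console.log(textCost + audioCost);       // ≈ 0.26425 cents
```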
