Last active
December 19, 2025 13:53
-
-
Save CodeWithOz/d254c811d5a20c2083f593d0a856f875 to your computer and use it in GitHub Desktop.
TTS logic for Listen Better demo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ... | |
/**
 * Converts base64-encoded raw PCM audio (as returned by Gemini TTS) to an MP3
 * file by delegating the encoding to `pcmBufferToMp3`.
 *
 * Sample rate and bit depth are read from the MIME type, e.g.
 * `audio/L16;codec=pcm;rate=24000`. When the MIME type is missing or a value
 * cannot be parsed, 24000 Hz and 16 bits are used. The encoder is always asked
 * for a single (mono) channel.
 *
 * @param audioContent - Audio payload; `data` holds the base64 PCM samples and
 *   `mimeType` optionally describes their format.
 * @param opts - `workDir`: output directory (defaults to `os.tmpdir()`);
 *   `baseName`: output file name without extension (defaults to
 *   `dialogue_<timestamp>`).
 * @returns The path `${workDir}/${baseName}.mp3`.
 * @throws Error when no audio data is supplied, or when the data decodes to
 *   zero bytes (empty or not valid base64).
 */
async function convertAudioContentToMp3(
  audioContent: GeminiTTSAudioContent,
  opts?: { workDir?: string; baseName?: string }
): Promise<string> {
  if (!audioContent || !audioContent.data) {
    throw new Error('No audio content provided');
  }

  const pcmBuffer = Buffer.from(audioContent.data, 'base64');
  // Buffer.from silently skips characters that are not valid base64, so a
  // malformed payload shows up as an empty buffer rather than as an exception.
  if (pcmBuffer.length === 0) {
    throw new Error('Audio content is empty or not valid base64');
  }

  // Parse MIME type like: audio/L16;codec=pcm;rate=24000
  let sampleRate = 24000;
  let bitDepth = 16;
  const { mimeType } = audioContent;
  if (typeof mimeType === 'string') {
    const rateMatch = mimeType.match(/(?:^|[;\s])rate=(\d+)/i);
    const depthMatch = mimeType.match(/^audio\/L(\d+)/i);
    // `|| fallback` also rejects a parsed value of 0, which is never usable.
    if (rateMatch) {
      sampleRate = parseInt(rateMatch[1], 10) || sampleRate;
    }
    if (depthMatch) {
      bitDepth = parseInt(depthMatch[1], 10) || bitDepth;
    }
  }

  // `||` (not `??`) is deliberate: an empty-string option falls back to the default.
  const outputDir = opts?.workDir || os.tmpdir();
  const baseName = opts?.baseName || `dialogue_${Date.now()}`;
  const outputPath = `${outputDir}/${baseName}.mp3`;

  await pcmBufferToMp3(pcmBuffer, outputDir, baseName, {
    sampleRate,
    channels: 1,
    bitrate: '192k',
    codec: 'libmp3lame',
    bitDepth,
  });

  // Report the decoded PCM size; the base64 string length is not a byte count.
  console.log(`[convertAudioContentToMp3] MP3 written to ${outputPath} (${pcmBuffer.length} bytes)`);
  return outputPath;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment