Created
November 24, 2023 00:00
-
-
Save sarchak/931c5427aefb572f9ad92b2825f924b7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const WebSocket = require("ws"); | |
const express = require("express"); | |
const WaveFile = require("wavefile").WaveFile; | |
const axios = require("axios"); | |
const fs = require("fs"); | |
const ffmpeg = require("fluent-ffmpeg"); | |
const path = require("path"); | |
// Express application; the HTTP routes further down are registered on it.
const app = express();

// A single HTTP server carries both the Express routes and the WebSocket endpoint.
const server = require("http").createServer(app);
const wss = new WebSocket.Server({ server: server });

// Outbound AssemblyAI realtime socket; stays undefined until the POST route creates it.
let assembly;

// Decoded caller-audio buffers waiting to be batched and forwarded to AssemblyAI.
let chunks = [];
/**
 * Split raw µ-law audio bytes into Twilio Media Streams "media" messages.
 *
 * @param {Uint8Array} rawMuLawData - Header-less 8-bit µ-law samples.
 * @param {string} streamSid - Stream identifier copied into every message.
 * @param {number} chunkSize - Number of bytes carried by each message.
 * @returns {Array<{event: string, streamSid: string, media: {payload: string}}>}
 *   One message per chunk, in playback order; the last chunk may be shorter.
 */
function buildTwilioMediaMessages(rawMuLawData, streamSid, chunkSize) {
  const messages = [];
  for (let offset = 0; offset < rawMuLawData.length; offset += chunkSize) {
    const chunk = rawMuLawData.slice(offset, offset + chunkSize);
    messages.push({
      event: "media",
      streamSid: streamSid,
      media: {
        payload: Buffer.from(chunk).toString("base64"),
      },
    });
  }
  return messages;
}

/**
 * Download an MP3, transcode it to 8 kHz mono µ-law and wrap the audio in
 * Twilio Media Streams "media" messages.
 *
 * @param {string} mp3Url - URL of the MP3 file to download.
 * @param {string} streamSid - Twilio stream identifier placed in each message.
 * @returns {Promise<Array<object>>} Resolves with the ordered media messages.
 * @throws Rejects with the underlying error when the download, the ffmpeg
 *   transcode or the WAV decoding fails (the error is logged first).
 */
async function convertMP3toMuLawAndPrepareForTwilio(mp3Url, streamSid) {
  // Twilio media payloads are 8 kHz, single channel, 8-bit µ-law.
  const TWILIO_SAMPLE_RATE = 8000;
  // 8000 samples/s * 0.020 s * 1 byte per sample = 160 bytes per 20 ms frame.
  const CHUNK_SIZE = 160;

  let workDir;
  try {
    // Download MP3
    const response = await axios({
      url: mp3Url,
      method: "GET",
      responseType: "arraybuffer",
    });

    // A private directory per call keeps overlapping conversions from
    // overwriting each other's intermediate files.
    workDir = fs.mkdtempSync(
      path.join(require("os").tmpdir(), "twilio-audio-")
    );
    const mp3File = path.join(workDir, "input.mp3");
    const wavFile = path.join(workDir, "output.wav");
    fs.writeFileSync(mp3File, Buffer.from(response.data));

    // Let ffmpeg downmix and resample so the later µ-law encoding already
    // has the channel count and sample rate Twilio plays back.
    await new Promise((resolve, reject) => {
      ffmpeg(mp3File)
        .audioChannels(1)
        .audioFrequency(TWILIO_SAMPLE_RATE)
        .toFormat("wav")
        .on("end", () => resolve())
        .on("error", (err) => reject(err))
        .save(wavFile);
    });

    // Decoding happens outside the ffmpeg callback so a failure here rejects
    // the returned promise instead of escaping as an uncaught exception.
    const wav = new WaveFile(fs.readFileSync(wavFile));
    wav.toMuLaw();

    // Extract only the raw µ-law audio data, excluding headers
    return buildTwilioMediaMessages(wav.data.samples, streamSid, CHUNK_SIZE);
  } catch (error) {
    console.error("Error in convertMP3toMuLawAndPrepareForTwilio:", error);
    // Propagate so the caller's rejection handler runs instead of receiving
    // an undefined message list.
    throw error;
  } finally {
    // Clean up temporary files (best effort: never mask the real outcome).
    if (workDir) {
      try {
        for (const entry of fs.readdirSync(workDir)) {
          fs.unlinkSync(path.join(workDir, entry));
        }
        fs.rmdirSync(workDir);
      } catch (cleanupError) {
        console.error("Failed to remove temporary audio files:", cleanupError);
      }
    }
  }
}
// Handle Web Socket Connection
// Each connection is treated as a Twilio media stream: caller audio is
// decoded, batched and forwarded to the AssemblyAI socket, while every
// AssemblyAI message triggers a transcript broadcast plus audio playback.
wss.on("connection", async function connection(ws) {
  console.log("New Connection Initiated");
  // Set by the "start" event; identifies the stream in outbound messages.
  let streamSid;

  ws.on("message", async function incoming(message) {
    if (!assembly)
      return console.error("AssemblyAI's WebSocket must be initialized.");

    // A malformed frame must not turn into an unhandled rejection of this
    // async listener, so parsing failures are logged and the frame dropped.
    let msg;
    try {
      msg = JSON.parse(message);
    } catch (parseError) {
      return console.error(
        "Ignoring malformed WebSocket message:",
        parseError.message
      );
    }

    switch (msg.event) {
      case "connected": {
        console.log(`A new call has connected.`);
        assembly.onerror = console.error;
        // Transcript fragments keyed by their audio_start offset.
        const texts = {};
        assembly.onmessage = (assemblyMsg) => {
          const res = JSON.parse(assemblyMsg.data);
          texts[res.audio_start] = res.text;

          // Rebuild the full transcript in audio order.
          const keys = Object.keys(texts);
          keys.sort((a, b) => a - b);
          let transcript = "";
          for (const key of keys) {
            if (texts[key]) {
              transcript += ` ${texts[key]}`;
            }
          }
          console.log(transcript);

          wss.clients.forEach((client) => {
            if (client.readyState === WebSocket.OPEN) {
              client.send(
                JSON.stringify({
                  event: "interim-transcription",
                  text: transcript,
                })
              );
            }
          });
          console.log("==============");
          console.log(transcript, streamSid);

          // NOTE(review): the clip is downloaded and converted again for
          // every AssemblyAI message — confirm that is intended.
          let mp3Url =
            "https://eleven-public-cdn.elevenlabs.io/audio-native/d65e433fd8a560cbba1b7dd26809dd48ea2b408c5b6c0a3e42e5b83c43957f5b/fUQGnaXq583QFBHJUhLY.mp3";
          convertMP3toMuLawAndPrepareForTwilio(mp3Url, streamSid)
            .then((twilioMessages) => {
              // The call may have ended while the audio was being prepared;
              // sending on a closed socket would throw.
              if (ws.readyState !== WebSocket.OPEN) {
                return;
              }
              twilioMessages.forEach((message1) => {
                console.log(message1);
                ws.send(JSON.stringify(message1));
              });
              ws.send(
                JSON.stringify({
                  event: "mark",
                  streamSid: streamSid,
                  mark: {
                    name: "my label",
                  },
                })
              );
            })
            .catch((error) => {
              console.error("Error processing MP3 to MuLaw:", error);
            });
        };
        break;
      }
      case "start": {
        console.log(`Starting Media Stream ${msg.streamSid}`);
        streamSid = msg.streamSid;
        break;
      }
      case "media": {
        const twilioData = msg.media.payload;
        // Build the wav file from scratch since it comes in as raw data
        const wav = new WaveFile();
        // Twilio uses MuLaw so we have to encode for that
        wav.fromScratch(1, 8000, "8m", Buffer.from(twilioData, "base64"));
        // This library has a handy method to decode MuLaw straight to 16-bit PCM
        wav.fromMuLaw();
        // Get the raw audio data in base64
        const twilio64Encoded = wav.toDataURI().split("base64,")[1];
        // Create our audio buffer
        const twilioAudioBuffer = Buffer.from(twilio64Encoded, "base64");
        // Send data starting at byte 44 to remove wav headers so our model sees only audio data
        chunks.push(twilioAudioBuffer.slice(44));
        // We have to chunk data b/c twilio sends audio durations of ~20ms and AAI needs a min of 100ms
        if (chunks.length >= 5) {
          const audioBuffer = Buffer.concat(chunks);
          chunks = [];
          // send() throws unless the socket is OPEN (for example while it is
          // still connecting), so that batch is dropped instead.
          if (assembly.readyState === WebSocket.OPEN) {
            const encodedAudio = audioBuffer.toString("base64");
            assembly.send(JSON.stringify({ audio_data: encodedAudio }));
          }
        }
        break;
      }
      case "stop": {
        console.log(`Call Has Ended`);
        if (assembly.readyState === WebSocket.OPEN) {
          assembly.send(JSON.stringify({ terminate_session: true }));
        }
        break;
      }
    }
  });
});
// HTTP routes
// GET / serves the index.html file that sits next to this script.
app.get("/", (req, res) => {
  const indexPage = path.join(__dirname, "/index.html");
  return res.sendFile(indexPage);
});
// POST / opens a fresh AssemblyAI realtime socket and answers with TwiML
// containing <Start><Stream>, <Say> and <Pause> verbs.
app.post("/", async (req, res) => {
  console.log("Post called");
  console.log(`${req.headers.host}`);
  assembly = new WebSocket(
    "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=8000",
    {
      headers: {
        // ASSEMBLYAI_API_KEY overrides the placeholder value when it is set.
        authorization: process.env.ASSEMBLYAI_API_KEY || "key",
      },
    }
  );
  // Attach the error handler right away: a failed handshake emits "error"
  // before any media-stream event arrives, and an "error" event with no
  // listener is thrown as an uncaught exception.
  assembly.onerror = console.error;
  res.set("Content-Type", "text/xml");
  res.send(
    `<Response>
<Start>
<Stream url='wss://assistant.loca.lt' />
</Start>
<Say>
Start
</Say>
<Pause length='30' />
</Response>`
  );
});
// Start server
// The message is logged from the listen callback so it only appears once the
// port has actually been bound.
server.listen(8080, () => {
  console.log("Listening at Port 8080");
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment