-
-
Save DavidBuchanan314/71c9ea55d995d952c55349c4bda3eaf8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Type "listen" to send 5 seconds of mic input to the server. Type anything else to send that text as input ("terminal" mode).
Replace the REDACTED sections with your own values ;)
""" | |
import aiohttp | |
import asyncio | |
import ssl | |
import io | |
import json | |
import base64 | |
import time | |
import pyaudio | |
import wave | |
from prompt_toolkit import prompt, styles, PromptSession | |
from prompt_toolkit.patch_stdout import patch_stdout | |
from pygame import mixer | |
# Microphone capture settings, shared by the recorder and the WAV encoder.
FORMAT = pyaudio.paInt16  # sample format passed to PyAudio
CHANNELS = 2
RATE = 16000  # frames per second
RECORD_SECONDS = 5  # duration of one "listen" capture
CHUNK = RATE // 10  # 0.1s chunks

# One PyAudio instance for the whole script; used to open the input stream
# and to look up the sample width for FORMAT.
audio = pyaudio.PyAudio()
def convert_to_wav(
    frames: bytes,
    *,
    channels: "int | None" = None,
    sample_width: "int | None" = None,
    rate: "int | None" = None,
) -> bytes:
    """Wrap raw PCM frames in a WAV container and return the encoded bytes.

    Args:
        frames: Raw PCM sample data to store as the WAV payload.
        channels: Channel count for the header; defaults to ``CHANNELS``.
        sample_width: Bytes per sample for the header; defaults to the width
            PyAudio reports for ``FORMAT``.
        rate: Frames per second for the header; defaults to ``RATE``.

    Returns:
        The complete in-memory WAV file (header followed by ``frames``).
    """
    # Fall back to the module-level capture settings only for values the
    # caller did not supply, so existing one-argument calls are unchanged.
    if channels is None:
        channels = CHANNELS
    if sample_width is None:
        sample_width = audio.get_sample_size(FORMAT)
    if rate is None:
        rate = RATE

    buf = io.BytesIO()
    # The context manager finalises the header and closes the writer even if
    # writing the frames raises; it does not close ``buf``, which we own.
    with wave.open(buf, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(frames)
    return buf.getvalue()
# Bring up pygame's mixer once so audio replies can be played later.
mixer.init()

# NOTE(security): certificate verification is switched off on purpose because
# traffic goes through a local proxy that presents an untrusted certificate.
# Do not reuse this context for connections that need real TLS validation.
ssl_ctx = ssl._create_unverified_context()  # our proxy has untrusted cert

# Device identity and credentials -- replace with your own values.
IMEI = "REDACTED"
ACCOUNT_KEY = "REDACTED"

# First message sent after the WebSocket opens; it identifies the device and
# carries the account token.
login_msg = {
    "global": {
        "initialize": {
            "deviceId": IMEI,
            "evaluate": False,
            "greet": True,
            "language": "en",
            "listening": True,
            "location": {
                "latitude": 0.0,
                "longitude": 0.0,
            },
            "mimeType": "wav",
            "timeZone": "GMT",
            "token": f"rabbit-account-key+{ACCOUNT_KEY}",
        },
    },
}
async def print_msgs(ws: aiohttp.ClientWebSocketResponse):
    """Consume incoming WebSocket messages until the connection ends.

    Non-text frames are ignored. A text frame whose JSON has an
    ``assistantResponseDevice`` entry under ``kernel`` is treated as an audio
    reply: its base64 ``audio`` field is decoded and played through the
    pygame mixer. Every other text frame is printed as parsed JSON.

    Args:
        ws: The open WebSocket connection to read from.
    """
    async for msg in ws:
        if msg.type != aiohttp.WSMsgType.TEXT:
            continue
        msg_json = json.loads(msg.data)
        kernel = msg_json.get("kernel", {})
        if "assistantResponseDevice" in kernel:
            # Audio replies are played rather than dumped to the terminal.
            buf = base64.b64decode(kernel["assistantResponseDevice"]["audio"])
            mixer.Sound(io.BytesIO(buf)).play()
            continue
        print(msg_json)
def _voice_activity(state: str) -> dict:
    """Build the ``voiceActivity`` message announcing a push-to-talk state."""
    return {
        "kernel": {
            "voiceActivity": {
                "imageBase64": "",
                "state": state
            }
        }
    }


async def _send_mic_capture(ws) -> None:
    """Record RECORD_SECONDS of microphone audio and send it over ``ws``.

    Sends a "pttButtonPressed" message, then reads CHUNK-frame buffers from
    the input stream, sending each one as its own WAV-encoded binary frame,
    and finally sends "pttButtonReleased".
    """
    await ws.send_json(_voice_activity("pttButtonPressed"))
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK
    )
    try:
        for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
            print("chunk")
            # stream.read blocks, so run it in a worker thread to keep the
            # event loop (and the receiver task) responsive.
            data = await asyncio.to_thread(stream.read, CHUNK)
            await ws.send_bytes(convert_to_wav(data))
    finally:
        # Release the input device even if a read or send fails part-way.
        stream.stop_stream()
        stream.close()
    await ws.send_json(_voice_activity("pttButtonReleased"))


async def main():
    """Connect to the session endpoint and run the interactive prompt loop.

    Opens the WebSocket through the local proxy, sends ``login_msg``, starts
    ``print_msgs`` as a background task, and then reads lines from the prompt
    forever: the line "listen" triggers a microphone capture, any other line
    is sent as a ``userText`` message.
    """
    connector = aiohttp.TCPConnector(ssl=ssl_ctx)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.ws_connect(
            "wss://r1-api.rabbit.tech/session",
            proxy="http://localhost:8080/",
            headers={
                "User-Agent": "okhttp/4.9.1",
                "App-Version": "20240429.6-1-g66aa0f20",
                "OS-Version": "REDACTED",
            }
        ) as ws:
            print("[+] connected")
            await ws.send_json(login_msg)
            promptsess = PromptSession("> ", style=styles.Style.from_dict({'': 'ansired'}))
            # Keep a reference to the task: the event loop only holds a weak
            # reference, so an unreferenced task may be garbage-collected
            # before it finishes.
            receiver = asyncio.create_task(print_msgs(ws))
            try:
                while True:
                    line = await promptsess.prompt_async()
                    if line == "listen":
                        await _send_mic_capture(ws)
                    else:
                        await ws.send_json({
                            "kernel": {
                                "userText": {
                                    "text": line
                                }
                            }
                        })
            finally:
                # Stop the receiver before the WebSocket is torn down.
                receiver.cancel()
# Only start the client when executed as a script, so importing this module
# (e.g. to reuse convert_to_wav) does not open a connection.
if __name__ == "__main__":
    # patch_stdout lets print() output from the background receiver appear
    # without garbling the active prompt line.
    with patch_stdout():
        asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment