Forked from crjaensch/ADA_v2_README.md by @witt3rd.
Personal AI Assistant: 'Ada' - v0.2

This is not complete, working code.

This is strictly a v0.2, scrappy, proof-of-concept version of a personal AI assistant working end to end in just ~726 LOC.

This is the second iteration, showcasing the two-way prompt, aka multi-step human-in-the-loop. The initial v0 assistant version is here.

It's meant purely as a frame of reference for the core ideas behind building a POC of a personal AI assistant.

For a high-level look at how this works, check out the explanation video. To follow our agentic journey, check out the @IndyDevDan channel.

Stay focused, keep building.
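
At its core, the two-way prompt is simple: the assistant speaks, opens a text editor for your reply, and continues the workflow with whatever you typed. A minimal sketch of the pattern (speak and open_editor stand in for the real functions defined in the modules below):

# Minimal sketch of the two-way prompt (multi-step human-in-the-loop) pattern.
# speak() and open_editor() stand in for the real implementations below.
def two_way_prompt(question: str) -> str:
    speak(question)  # 1. The assistant asks the human for input
    reply = open_editor()  # 2. The human types a reply in an editor, then closes it
    if not reply:
        speak("No input received. Skipping this request.")
        return ""
    speak(f"Got it: {reply}")  # 3. The assistant confirms and continues the workflow
    return reply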

# modules/editor.py
import os
import random
import subprocess
import time


def edit(contents: str) -> str:
    """
    Opens TextEdit on macOS with a temp file seeded with `contents`,
    waits until the editor is closed, and returns the edited text.
    """
    # Get the current working directory
    current_dir = os.getcwd()
    # Generate a random number to include in the filename
    random_number = random.randint(1000, 9999)
    temp_file_path = os.path.join(current_dir, f"tempfile_{random_number}.json")
    # Create and close the temporary file explicitly
    with open(temp_file_path, "w+") as tmp:
        tmp.write(contents)
        tmp.flush()
    # Make the file readable and writable by everyone
    os.chmod(temp_file_path, 0o666)
    # Wait for 1 second before opening the file in the editor
    time.sleep(1)
    # Open TextEdit ('-W' blocks until the app instance is closed)
    editor_process = subprocess.Popen(
        ["open", "-W", "-n", "-a", "TextEdit", temp_file_path]
    )
    editor_process.wait()
    # Read the modified content from the file
    with open(temp_file_path, "r") as file:
        modified_content = file.read()
    # Clean up by removing the temporary file
    os.remove(temp_file_path)
    return modified_content


# Example usage:
if __name__ == "__main__":
    sample_contents = "How are you doing this. Tell me more about it:"
    modified_config = edit(sample_contents)
    print(modified_config)

# modules/human_in_the_loop.py
import subprocess
import tkinter as tk
from tkinter import filedialog

from modules import editor


def open_file() -> str | None:
    """Opens a file selection dialog and returns the selected file path."""
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    file_path = filedialog.askopenfilename()
    root.destroy()
    return file_path or None


def open_editor() -> str:
    """Opens an empty editor buffer and returns what the user typed."""
    return editor.edit(contents="")


def open_file_in_editor_and_continue(file: str) -> None:
    """Opens a file in the editor using the 'code' command and allows the user to continue editing."""
    if file:
        subprocess.run(["code", file])
    else:
        print("No file provided to open.")
# modules/llm.py
import base64
import os

import google.generativeai as genai
import openai
from dotenv import load_dotenv
from pydantic import BaseModel

from modules import parsers

# Load environment variables from .env file
load_dotenv()
api_key = os.environ["GOOGLE_API_KEY"]
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Initialize Google API client
genai.configure(api_key=api_key)


def gpro_1_5_prompt(prompt: str) -> str:
    """
    Generates content for the prompt using the Gemini 1.5 Pro model and
    returns the text of the first candidate's first content part.

    Args:
    - prompt (str): The prompt to generate content for.

    Returns:
    - str: The text part of the first candidate's content.
    """
    model_name = "models/gemini-1.5-pro-latest"
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content(prompt, request_options={})
    return response.candidates[0].content.parts[0].text


def gpro_1_5_prompt_with_model(prompt: str, pydantic_model: BaseModel) -> BaseModel:
    """
    Generates content for the prompt using the Gemini 1.5 Pro model and
    validates the response against the given pydantic model.

    Args:
    - prompt (str): The prompt to generate content for.
    - pydantic_model (BaseModel): The model class to validate the response against.

    Returns:
    - BaseModel: An instance of `pydantic_model` parsed from the response.
    """
    model_name = "models/gemini-1.5-pro-latest"
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content(prompt, request_options={})
    response_text = response.candidates[0].content.parts[0].text
    if "```json" in response_text:
        # Gemini often wraps JSON in a fenced code block; strip it first
        return pydantic_model.model_validate(
            parsers.parse_json_from_gemini(response_text)
        )
    else:
        return pydantic_model.model_validate_json(response_text)
def gpt4t_w_vision_json_prompt(
    prompt: str,
    model: str = "gpt-4-turbo-2024-04-09",
    instructions: str = "You are a helpful assistant that responds in JSON format.",
    pydantic_model: BaseModel = None,
) -> BaseModel:
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": instructions,  # Instructions passed as a system message
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        response_format={"type": "json_object"},
    )
    response_text = response.choices[0].message.content
    print(f"Text LLM response: {response_text}")
    as_model = pydantic_model.model_validate_json(response_text)
    return as_model


def gpt4t_w_vision(
    prompt: str,
    model: str = "gpt-4-turbo-2024-04-09",
    instructions: str = "You are a helpful assistant.",
) -> str:
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": instructions,  # Instructions passed as a system message
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    response_text = response.choices[0].message.content
    return response_text


def encode_image(image_path: str) -> str:
    """Base64-encodes an image file for inclusion in a data URL."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
def gpt4t_w_vision_image_with_model(
    prompt: str,
    file_path: str,
    model: str = "gpt-4-turbo-2024-04-09",
    instructions: str = "You are a helpful assistant that specializes in image analysis.",
    pydantic_model: BaseModel = None,
) -> BaseModel:
    file_extension = file_path.split(".")[-1]
    base64_image = encode_image(file_path)
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": instructions,
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/{file_extension};base64,{base64_image}"
                        },
                    },
                ],
            },
        ],
        response_format={"type": "json_object"},
    )
    response_text = response.choices[0].message.content
    print("response_text", response_text)
    parsed_response = pydantic_model.model_validate_json(response_text)
    return parsed_response
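

# Hypothetical usage sketch (not in the original gist): exercises the JSON-mode
# helper above end to end. GreetingResponse is an illustrative model name.
class GreetingResponse(BaseModel):
    greeting: str


if __name__ == "__main__":
    greeting_result = gpt4t_w_vision_json_prompt(
        prompt="Greet the user. Respond in this JSON format exclusively: {greeting: ''}",
        pydantic_model=GreetingResponse,
    )
    print(greeting_result.greeting)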
# ADA - Personal AI Assistant (v0.2.1)
# Proof of Concept

# CHANGES (v0.2.1):
# - use textwrap.dedent() for better formatting of multi-line prompts
# - use Deepgram instead of AssemblyAI for audio-to-text transcription
# - minor changes to the code generation prompts
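# Illustration (not from the original code): dedent() strips the common leading
# whitespace shared by every line of a triple-quoted string, so long prompts can
# stay indented with the surrounding code, e.g.
#
#     dedent("""\
#         line one
#         line two
#     """) == "line one\nline two\n"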
import difflib
import json
import os
import subprocess
import sys
import wave
from datetime import datetime
from textwrap import dedent

import pyperclip
import requests
import sounddevice as sd
from bs4 import BeautifulSoup
from deepgram import (
    DeepgramClient,
    FileSource,
    PrerecordedOptions,
)
from dotenv import load_dotenv
from elevenlabs import play
from elevenlabs.client import ElevenLabs
from markdownify import markdownify
from pydantic import BaseModel

from modules import editor, human_in_the_loop, llm

load_dotenv()

ACTIVATION_KEYWORD = "Ada"
PERSONAL_AI_ASSISTANT_NAME = "ADA"
CONFIG_FILE = "./config.json"
HUMAN_COMPANION_NAME = "Chris"

PERSONAL_AI_ASSISTANT_PROMPT_HEAD = dedent(
    f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
    You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
    We both like short, concise, back-and-forth conversations.
    """
)

try:
    with open(CONFIG_FILE, "r") as config_file:
        configuration = json.load(config_file)
except FileNotFoundError:
    configuration = {
        "working_directory": None,
    }
    # Write a default config so later workflows can update it
    with open(CONFIG_FILE, "w") as config_file:
        json.dump(configuration, config_file, indent=2)

channels = 1
iteration_start_time = None

# --------------------- Agent Workflows ---------------------
def get_simple_keyword_ai_agent_router():
    """
    Map keyword groups to workflows (simple keyword-based router).
    """
    return {
        # v0.2 new flows w/ two-way prompts
        "configure,configuration": configure_assistant_workflow,
        "example code": example_code_workflow,
        "view component": image_to_vue_component_workflow,
        #
        "bash,browser": run_bash_command_workflow,  # AI Agent | Agentic Workflow
        "shell": shell_command_workflow,  # AI Agent | Agentic Workflow
        "question": question_answer_workflow,  # AI Agent | Agentic Workflow
        "hello,hey,hi": soft_talk_workflow,  # AI Agent | Agentic Workflow
        #
        "exit": end_conversation_workflow,
    }
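

# Illustrative example (not in the original): for the transcript
# "Ada, run a bash command to open chrome", get_first_keyword_in_prompt()
# below splits each comma-separated key and matches "bash", so
# run_bash_command_workflow is dispatched with the prompt.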
def image_to_vue_component_workflow(prompt: str):
    """
    Generate a Vue component from an image.
    """

    class VueComponentResponse(BaseModel):
        vue_component: str

    class FileNameResponse(BaseModel):
        file_name: str

    speak(build_feedback_prompt("Select an image to generate a Vue component from."))
    open_file_path = human_in_the_loop.open_file()
    print(f"🎆 Image selected at {open_file_path}")
    if not open_file_path:
        speak(build_feedback_prompt("No image selected. Skipping this request."))
        return
    speak(
        build_feedback_prompt(
            "Okay, I see the image. Now I'll generate the Vue component based on the image and your request."
        )
    )
    component_response: VueComponentResponse = llm.gpt4t_w_vision_image_with_model(
        dedent(f"""You're a Senior Vue 3 developer. You build new Vue components using the Composition API with <script setup lang='ts'>.
        You strictly follow the REQUIREMENTS below.

        REQUIREMENTS:
        - Your current assignment is to build a new Vue component that matches the image.
        - Return strictly the code for the Vue component including <template>, <script setup lang='ts'>, and <style> sections.
        - Use tailwind css to style the component.
        - Respond in this JSON format exclusively: {{vue_component: ''}}
        """),
        file_path=open_file_path,
        pydantic_model=VueComponentResponse,
    )
    file_name_response: FileNameResponse = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You're a Senior Vue 3 developer. You build new Vue components using the Composition API with <script setup lang='ts'>.
        You've just created the VUE_COMPONENT. Now you're naming the component.
        Create a concise and descriptive name for the component.
        Respond in this JSON format exclusively: {{file_name: ''}}

        VUE_COMPONENT:
        {component_response.vue_component}
        """),
        pydantic_model=FileNameResponse,
    )
    # Write the component to a .vue file in the working directory
    file_path = os.path.join(
        configuration["working_directory"], file_name_response.file_name
    )
    with open(file_path, "w") as file:
        file.write(component_response.vue_component)
    speak(
        build_feedback_prompt(
            f"I've created the Vue component and named it {file_name_response.file_name}. Let me know if you want to make any edits."
        )
    )
    human_in_the_loop.open_file_in_editor_and_continue(file_path)
    requested_updates = human_in_the_loop.open_editor()
    if not requested_updates:
        speak(build_feedback_prompt("No changes requested. Component ready for use."))
        return
    component_to_update = component_response.vue_component
    update_component_response: VueComponentResponse = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You're a Senior Vue 3 developer. You build new Vue components using the Composition API with <script setup lang='ts'>.
        You've just created the VUE_COMPONENT. A change from your product manager has come in and you're now tasked with updating the component.
        You follow the REQUIREMENTS below to make sure the component is updated correctly.

        REQUIREMENTS:
        - Your current assignment is to make updates to the VUE_COMPONENT based on the changes requested by the product manager.
        - Return strictly the code for the Vue component including <template>, <script setup lang='ts'>, and <style> sections.
        - Use tailwind css to style the component.
        - Respond in this JSON format exclusively: {{vue_component: ''}}

        REQUESTED_CHANGES:
        {requested_updates}

        VUE_COMPONENT:
        {component_to_update}
        """),
        pydantic_model=VueComponentResponse,
    )
    # Write the updated component back to the same file
    with open(file_path, "w") as file:
        file.write(update_component_response.vue_component)
    speak(
        build_feedback_prompt(
            "I've updated the Vue component based on your feedback. What's next?"
        )
    )

def run_bash_command_workflow(prompt: str):
    run_bash_prompt = dedent(
        f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
        You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
        You've been asked to run the following bash COMMAND: '{prompt}'

        Here are available bash COMMANDS you can run:

        # chrome browser
        browser() {{
            open -a 'Google Chrome' $1
        }}

        # typescript playground
        playt() {{
            cursor "/Users/ravix/Documents/projects/experimental/playt"
        }}

        chats() {{
            browser "https://aistudio.google.com/app/prompts/new_chat"
            browser "https://console.anthropic.com/workbench"
            browser "https://chat.openai.com/"
        }}

        Based on the COMMAND - RESPOND WITH THE COMMAND to run in this JSON format: {{bash_command_to_run: ''}}.
        Exclude any new lines or code blocks from the command. Respond with exclusively JSON.
        Your COMMAND will be immediately run and the output will be returned to the user.
        """
    )

    class BashCommandResponse(BaseModel):
        bash_command_to_run: str

    response: BashCommandResponse = llm.gpt4t_w_vision_json_prompt(
        run_bash_prompt, pydantic_model=BashCommandResponse
    )
    print("👧 Raw response: ", response)
    command = response.bash_command_to_run
    print(f"💻 {PERSONAL_AI_ASSISTANT_NAME} is running this command: ", command)
    try:
        # Source the profile so the helper functions above are available
        command = "source ~/.bash_profile && " + command
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            check=True,
        )
        print(f"💻 Command executed successfully: {command}")
        print(f"💻 Output: {result.stdout}")
    except subprocess.CalledProcessError as e:
        print(f"💻 Error executing command: {command}\n💻 Error: {e}")
        return

    soft_talk_prompt = dedent(
        f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
        You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
        We both like short, concise, back-and-forth conversations.
        We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.

        You've just helped your human companion run this bash COMMAND: {command}
        Let your human companion know you've finished running the command and what you can do next.
        """
    )
    response = llm.gpro_1_5_prompt(soft_talk_prompt)
    speak(response)

def question_answer_workflow(prompt: str):
    question_answer_prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.
        We like to discuss in high level details without getting too technical.

        Respond to the following question: {prompt}
        """
    )
    response = llm.gpro_1_5_prompt(question_answer_prompt)
    speak(response)

def soft_talk_workflow(prompt: str):
    soft_talk_prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.

        Respond to the following prompt: {prompt}
        """
    )
    response = llm.gpro_1_5_prompt(soft_talk_prompt)
    speak(response)

def shell_command_workflow(prompt: str):
    shell_command_prompt = dedent(
        f"""You are a highly efficient, code-savvy AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
        You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
        Your task is to provide a JSON response with the following format: {{command_to_run: ''}} detailing the shell command
        for the macOS bash shell based on this question: {prompt}.
        After generating the response, your command will be attached DIRECTLY to your human companion's clipboard to be run.
        """
    )

    class ShellCommandModel(BaseModel):
        command_to_run: str

    response = llm.gpt4t_w_vision_json_prompt(
        prompt=shell_command_prompt,
        pydantic_model=ShellCommandModel,
    )
    pyperclip.copy(response.command_to_run)
    completion_prompt = dedent(
        f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
        You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
        We both like short, concise, back-and-forth conversations.

        You've just attached the command '{response.command_to_run}' to your human companion's clipboard like they've requested.
        Let your human companion know you've attached it and let them know you're ready for the next task.
        """
    )
    completion_response = llm.gpro_1_5_prompt(completion_prompt)
    speak(completion_response)

def summarize_diff_workflow(start: str | dict, end: str | dict, file: str):
    """
    Summarize the diff between two JSON-serializable values.
    """
    start = json.dumps(start, indent=2).splitlines()
    end = json.dumps(end, indent=2).splitlines()
    diff = difflib.unified_diff(start, end, fromfile="before", tofile="after")
    diffed = "\n".join(diff)
    summarize_prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        Your companion has just finished editing the {file}.
        You'll concisely summarize the changes made to the file in a one-sentence summary.
        The point is to communicate and acknowledge the changes made to the file.

        The changes are:
        {diffed}
        """
    )
    summarize_response = llm.gpro_1_5_prompt(summarize_prompt)
    speak(summarize_response)
    return diffed

def configure_assistant_workflow(prompt: str):
    """
    Configure settings for our assistant.
    """
    configure_prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        You've just opened a configuration file for your human companion.
        Let your human companion know you've opened the file and are ready for them to edit it.
        """
    )
    prompt_response = llm.gpro_1_5_prompt(prompt=configure_prompt)
    speak(prompt_response)
    global configuration
    previous_configuration = configuration
    updated_config = human_file_json_prompt(configuration)
    # Keep the in-memory configuration in sync with the file
    configuration = updated_config
    with open(CONFIG_FILE, "w") as config_file:
        json.dump(updated_config, config_file, indent=2)
    summarize_diff_workflow(
        previous_configuration, updated_config, "configuration.json"
    )

def end_conversation_workflow(prompt: str):
    end_prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        We're wrapping up our work for the day. Thanks for all your help and for being a great engineering partner.

        Respond to your human companion's closing thoughts: {prompt}
        """
    )
    response = llm.gpro_1_5_prompt(end_prompt)
    speak(response)
    sys.exit()

def example_code_workflow(prompt: str):
    """
    Generate example code for documentation found at a URL.
    """

    class ExampleCodeResponse(BaseModel):
        code: str

    class ExampleCodeFileNameResponse(BaseModel):
        file_name: str

    url_from_clipboard = pyperclip.paste()
    if not url_from_clipboard or "http" not in url_from_clipboard:
        speak(
            build_feedback_prompt(
                "I don't see a URL on your clipboard. Please paste a URL into your editor."
            )
        )
        url_from_clipboard = human_in_the_loop.open_editor()
        if not url_from_clipboard:
            speak(
                build_feedback_prompt(
                    "Still no URL found. Skipping this request."
                )
            )
            return
    print(f"🔗 Scraping URL found in clipboard: {url_from_clipboard}")
    speak(
        build_feedback_prompt(
            dedent(f"""I've found the URL in your clipboard.
            I'll scrape the URL and generate example code for you.
            But first, what about the example code would you like me to focus on?
            """)
        )
    )
    feedback_for_code_generation = human_in_the_loop.open_editor()
    speak(
        build_feedback_prompt(
            f"Okay got it, I see you want to focus on '{feedback_for_code_generation}'. I'll generate the code for you now."
        )
    )
    scraped_markdown = scrape_to_markdown(url_from_clipboard)
    # Pass 1: generate a first draft from the scraped documentation
    example_code_response_1: ExampleCodeResponse = llm.gpro_1_5_prompt_with_model(
        dedent(f"""You're a professional software developer advocate that takes pride in writing good code.
        You take documentation, and convert it into runnable code.
        You have a new request to generate code for the following url: '{url_from_clipboard}' with a focus on '{feedback_for_code_generation}'.
        Given the scraped WEBSITE_CONTENT below, generate working code to showcase how to run the code.
        Focus on the code. Use detailed variable and function names. Comment every line of code explaining what it does.
        Remember, this is code to showcase how the code works. It should be fully functional and runnable.
        Respond in this JSON format exclusively: {{code: ''}}

        WEBSITE_CONTENT:
        {scraped_markdown}
        """),
        pydantic_model=ExampleCodeResponse,
    )
    print("👧 Raw response: v1\n\n", example_code_response_1.code)
    # Pass 2: clean up the draft so it's immediately runnable
    example_code_response_2 = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You are an elite-level, principal software engineer.
        You work with a co-engineer that likes to leave non-runnable code in the code so you're responsible for making sure it's runnable.
        You've just generated the first draft EXAMPLE_CODE below.
        You're now taking a second pass to clean it up and make sure it meets the REQUIREMENTS.

        REQUIREMENTS:
        - Make sure it's immediately runnable and functional.
        - Remove anything that isn't runnable code.
        - This code will be immediately placed into a file and run.
        - The code should be well commented so it's easy to understand.
        - The code should be well formatted so it's easy to read.
        - The code should use verbose variable and function names.
        - You pay close attention to indentation.
        - Respond in JSON format with the following keys: {{code: ''}}

        EXAMPLE_CODE:
        {example_code_response_1.code}
        """),
        pydantic_model=ExampleCodeResponse,
    )
    print("👧 Raw response: v2\n\n", example_code_response_2.code)
    # Pass 3: final review before the code is written to disk
    example_code_response_3 = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You are a top-level programmer and super-expert in software engineering.
        You've received a near-final draft of code to finalize.
        You work with a co-engineer that likes to leave non-runnable code in the code so you're responsible for making sure it's runnable.
        You're taking a final pass to make sure the code is near perfect and fully runnable.
        You follow the REQUIREMENTS below to make sure the code is top notch for production deployment.

        REQUIREMENTS:
        - Make sure the code is immediately runnable and functional.
        - Remove anything that isn't runnable code.
        - This code will be immediately placed into a file and run.
        - The code follows expert coding best practices.
        - The code should be well commented so it's easy to understand.
        - The code should be well formatted so it's easy to read.
        - The code should use verbose variable and function names.
        - You pay close attention to indentation.
        - Respond in JSON format with the following keys: {{code: ''}}

        EXAMPLE_CODE:
        {example_code_response_2.code}
        """),
        pydantic_model=ExampleCodeResponse,
    )
    print("👧 Raw response: v3\n\n", example_code_response_3.code)
    example_code_file_prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        You've just generated the CODE below for your human companion.
        Create a file name for the code file that will be written to the following directory: {configuration['working_directory']}
        The file name should be unique and descriptive of the code it contains.
        Respond exclusively with the file name in the following JSON format: {{file_name: ''}}.

        CODE:
        {example_code_response_3.code}
        """
    )
    example_code_file_name_response = llm.gpt4t_w_vision_json_prompt(
        example_code_file_prompt,
        pydantic_model=ExampleCodeFileNameResponse,
    )
    new_file_name = example_code_file_name_response.file_name
    new_file_path = os.path.join(configuration["working_directory"], new_file_name)
    # Write the generated code to the file
    with open(new_file_path, "w") as file:
        file.write(example_code_response_3.code)
    print(f"✅ Code example written to {new_file_path}")
    speak(
        build_feedback_prompt(
            f"Code has been written to the working directory into a file named {new_file_name}. Let me know if you need anything else."
        )
    )

# --------------------- Helper Methods ---------------------
def human_file_json_prompt(contents: dict):
    """
    Prompt the user to edit the contents in a text editor, then parse the result.
    """
    edited_contents = editor.edit(contents=json.dumps(contents, indent=2))
    # editor.edit() returns a str, so parse it directly
    edited_config = json.loads(edited_contents)
    return edited_config

def scrape_to_markdown(url):
    # Send a GET request to the URL
    response = requests.get(url)
    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")
    # Convert the parsed HTML to Markdown, dropping script and style tags
    markdown = markdownify(str(soup), strip=["script", "style"])
    return markdown

def build_feedback_prompt(message: str):
    """
    Build a prompt using the standard prompt head and ask our assistant to relay the 'message'.
    """
    prompt = dedent(
        f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
        Concisely communicate the following message to your human companion: '{message}'
        """
    )
    response = llm.gpro_1_5_prompt(prompt)
    return response

# --------------------- AUDIO I/O ---------------------
def speak(text: str):
    client = ElevenLabs(
        api_key=os.getenv("ELEVEN_API_KEY"),  # Defaults to ELEVEN_API_KEY from .env
    )
    # Alternate voice: voice="WejK3H1m7MI9CHnIjW9K"
    audio = client.generate(
        text=text,
        voice="x7h79ussrwcHQUQgfa0n",
        model="eleven_turbo_v2",
        # model="eleven_multilingual_v2",
    )
    play(audio)

def transcribe_audio_file(file_path):
    try:
        # STEP 1: Create a Deepgram client using the API key
        api_key = os.getenv("DEEPGRAM_API_KEY")
        dg_client = DeepgramClient(api_key)
        # STEP 2: Read the recorded audio file
        with open(file_path, "rb") as file:
            buffer_data = file.read()
        # STEP 3: Configure Deepgram options for audio analysis
        payload: FileSource = {"buffer": buffer_data}
        options = PrerecordedOptions(model="nova-2", smart_format=True)
        # STEP 4: Call the transcribe_file method with the payload and options
        response = dg_client.listen.prerecorded.v("1").transcribe_file(payload, options)
        # STEP 5: Extract the transcript from the response
        transcript = response["results"]["channels"][0]["alternatives"][0]["transcript"]
        return transcript
    except Exception as e:
        print(f"Exception: {e}")
        return ""

def track_interaction_time():
    """Track the time it takes for the user to interact with the system in seconds."""
    global iteration_start_time
    if iteration_start_time:
        interaction_time = (datetime.now() - iteration_start_time).total_seconds()
        print(f"🕒 Interaction time: {interaction_time} seconds")
        iteration_start_time = None

def record_audio(duration=10, fs=44100):
    """Record audio from the microphone."""
    track_interaction_time()
    print("🔴 Recording...")
    recording = sd.rec(
        int(duration * fs), samplerate=fs, channels=channels, dtype="int16"
    )
    sd.wait()
    print("🎧 Recording Chunk Complete")
    global iteration_start_time
    iteration_start_time = datetime.now()
    return recording

def save_audio_file(recording, fs=44100, filename="output.wav"):
"""Save the recorded audio to a file."""
with wave.open(filename, "wb") as wf:
wf.setnchannels(channels)
wf.setsampwidth(2)
wf.setframerate(fs)
wf.writeframes(recording)
def personal_ai_assistant_loop(
    audio_chunk_size=10, activation_keyword=ACTIVATION_KEYWORD, on_keywords=None
):
    while True:
        recording = record_audio(duration=audio_chunk_size)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"audio_{timestamp}.wav"
        save_audio_file(recording, filename=filename)
        file_size = os.path.getsize(filename)
        print(f"📁 File {filename} has been saved with a size of {file_size} bytes.")
        transcript = transcribe_audio_file(filename)
        print("📝 transcript was:", transcript)
        if activation_keyword.lower() in transcript.lower():
            if on_keywords:
                on_keywords(transcript)
        os.remove(filename)

def text_after_keyword(transcript: str, keyword: str):
    """Extract and return the text that comes after a specified keyword in the transcript."""
    try:
        # Find the position of the keyword in the transcript
        keyword_position = transcript.lower().find(keyword.lower())
        if keyword_position == -1:
            # If the keyword is not found, return an empty string
            return ""
        # Extract the text after the keyword
        text_after = transcript[keyword_position + len(keyword):].strip()
        return text_after
    except Exception as e:
        print(f"Error extracting text after keyword: {e}")
        return ""

def get_first_keyword_in_prompt(prompt: str):
    map_keywords_to_agents = get_simple_keyword_ai_agent_router()
    for keyword_group, agent in map_keywords_to_agents.items():
        keywords = keyword_group.split(",")
        for keyword in keywords:
            if keyword in prompt.lower():
                return agent, keyword
    return None, None

def on_activation_keyword_detected(transcript: str):
    print("✅ Activation keyword detected! Transcript is: ", transcript)
    prompt = text_after_keyword(transcript, ACTIVATION_KEYWORD)
    print("🔍 prompt is: ", prompt)
    agent_to_run, agent_keyword = get_first_keyword_in_prompt(prompt)
    if not agent_to_run:
        print("❌ No agent found for the given prompt.")
        return
    print(f"✅ Found agent via keyword '{agent_keyword}'")
    agent_to_run(prompt)


if __name__ == "__main__":
    personal_ai_assistant_loop(on_keywords=on_activation_keyword_detected)
# modules/parsers.py
import json
import re


def parse_json_from_gemini(json_str: str):
    """Parses a dictionary from a JSON-like object string.

    Args:
        json_str: A string representing a JSON-like object, e.g.:
            ```json
            {
              "key1": "value1",
              "key2": "value2"
            }
            ```

    Returns:
        A dictionary representing the parsed object, or None if parsing fails.
    """
    try:
        # Remove potential leading/trailing whitespace
        json_str = json_str.strip()
        # Extract JSON content from triple backticks with a "json" language specifier
        json_match = re.search(r"```json\s*(.*?)\s*```", json_str, re.DOTALL)
        if json_match:
            json_str = json_match.group(1)
        return json.loads(json_str)
    except (json.JSONDecodeError, AttributeError):
        return None
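

# Hypothetical usage sketch (not in the original gist): checks the parser on a
# typical fenced Gemini reply.
if __name__ == "__main__":
    fenced_reply = '```json\n{"key1": "value1", "key2": "value2"}\n```'
    print(parse_json_from_gemini(fenced_reply))
    # -> {'key1': 'value1', 'key2': 'value2'}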
# Standalone Vosk-based voice recorder
import json
import queue

import sounddevice as sd
import vosk


class VoiceRecorder:
    def __init__(self, model_path="model", device=None, activation_keyword="Hello Ada",
                 end_keyword="thanks", stop_keyword="stop recording"):
        self.model = vosk.Model(model_path)
        self.device = device
        self.activation_keyword = activation_keyword.lower()
        self.end_keyword = end_keyword.lower()
        self.stop_keyword = stop_keyword.lower()
        self.interaction_transcript = ""
        self.recording = False
        self.q = queue.Queue()

    def callback(self, indata, frames, time, status):
        # Push raw audio bytes onto the queue for the recognizer loop
        self.q.put(bytes(indata))

    def continuous_listen(self):
        with sd.RawInputStream(callback=self.callback, device=self.device, dtype="int16",
                               channels=1, samplerate=16000) as stream:
            rec = vosk.KaldiRecognizer(self.model, stream.samplerate)
            while True:
                data = self.q.get()
                if rec.AcceptWaveform(data):
                    result = rec.Result()
                    # Result() returns a JSON string; parse it safely instead of eval()
                    continue_listening = self.process_result(json.loads(result)["text"])
                    if not continue_listening:
                        print("Shutting down the listening process.")
                        break

    def process_result(self, transcript):
        print(f"Detected: {transcript}")
        if self.activation_keyword in transcript and not self.recording:
            self.start_interaction()
        elif self.end_keyword in transcript and self.recording:
            self.stop_interaction()
        elif self.stop_keyword in transcript:
            return False
        if self.recording:
            self.interaction_transcript += " " + transcript
        return True

    def start_interaction(self):
        print("Starting interaction ...")
        self.recording = True

    def stop_interaction(self):
        print("Stopping interaction ...")
        self.process_command(self.interaction_transcript)
        self.interaction_transcript = ""
        self.recording = False

    def process_command(self, transcript):
        # Process the recorded audio or perform actions based on the last command
        print(f"Processing command: {transcript}")


# Example usage:
if __name__ == "__main__":
    # Ensure you have a Vosk model directory.
    recorder = VoiceRecorder("./audio_models/vosk-model-en-us-0.22-lgraph")
    recorder.continuous_listen()