|
import os
import re
import subprocess
import sys
import wave
from datetime import datetime

import assemblyai as aai
import pyperclip
import sounddevice as sd
from dotenv import load_dotenv
from elevenlabs import play
from elevenlabs.client import ElevenLabs
from pydantic import BaseModel

from modules import llm
|
|
|
load_dotenv()  # Pull API keys (ELEVEN_API_KEY, ASSEMBLYAI_API_KEY, ...) from .env

# Word that must appear in a transcript before the assistant responds.
ACTIVATION_KEYWORD = "ada"
# Name the assistant uses for itself inside the LLM persona prompts.
PERSONAL_AI_ASSISTANT_NAME = "Ada"
# Name of the human user, interpolated into the persona prompts.
HUMAN_COMPANION_NAME = "Dan"

# Mono recording; shared by record_audio() and save_audio_file().
channels = 1

# Wall-clock start of the current interaction; set after each recording chunk
# and consumed by track_interaction_time(). None when no interaction is pending.
iteration_start_time = None
|
|
|
|
|
def speak(text: str):
    """Synthesize `text` with ElevenLabs and play it through the speakers."""
    tts_client = ElevenLabs(
        api_key=os.getenv("ELEVEN_API_KEY"),  # defaults to ELEVEN_API_KEY from .env
    )
    audio_stream = tts_client.generate(
        text=text,
        voice="WejK3H1m7MI9CHnIjW9K",
        model="eleven_turbo_v2",
    )
    play(audio_stream)
|
|
|
|
|
def run_bash_command(prompt: str):
    """Have the LLM pick a bash command for `prompt`, run it, then speak a summary.

    Flow: constrained prompt -> structured LLM response -> run the command
    through the user's bash profile -> confirm completion out loud.
    Returns early (without speaking) when the command fails.
    """
    run_bash_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

You've been asked to run the following bash command: '{prompt}'

Here are available bash commands you can run

# chrome browser
browser() {{
    open -a 'Google Chrome' $1
}}

# typescript playground
playt() {{
    cursor "/Users/ravix/Documents/projects/experimental/playt"
}}

Based on the command - RESPOND WITH THE COMMAND to run in this JSON format: {{bash_command_to_run: ''}}.

Exclude any new lines or code blocks from the command. Respond with exclusively JSON.

Your command will be immediately run and the output will be returned to the user.
"""

    class BashCommandResponse(BaseModel):
        # Structured-output schema the LLM response is parsed into.
        bash_command_to_run: str

    response: BashCommandResponse = llm.gpt4t_w_vision_prompt(
        run_bash_prompt, pydantic_model=BashCommandResponse
    )

    print("π§ Raw response: ", response)

    command = response.bash_command_to_run

    print(f"π» {PERSONAL_AI_ASSISTANT_NAME} is running this command: ", command)
    try:
        # Load the user's shell functions (browser, playt, ...) before the command.
        command = "source ~/.bash_profile && " + command
        # SECURITY NOTE: shell=True on LLM-generated text is inherently risky;
        # the prompt restricts the command set, but review before widening it.
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,  # fix: result.stdout was None without capturing
            text=True,
            check=True,  # fix: CalledProcessError was unreachable without check=True
        )
        print(f"π» Command executed successfully: {command}")
        print(f"π» Output: {result.stdout}")
    except subprocess.CalledProcessError as e:
        print(f"π» Error executing command: {command}\nπ» Error: {e}")
        return

    soft_talk_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

We both like short, concise, back-and-forth conversations.

We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.

You've just helped your human companion run this bash command: {command}

Let your human companion know you've finished running the command and what you can do next."""

    response = llm.gpro_1_5_prompt(soft_talk_prompt)

    speak(response)
|
|
|
|
|
def question_answer(prompt: str):
    """Answer `prompt` out loud, staying in the assistant's concise persona."""
    qa_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

We both like short, concise, back-and-forth conversations, no longer than 2 sentences.

We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.

We like to discuss in high level details without getting too technical.

Respond to the following question: {prompt}"""

    answer = llm.gpro_1_5_prompt(qa_prompt)
    speak(answer)
|
|
|
|
|
def soft_talk(prompt: str):
    """Produce a short conversational reply to `prompt` and speak it."""
    persona_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

We both like short, concise, back-and-forth conversations.

We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.

Respond to the following prompt: {prompt}"""

    reply = llm.gpro_1_5_prompt(persona_prompt)
    speak(reply)
|
|
|
|
|
def shell_command(prompt: str):
    """Generate a MacOS shell command from `prompt`, copy it to the clipboard, confirm aloud."""
    shell_command_prompt = f"""You are a highly efficient, code-savvy AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

Your task is to provide a JSON response with the following format: {{command_to_run: ''}} detailing the shell command for MacOS bash to based on this question: {prompt}.

After generating the response, your command will be attached DIRECTLY to your human companions clipboard to be run."""

    class ShellCommandModel(BaseModel):
        # Structured-output schema for the generated shell command.
        command_to_run: str

    structured = llm.gpt4t_w_vision_prompt(
        prompt=shell_command_prompt,
        pydantic_model=ShellCommandModel,
    )

    # Place the command on the clipboard so the user can paste and run it.
    pyperclip.copy(structured.command_to_run)

    completion_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

We both like short, concise, back-and-forth conversations.

You've just attached the command '{structured.command_to_run}' to your human companion's clipboard like they've requested.

Let your human companion know you've attached it and let them know you're ready for the next task."""

    speak(llm.gpro_1_5_prompt(completion_prompt))
|
|
|
|
|
def end_conversation(prompt: str):
    """Say goodbye in persona, then terminate the whole process via sys.exit()."""
    farewell_prompt = f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.

You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

We both like short, concise, back-and-forth conversations.

We're wrapping up our work for the day. You're a great engineering partner.

Thanks for all your help and for being a great engineering partner.

Respond to your human companions closing thoughts: {prompt}"""

    farewell = llm.gpro_1_5_prompt(farewell_prompt)
    speak(farewell)
    sys.exit()
|
|
|
|
|
def get_keywords_agent_router():
    """
    Decision making based on contents of prompt (Simple LLM Router).

    Keys are comma-separated trigger keywords; values are the agent handlers.
    Insertion order defines routing priority.
    """
    routes = {}
    routes["bash,browser"] = run_bash_command  # AI Agent | Agentic Workflow
    routes["shell"] = shell_command  # AI Agent | Agentic Workflow
    routes["question"] = question_answer  # AI Agent | Agentic Workflow
    routes["hello,hey,hi"] = soft_talk  # AI Agent | Agentic Workflow
    routes["exit"] = end_conversation
    return routes
|
|
|
|
|
def transcribe_audio_file(file_path):
    """Transcribe a local audio file with AssemblyAI and return the transcript object."""
    aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
    return aai.Transcriber().transcribe(file_path)
|
|
|
|
|
def track_interaction_time():
    """Track the time it takes for the user to interact with the system in seconds."""
    global iteration_start_time
    if iteration_start_time is None:
        return
    elapsed = (datetime.now() - iteration_start_time).total_seconds()
    print(f"π Interaction time: {elapsed} seconds")
    # Reset so the next call is a no-op until a new interaction starts.
    iteration_start_time = None
|
|
|
|
|
def record_audio(duration=10, fs=44100):
    """Record audio from the microphone."""
    # Report how long the previous turn took before starting a new chunk.
    track_interaction_time()

    print("π΄ Recording...")
    frames = sd.rec(
        int(duration * fs), samplerate=fs, channels=channels, dtype="int16"
    )
    sd.wait()  # block until the chunk is fully captured
    print("π§ Recording Chunk Complete")

    # Stamp the moment recording finished; consumed by track_interaction_time().
    global iteration_start_time
    iteration_start_time = datetime.now()
    return frames
|
|
|
|
|
def save_audio_file(recording, fs=44100, filename="output.wav"):
    """Save the recorded audio to a file."""
    with wave.open(filename, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)  # 2 bytes/sample matches the int16 recording dtype
        wav_file.setframerate(fs)
        wav_file.writeframes(recording)
|
|
|
|
|
def personal_ai_assistant_loop(
    audio_chunk_size=10, activation_keyword=ACTIVATION_KEYWORD, on_keywords=None
):
    """Continuously record, save, and transcribe audio chunks.

    When `activation_keyword` appears in a chunk's transcript, invoke the
    `on_keywords` callback with the transcript. Each chunk's temporary wav
    file is deleted after processing. Runs forever (exit via an agent that
    calls sys.exit, or KeyboardInterrupt).
    """
    while True:
        recording = record_audio(duration=audio_chunk_size)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"audio_{timestamp}.wav"
        save_audio_file(recording, filename=filename)
        try:
            file_size = os.path.getsize(filename)
            # fix: message previously printed a literal "(unknown)" placeholder
            # instead of interpolating the actual file name.
            print(f"π File {filename} has been saved with a size of {file_size} bytes.")
            transcript = transcribe_audio_file(filename)
            print("π transcript was:", transcript.text)
            if activation_keyword.lower() in transcript.text.lower():
                if on_keywords:
                    on_keywords(transcript)
        finally:
            # fix: always delete the chunk so failed iterations don't leak wav files.
            os.remove(filename)
|
|
|
|
|
def text_after_keyword(transcript, keyword):
    """Extract and return the text that comes after a specified keyword in the transcript.

    Matching is case-insensitive; returns "" when the keyword is absent or
    the transcript object has no usable `text` attribute.
    """
    try:
        text = transcript.text
    except AttributeError as e:
        # fix: was a broad `except Exception` around the whole body, which
        # could hide real bugs (e.g. typos below); only a missing `text`
        # attribute is an expected failure here.
        print(f"Error extracting text after keyword: {e}")
        return ""

    keyword_position = text.lower().find(keyword.lower())
    if keyword_position == -1:
        # Keyword not found: nothing to extract.
        return ""
    # Everything after the keyword, with surrounding whitespace trimmed.
    return text[keyword_position + len(keyword):].strip()
|
|
|
|
|
def get_first_keyword_in_prompt(prompt: str, keyword_map=None):
    """Return (agent, keyword) for the first routing keyword found in `prompt`.

    keyword_map: optional {comma-separated-keywords: agent} mapping; defaults
    to get_keywords_agent_router(). Iteration follows the map's insertion
    order. Returns (None, None) when no keyword matches.
    """
    if keyword_map is None:
        keyword_map = get_keywords_agent_router()
    # fix: match whole words only. The old substring test misrouted prompts,
    # e.g. "hi" matched inside "this" and "shell" inside "shellfish".
    words = set(re.findall(r"[a-z]+", prompt.lower()))
    for keyword_group, agent in keyword_map.items():
        for keyword in keyword_group.split(","):
            if keyword in words:
                return agent, keyword
    return None, None
|
|
|
|
|
def on_activation_keyword_detected(transcript: aai.Transcript):
    """Handle a transcript that contains the activation keyword: route and run an agent."""
    print("β Activation keyword detected!, transcript is: ", transcript.text)

    # Everything after the activation keyword is treated as the user's request.
    prompt = text_after_keyword(transcript, ACTIVATION_KEYWORD)

    print("π prompt is: ", prompt)

    # Route the request to the first agent whose trigger keyword appears.
    agent_to_run, agent_keyword = get_first_keyword_in_prompt(prompt)

    if not agent_to_run:
        # No routing keyword matched; ignore this utterance.
        print("β No agent found for the given prompt.")
        return

    print(f"β Found agent via keyword '{agent_keyword}'")

    agent_to_run(prompt)
|
|
|
|
|
personal_ai_assistant_loop(on_keywords=on_activation_keyword_detected) |