witt3rd/ADA_v2_README.md

## ADA_v2_README.md

      
    Raw
  

              ADA_v2_README.md
            
          
    This is not complete, working code.
This is strictly a v0.2, scrappy, proof-of-concept version of a personal AI Assistant working end to end in just ~726 LOC.
This is the second iteration showcasing the two-way prompt aka multi-step human in the loop. The initial, v0, assistant version is here.
It's only a frame of reference for you to consume the core ideas of how to build a POC of a personal AI Assistant.
To see the high level of how this works check out the explanation video. To follow our agentic journey check out the @IndyDevDan channel.
Stay focused, keep building.

  
## editor.py
import subprocess
import os
import random
import time

def edit(contents: str):
    """
    Opens `contents` in TextEdit on macOS and waits until it is closed to proceed.

    The text is written to a `tempfile_<random>.json` file in the current working
    directory, TextEdit is launched on that file, and the call blocks until the
    launched process returns. The temporary file is removed afterwards, even when
    launching the editor or reading the file back fails.

    Args:
    - contents (str): The initial text to place in the editor.

    Returns:
    - str: The content of the temporary file after the editor was closed.
    """
    # Get the current working directory
    current_dir = os.getcwd()
    # Generate a random number to include in the filename
    random_number = random.randint(1000, 9999)
    temp_file_path = os.path.join(current_dir, f'tempfile_{random_number}.json')

    try:
        # Create the temporary file; leaving the `with` block flushes and closes it
        with open(temp_file_path, 'w+') as tmp:
            tmp.write(contents)

        # Change the file permissions to make it readable and writable by everyone
        os.chmod(temp_file_path, 0o666)

        # Introduce a delay
        time.sleep(1)  # Wait for 1 second before opening the file in Editor

        # Open TextEdit on the file and wait for it to close
        editor_process = subprocess.Popen(['open', '-W', '-n', '-a', 'TextEdit', temp_file_path])

        # Wait for the TextEdit process to close
        editor_process.wait()

        # Read the modified content from the file
        with open(temp_file_path, 'r') as file:
            modified_content = file.read()
    finally:
        # Clean up the temporary file on every exit path, not only on success
        try:
            os.remove(temp_file_path)
        except FileNotFoundError:
            # Nothing to clean up: the file was never created or is already gone
            pass

    return modified_content

# Example usage: open a sample string in the editor and print what comes back.
if __name__ == "__main__":
    print(edit("How are you doing this. Tell me more about it:"))

## human_in_the_loop.py
import subprocess
import tkinter as tk
from tkinter import filedialog
from modules import editor


def open_file() -> str | None:
    """Opens a file selection dialog and returns the selected file path.

    Returns:
    - str | None: The path of the chosen file, or None when the dialog is cancelled.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window
    try:
        # askopenfilename returns the path itself, so no file handle is left open
        file_path = filedialog.askopenfilename()
    finally:
        # Always tear down the hidden root window, including when the user cancels
        root.destroy()
    # A cancelled dialog yields an empty value; report that as None
    return file_path or None


def open_editor() -> str:
    """Call `editor.edit` with empty contents and return its result."""
    blank_document = ""
    return editor.edit(contents=blank_document)


def open_file_in_editor_and_continue(file: str) -> None:
    """Run the 'code' command on `file`; print a notice instead when no file is given."""
    if not file:
        print("No file provided to open.")
        return
    subprocess.run(["code", file])

## llm.py
import base64
import google.generativeai as genai
import os
from dotenv import load_dotenv
from pydantic import BaseModel
from modules import parsers
import openai

# Load environment variables from .env file
load_dotenv()
# The Google key is required: a missing GOOGLE_API_KEY raises KeyError at import time
api_key = os.environ["GOOGLE_API_KEY"]

# The OpenAI key is looked up with .get(), so it is None when OPENAI_API_KEY is unset
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Initialize Google API Client
genai.configure(api_key=api_key)


def gpro_1_5_prompt(prompt) -> str:
    """
    Generates content based on the provided prompt using the Gemini 1.5 API model and returns the text part of the first candidate's content.

    Args:
    - prompt (str): The prompt to generate content for.

    Returns:
    - str: The text of the first part of the first candidate in the generated response.
    """
    model_name = "models/gemini-1.5-pro-latest"
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content(prompt, request_options={})
    # Only the first part of the first candidate is used; anything else is ignored
    return response.candidates[0].content.parts[0].text


def gpro_1_5_prompt_with_model(prompt, pydantic_model: type[BaseModel]) -> BaseModel:
    """
    Generates content based on the provided prompt using the Gemini 1.5 API model and validates the text of the first candidate against a pydantic model.

    Args:
    - prompt (str): The prompt to generate content for.
    - pydantic_model (type[BaseModel]): The pydantic model class used to validate the response.

    Returns:
    - BaseModel: The instance of `pydantic_model` produced by validating the response.
    """
    model_name = "models/gemini-1.5-pro-latest"
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content(prompt, request_options={})
    response_text = response.candidates[0].content.parts[0].text
    if "```json" in response_text:
        # Fenced reply: hand it to the project parser first, then validate its result
        # NOTE(review): presumably the parser strips the fence and returns parsed data - confirm
        return pydantic_model.model_validate(
            parsers.parse_json_from_gemini(response_text)
        )
    # No fence: treat the whole reply as a JSON document
    return pydantic_model.model_validate_json(response_text)


def gpt4t_w_vision_json_prompt(
    prompt: str,
    model: str = "gpt-4-turbo-2024-04-09",
    instructions: str = "You are a helpful assistant that response in JSON format.",
    pydantic_model: type[BaseModel] | None = None,
) -> BaseModel | str:
    """
    Sends a system + user message pair to the OpenAI chat completions API in JSON mode.

    Args:
    - prompt (str): The user message.
    - model (str): The OpenAI model name.
    - instructions (str): The system message.
    - pydantic_model (type[BaseModel] | None): Model class used to validate the JSON reply.

    Returns:
    - BaseModel | str: An instance of `pydantic_model` when one is given, otherwise the
      raw JSON text of the reply.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": instructions,  # Added instructions as a system message
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        response_format={"type": "json_object"},
    )

    response_text = response.choices[0].message.content
    print(f"Text LLM response: {response_text}")

    if pydantic_model is None:
        # Without a model there is nothing to validate against; return the raw
        # text instead of failing after the request has already been made
        return response_text

    return pydantic_model.model_validate_json(response_text)


def gpt4t_w_vision(
    prompt: str,
    model: str = "gpt-4-turbo-2024-04-09",
    instructions: str = "You are a helpful assistant.",
) -> str:
    """Send `instructions` (system) and `prompt` (user) to the OpenAI chat completions API and return the reply text."""
    system_message = {"role": "system", "content": instructions}
    user_message = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
    )

    # Only the first choice is used
    return completion.choices[0].message.content


def encode_image(image_path: str) -> str:
    """Read the file at `image_path` in binary mode and return its bytes as a base64 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


def gpt4t_w_vision_image_with_model(
    prompt: str,
    file_path: str,
    model: str = "gpt-4-turbo-2024-04-09",
    instructions: str = "You are a helpful assistant that specializes in image analysis.",
    pydantic_model: type[BaseModel] | None = None,
):
    """
    Sends a prompt together with an image file to the OpenAI chat completions API in JSON mode.

    The image is read from disk, base64-encoded, and embedded in the request as a data URL.

    Args:
    - prompt (str): The text part of the user message.
    - file_path (str): Path of the image file to attach.
    - model (str): The OpenAI model name.
    - instructions (str): The system message.
    - pydantic_model (type[BaseModel] | None): Model class used to validate the JSON reply.

    Returns:
    - An instance of `pydantic_model` when one is given, otherwise the raw JSON text of the reply.
    """
    import mimetypes  # function-scope import so the module's import block stays untouched

    # Resolve the MIME type from the file name so that e.g. '.jpg' maps to 'image/jpeg'
    mime_type, _ = mimetypes.guess_type(file_path)
    if not mime_type or not mime_type.startswith("image/"):
        # Unknown type: build the subtype from the text after the last dot
        file_extension = file_path.split(".")[-1]
        mime_type = f"image/{file_extension}"

    # The encoded payload is deliberately not printed: it can be megabytes of text
    base64_image = encode_image(file_path)

    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": instructions,
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        },
                    },
                ],
            },
        ],
        response_format={"type": "json_object"},
    )

    print("response", response)

    response_text = response.choices[0].message.content

    print("response_text", response_text)

    if pydantic_model is None:
        # Nothing to validate against; return the raw text rather than failing
        return response_text

    return pydantic_model.model_validate_json(response_text)

## main9_ada_personal_ai_assistant_v02.py
# ADA - Personal AI Assistant (v0.2.1)
# Proof of Concept

# CHANGES: (v0.2.1)
# - using textwrap.dedent() for better formatting of multi-line prompts
# - using deepgram instead of assembly ai for audio-to-text transcription
# - minor changes to the code generation prompts

import sys
from tkinter import Tk, filedialog
import tkinter
from pydantic import BaseModel
import sounddevice as sd
import wave
from datetime import datetime
import os
from dotenv import load_dotenv
from elevenlabs import play
from elevenlabs.client import ElevenLabs
import subprocess
import pyperclip
from modules import human_in_the_loop, llm
from modules import editor
import json
import difflib
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify
from textwrap import dedent
from deepgram import (
    DeepgramClient,
    PrerecordedOptions,
)


# Load environment variables from the .env file before any os.getenv lookups below
load_dotenv()

# Word that must appear in a transcript (compared case-insensitively) to trigger the assistant
ACTIVATION_KEYWORD = "Ada"
# Names interpolated into the prompts sent to the language models
PERSONAL_AI_ASSISTANT_NAME = "ADA"
# Path of the JSON configuration file, relative to the current working directory
CONFIG_FILE = "./config.json"
HUMAN_COMPANION_NAME = "Chris"
# Shared opening lines reused at the top of most prompts in this file.
# NOTE(review): the first line starts directly after the opening quotes with no
# indentation, so dedent() finds no common margin and the continuation lines
# keep their leading spaces - confirm whether that is intended.
PERSONAL_AI_ASSISTANT_PROMPT_HEAD = dedent(
       f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
           You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
           We both like short, concise, back-and-forth conversations.
        """)

# Load the configuration; on first run (file missing) create it with defaults
try:
    with open(CONFIG_FILE, "r") as config_file:
        configuration = json.load(config_file)
except FileNotFoundError:
    configuration = {
        # No working directory is configured by default
        "working_directory": None,
    }
    # write
    with open(CONFIG_FILE, "w") as config_file:
        json.dump(configuration, config_file, indent=2)

# Number of audio channels used both when recording and when writing the wav file
channels = 1

# Set when a recording chunk completes and cleared by track_interaction_time()
iteration_start_time = None


# --------------------- Agent Workflows ---------------------


def get_simple_keyword_ai_agent_router():
    """
    Build the routing table used for keyword-based decision making (Simple LLM Router).

    Each key is a string of one or more comma-separated keywords and each value is
    the workflow function associated with those keywords.
    """
    keyword_routes = {}

    # v0.2 New Flows w/two-way prompts
    keyword_routes["configure,configuration"] = configure_assistant_workflow
    keyword_routes["example code"] = example_code_workflow
    keyword_routes["view component"] = image_to_vue_component_workflow

    # AI Agent | Agentic Workflows
    keyword_routes["bash,browser"] = run_bash_command_workflow
    keyword_routes["shell"] = shell_command_workflow
    keyword_routes["question"] = question_answer_workflow
    keyword_routes["hello,hey,hi"] = soft_talk_workflow

    # Ends the session
    keyword_routes["exit"] = end_conversation_workflow

    return keyword_routes


def image_to_vue_component_workflow(prompt: str):
    """
    Generate a Vue component from an image, then optionally revise it from user feedback.

    Steps: ask the user to select an image file, send it to the vision model to get
    the component code, ask the model for a file name, write the component into the
    configured working directory (the current directory when none is configured),
    open the file with the 'code' command, then open the text editor for change
    requests and, when any were entered, overwrite the file with the updated component.

    Args:
    - prompt (str): The routed user prompt. It is not used by this workflow.
    """

    class VueComponentResponse(BaseModel):
        vue_component: str

    class FileNameResponse(BaseModel):
        file_name: str

    speak(build_feedback_prompt("Select an image to generate a Vue component from."))

    open_file_path = human_in_the_loop.open_file()

    # Bail out before logging a selection when the dialog was cancelled
    if not open_file_path:
        speak(
            build_feedback_prompt("No image selected. Skipping this request.")
        )
        return

    print(f"🎆 Image selected at {open_file_path}")

    speak(
        build_feedback_prompt(
            "Okay I see the image, Now I'll generate the Vue component based on the image and your request."
        )
    )

    component_response: VueComponentResponse = llm.gpt4t_w_vision_image_with_model(
        dedent(f"""You're a Senior Vue 3 developer. You build new Vue components using the Composition API with <script setup lang='ts'>.
                   You strictly follow the REQUIREMENTS below.

                   REQUIREMENTS:
                        - Your current assignment is to build a new vue component that matches the image.
                        - Return strictly the code for the Vue component including <template>, <script setup lang='ts'>, and <style> sections.
                        - Use tailwind css to style the component.
                        - Respond in this JSON format exclusively: {{vue_component: ''}}
                """),
        file_path=open_file_path,
        pydantic_model=VueComponentResponse,
    )

    file_name_response: FileNameResponse = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You're a Senior Vue 3 developer. You build new Vue components using the Composition API with <script setup lang='ts'>.
                   You've just created the VUE_COMPONENT. Now you're naming the component.

                   Create a concise and descriptive name for the component.
                   Respond in this JSON format exclusively: {{file_name: ''}}

                   VUE_COMPONENT:
                       {component_response.vue_component}
               """),
        pydantic_model=FileNameResponse,
    )

    # The default configuration has no working directory; fall back to the
    # current directory so os.path.join is never handed None
    working_directory = configuration.get("working_directory") or os.getcwd()

    # dump to .vue file
    file_path = os.path.join(working_directory, file_name_response.file_name)

    # write
    with open(file_path, "w") as file:
        file.write(component_response.vue_component)

    speak(
        build_feedback_prompt(
            f"I've created the Vue component and named it {file_name_response.file_name}. Let me know if you want to make any edits."
        )
    )

    human_in_the_loop.open_file_in_editor_and_continue(file_path)

    # Second half of the two-way prompt: collect change requests from the user
    requested_updates = human_in_the_loop.open_editor()

    if not requested_updates:
        speak(build_feedback_prompt("No changes requested. Component ready for use."))
        return

    component_to_update = component_response.vue_component

    update_component_response: VueComponentResponse = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You're a Senior Vue 3 developer. You build new Vue components using the Composition API with <script setup lang='ts'>.
                   You've just created the VUE_COMPONENT. A change from your product manager has come in and you're now tasked with updating the component.
                   You follow the REQUIREMENTS below to make sure the component is updated correctly.

                   REQUIREMENTS:
                     - Your current assignment is to make updates to the VUE_COMPONENT based on the changes requested by the product manager.
                     - Return strictly the code for the Vue component including <template>, <script setup lang='ts'>, and <style> sections.
                     - Use tailwind css to style the component.
                     - Respond in this JSON format exclusively: {{vue_component: ''}}

                   REQUESTED_CHANGES:
                      {requested_updates}

                   VUE_COMPONENT:
                      {component_to_update}
                """),
        pydantic_model=VueComponentResponse,
    )

    # write to file
    with open(file_path, "w") as file:
        file.write(update_component_response.vue_component)

    speak(
        build_feedback_prompt(
            "I've updated the Vue component based on your feedback. What's next?"
        )
    )


def run_bash_command_workflow(prompt: str):
    """
    Ask the model which bash command matches the request, run it, and report back.

    The model is given the request plus a list of known shell functions and must
    answer with JSON naming the command to run. The command is executed through
    the shell after sourcing ~/.bash_profile. On success a short spoken follow-up
    is generated; when the command exits with a non-zero status the error is
    printed and the workflow stops without speaking.

    Args:
    - prompt (str): The user's request describing the command to run.
    """

    run_bash_prompt = dedent(
               f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
                   You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

                   You've been asked to run the following bash COMMAND: '{prompt}'

                   Here are available bash COMMANDS you can run:

                   # chrome browser
                   browser() {{
                      open -a 'Google Chrome' $1
                   }}

                   # typescript playground
                   playt() {{
                      cursor "/Users/ravix/Documents/projects/experimental/playt"
                   }}

                   chats() {{
                      browser "https://aistudio.google.com/app/prompts/new_chat"
                      browser "https://console.anthropic.com/workbench"
                      browser "https://chat.openai.com/"
                   }}

                   Based on the COMMAND - RESPOND WITH THE COMMAND to run in this JSON format: {{bash_command_to_run: ''}}.

                   Exclude any new lines or code blocks from the command. Respond with exclusively JSON.

                   Your COMMAND will be immediately run and the output will be returned to the user.
                """)

    class BashCommandResponse(BaseModel):
        bash_command_to_run: str

    response: BashCommandResponse = llm.gpt4t_w_vision_json_prompt(
        run_bash_prompt, pydantic_model=BashCommandResponse
    )

    print("👧 Raw response: ", response)

    command = response.bash_command_to_run

    print(f"💻 {PERSONAL_AI_ASSISTANT_NAME} is running this command: ", command)
    try:
        # Source the profile first so the shell functions listed in the prompt exist
        command = "source ~/.bash_profile && " + command
        # SECURITY NOTE(review): this executes model-generated text in a shell with
        # no confirmation step; consider an allow-list or a human approval prompt.
        # check=True makes a non-zero exit raise CalledProcessError so the handler
        # below actually runs. Output is not captured; the command writes straight
        # to this process's stdout/stderr.
        subprocess.run(
            command,
            shell=True,
            check=True,
        )
        print(f"💻 Command executed successfully: {command}")
    except subprocess.CalledProcessError as e:
        print(f"💻 Error executing command: {command}\n💻 Error: {e}")
        return

    soft_talk_prompt = dedent(
              f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
                  You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.

                  We both like short, concise, back-and-forth conversations.
                  We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.

                  You've just helped your human companion run this bash COMMAND: {command}

                  Let your human companion know you've finished running the command and what you can do next.
               """)

    response = llm.gpro_1_5_prompt(soft_talk_prompt)

    speak(response)


def question_answer_workflow(prompt: str):
    """Ask the Gemini model to answer the question in `prompt` and speak the answer."""
    answer = llm.gpro_1_5_prompt(
        dedent(
              f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
                  We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.
                  We like to discuss in high level details without getting too technical.
                  Respond to the following question: {prompt}
               """)
    )
    speak(answer)


def soft_talk_workflow(prompt: str):
    """Ask the Gemini model for a conversational reply to `prompt` and speak it."""
    reply = llm.gpro_1_5_prompt(
        dedent(
              f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
                  We don't like small talk so we always steer our conversation back toward creating, building, product development, designing, and coding.
                  Respond to the following prompt: {prompt}
               """)
    )
    speak(reply)


def shell_command_workflow(prompt: str):
    """
    Generate a shell command for the request, copy it to the clipboard, and announce it.

    The command is only placed on the clipboard; this workflow does not run it.
    """

    class ShellCommandModel(BaseModel):
        command_to_run: str

    # Step 1: have the JSON-mode model produce the command
    generated = llm.gpt4t_w_vision_json_prompt(
        prompt=dedent(
              f"""You are a highly efficient, code-savvy AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
                  You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
                  Your task is to provide a JSON response with the following format: {{command_to_run: ''}} detailing the shell command
                  for the macOS bash shell to based on this question: {prompt}.

                  After generating the response, your command will be attached DIRECTLY to your human companions clipboard to be run.
               """),
        pydantic_model=ShellCommandModel,
    )

    # Step 2: hand the command to the user via the clipboard
    pyperclip.copy(generated.command_to_run)

    # Step 3: let the assistant confirm out loud
    confirmation = llm.gpro_1_5_prompt(
        dedent(
              f"""You are a friendly, ultra helpful, attentive, concise AI assistant named '{PERSONAL_AI_ASSISTANT_NAME}'.
                  You work with your human companion '{HUMAN_COMPANION_NAME}' to build valuable experience through software.
                  We both like short, concise, back-and-forth conversations.

                  You've just attached the command '{generated.command_to_run}' to your human companion's clipboard like they've requested.

                  Let your human companion know you've attached it and let them know you're ready for the  next task.
               """)
    )

    speak(confirmation)


def summarize_diff_workflow(start: str | dict, end: str | dict, file: str):
    """
    Summarize the diff between two JSON-serializable values and speak the summary.

    Both values are rendered with `json.dumps(..., indent=2)`, compared line by
    line as a unified diff, and the diff is given to the model for a one-sentence
    summary that is then spoken.

    Args:
    - start (str | dict): The value before the edit.
    - end (str | dict): The value after the edit.
    - file (str): Name of the edited file, used only in the prompt text.

    Returns:
    - str: The unified diff text.
    """
    start = json.dumps(start, indent=2).splitlines()
    end = json.dumps(end, indent=2).splitlines()

    # The input lines carry no trailing newline, so lineterm="" keeps the
    # generated header and hunk lines newline-free too; otherwise joining with
    # "\n" below would leave blank lines after each of them
    diff = difflib.unified_diff(
        start, end, fromfile="before", tofile="after", lineterm=""
    )
    diffed = "\n".join(diff)

    summarize_prompt = dedent(f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
                  Your companion has just finished editing the {file}.

                  You'll concisely summarize the changes made to the file in a 1 sentence summary.
                  The point is to communicate and acknowledge the changes made to the file.

                  The changes are:

                  {diffed}
               """)

    summarize_response = llm.gpro_1_5_prompt(summarize_prompt)

    speak(summarize_response)

    return diffed


def configure_assistant_workflow(prompt: str):
    """
    Configure settings for our assistant.

    Announces that the configuration is open, lets the user edit it as JSON in
    the text editor, writes the result to CONFIG_FILE, replaces the in-memory
    `configuration` with it, and speaks a summary of what changed.

    Args:
    - prompt (str): The routed user prompt. It is not used by this workflow.
    """
    global configuration

    configure_prompt = dedent(f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
                  You've just opened a configuration file for your human companion.
                  Let your human companion know you've opened the file and are ready for them to edit it.
               """)

    prompt_response = llm.gpro_1_5_prompt(prompt=configure_prompt)

    speak(prompt_response)

    previous_configuration = configuration
    updated_config = human_file_json_prompt(configuration)
    with open(CONFIG_FILE, "w") as config_file:
        json.dump(updated_config, config_file, indent=2)

    # Make the edit take effect for the running session, not only on disk
    configuration = updated_config

    summarize_diff_workflow(
        previous_configuration, updated_config, "configuration.json"
    )


def end_conversation_workflow(prompt: str):
    """Speak a model-generated reply to the user's closing `prompt`, then exit the process."""
    farewell = llm.gpro_1_5_prompt(
        dedent(f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
                  We're wrapping up our work for the day. You're a great engineering partner.
                  Thanks for all your help and for being a great engineering partner.

                  Respond to your human companions closing thoughts: {prompt}
               """)
    )
    speak(farewell)

    # Raises SystemExit, ending the assistant loop
    sys.exit()


def example_code_workflow(prompt: str):
    """
    Generate example code from a documentation URL and write it to a file.

    Steps: take a URL from the clipboard (or, failing that, from the text editor),
    ask the user what the example should focus on, scrape the page to Markdown,
    generate a first draft with Gemini, refine it in two further passes with the
    JSON-mode OpenAI model, ask the model for a file name, and write the final
    code into the configured working directory (the current directory when none
    is configured).

    Args:
    - prompt (str): The routed user prompt. It is not used by this workflow.
    """
    class ExampleCodeResponse(BaseModel):
        code: str

    class ExampleCodeFileNameResponse(BaseModel):
        file_name: str

    # Strip surrounding whitespace: copied or editor-saved text often ends with a newline
    url_from_clipboard = (pyperclip.paste() or "").strip()

    if "http" not in url_from_clipboard:
        speak(
            build_feedback_prompt(
                "I don't see a URL on your clipboard. Please paste a URL into your editor."
            )
        )

        url_from_clipboard = (human_in_the_loop.open_editor() or "").strip()

    # Re-validate after the editor fallback so non-URL text never reaches the scraper
    if "http" not in url_from_clipboard:
        speak(
            build_feedback_prompt(
                "Still no URL found in clipboard. Skipping this request."
            )
        )
        return

    print(f"🔗 Scraping URL found in clipboard: {url_from_clipboard}")

    speak(
        build_feedback_prompt(
            dedent(f"""I've found the URL in your clipboard.
                       I'll scrape the URL and example generate code for you.
                       But first, what about the example code would you like me to focus on?
                    """)
        )
    )

    feedback_for_code_generation = human_in_the_loop.open_editor()

    speak(
        build_feedback_prompt(
            f"Okay got it, I see you want to focus on '{feedback_for_code_generation}'. I'll generate the code for you now."
        )
    )

    scraped_markdown = scrape_to_markdown(url_from_clipboard)

    # Pass 1: first draft from the scraped page
    example_code_response_1: ExampleCodeResponse = llm.gpro_1_5_prompt_with_model(
        dedent(f"""You're a professional software developer advocate that takes pride in writing good code.
                   You take documentation, and convert it into runnable code.

                   You have a new request to generate code for the following url: '{url_from_clipboard}' with a focus on '{feedback_for_code_generation}'.

                   Given the scraped WEBSITE_CONTENT content below, generate working code to showcase how to run the code.

                   Focus on the code. Use detailed variable and function names. Comment every line of code explaining what it does.
                   Remember, this is code to showcase how the code works. It should be fully functional and runnable.
                   Respond in this JSON format exclusively: {{code: ''}}

                   WEBSITE_CONTENT:
                     {scraped_markdown}
                """),
        pydantic_model=ExampleCodeResponse,
    )

    print("👧 Raw response: v1\n\n", example_code_response_1.code)

    # Pass 2: clean up the draft so it is runnable
    example_code_response_2 = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You are an elite level, principle software engineer.

                   You work with a co-engineer that likes to leave non-runnable code in the code so you're responsible for making sure it's runnable.
                   You've just generated the first draft EXAMPLE_CODE below.

                   You're now taking a second pass to clean it up to make sure it meets the REQUIREMENTS

                   REQUIREMENTS:
                      - Make sure it's immediately runnable and functional.
                      - Removing anything that isn't runnable code.
                      - This code will be immediately placed into a file and run.
                      - The code should be well commented so it's easy to understand.
                      - The code should be well formatted so it's easy to read.
                      - The code should use verbose variable and function names.
                      - You pay close attention to indentation.
                      - Respond in JSON format with the following keys: {{code: ''}}

                   EXAMPLE_CODE:
                     {example_code_response_1.code}
                """),
        pydantic_model=ExampleCodeResponse,
    )

    print("👧 Raw response: v2\n\n", example_code_response_2.code)

    # Pass 3: final polish
    example_code_response_3 = llm.gpt4t_w_vision_json_prompt(
        dedent(f"""You are a top-level programmer and super-expert in software engineering.

                   You've received a near final draft of code to finalize.
                   You work with a co-engineer that likes to leave non-runnable code in the code so you're responsible for making sure it's runnable.
                   You're taking a final pass to make sure the code is near perfect and fully runnable.
                   You follow the REQUIREMENTS below to make sure the code is top notch for production deployment.

                   REQUIREMENTS:
                      - Make sure the code is immediately runnable and functional.
                      - Removing anything that isn't runnable code.
                      - This code will be immediately placed into a file and run.
                      - The code follows expert coding best practices.
                      - The code should be well commented so it's easy to understand.
                      - The code should be well formatted so it's easy to read.
                      - The code should use verbose variable and function names.
                      - You pay close attention to indentation.
                      - Respond in JSON format with the following keys: {{code: ''}}

                   EXAMPLE_CODE:
                      {example_code_response_2.code}
                """),
        pydantic_model=ExampleCodeResponse,
    )

    print("👧 Raw response: v3\n\n", example_code_response_3.code)

    # The default configuration has no working directory; fall back to the
    # current directory so os.path.join is never handed None
    working_directory = configuration.get("working_directory") or os.getcwd()

    example_code_file_prompt = dedent(f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}

                  You've just generated the following CODE below for your human companion.
                  Create a file name for the code file that will be written to the following directory: {working_directory}
                  The file name should be unique and descriptive of the code it contains.
                  Respond exclusively with the file name in the following JSON format: {{file_name: ''}}.

                  CODE:
                    {example_code_response_3.code}
               """)

    example_code_file_name_response = llm.gpt4t_w_vision_json_prompt(
        example_code_file_prompt,
        pydantic_model=ExampleCodeFileNameResponse,
    )

    new_file_name = example_code_file_name_response.file_name

    new_file_path = os.path.join(working_directory, new_file_name)

    # write the code to the file
    with open(new_file_path, "w") as file:
        file.write(example_code_response_3.code)

    print(f"✅ Code example written to {new_file_path}")

    speak(
        build_feedback_prompt(
            f"Code has been written to the working directory into a file named {new_file_name}. Let me know if you need anything else."
        )
    )


# --------------------- Helper Methods ---------------------


def human_file_json_prompt(contents: dict):
    """
    Prompt the user to edit `contents` as JSON in the text editor.

    Args:
    - contents (dict): The data to present, serialized with a 2-space indent.

    Returns:
    - The value parsed from the JSON text the user saved.

    Raises:
    - json.JSONDecodeError: If the saved text is not valid JSON.
    """
    edited_contents = editor.edit(contents=json.dumps(contents, indent=2))
    # editor.edit reads the file in text mode and returns str, so the text is
    # parsed directly; str has no .decode()
    edited_config = json.loads(edited_contents)

    return edited_config


def scrape_to_markdown(url, timeout=30):
    """
    Fetch a web page and convert its HTML to Markdown.

    Args:
    - url (str): The address of the page to fetch.
    - timeout (float): Seconds to wait for the server before giving up. Without a
      timeout a stalled server would block the assistant indefinitely.

    Returns:
    - str: The Markdown produced by markdownify, called with strip=["script", "style"].
    """
    # Send a GET request to the URL
    response = requests.get(url, timeout=timeout)

    # Create a BeautifulSoup object to parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")

    # Convert the parsed HTML to Markdown using markdownify
    markdown = markdownify(str(soup), strip=["script", "style"])

    return markdown


def build_feedback_prompt(message: str):
    """
    Wrap `message` in the shared prompt head, send it to the Gemini model, and
    return the model's phrasing of that message.
    """
    return llm.gpro_1_5_prompt(
        dedent(f"""{PERSONAL_AI_ASSISTANT_PROMPT_HEAD}
                        Concisely communicate the following message to your human companion: '{message}'
                     """)
    )


# --------------------- AUDIO I/O ---------------------


def speak(text: str):
    """Generate speech audio for `text` with ElevenLabs and play it."""
    # The key is read from the ELEVEN_API_KEY environment variable
    eleven_api_key = os.getenv("ELEVEN_API_KEY")
    tts_client = ElevenLabs(api_key=eleven_api_key)

    speech_audio = tts_client.generate(
        text=text,
        voice="x7h79ussrwcHQUQgfa0n",
        model="eleven_turbo_v2",
    )

    play(speech_audio)


def transcribe_audio_file(file_path):
    """
    Transcribe a recorded audio file with Deepgram.

    Args:
    - file_path (str): Path of the audio file to transcribe.

    Returns:
    - str: The transcript of the first alternative of the first channel, or an
      empty string when anything fails. This is best-effort: the error is
      printed and not raised.
    """
    try:
        # STEP 1: Create a Deepgram client using the API key
        api_key = os.getenv("DEEPGRAM_API_KEY")
        dg_client = DeepgramClient(api_key)

        # STEP 2: Read the recorded audio file
        with open(file_path, "rb") as file:
            buffer_data = file.read()

        # STEP 3: Build the payload and configure Deepgram options for audio analysis
        payload = {"buffer": buffer_data}
        options = PrerecordedOptions(model="nova-2", smart_format=True)

        # STEP 4: Call the transcribe_file method with the audio payload and options
        response = dg_client.listen.prerecorded.v("1").transcribe_file(payload, options)

        # STEP 5: Extract the transcript from the response
        transcript = response['results']['channels'][0]['alternatives'][0]['transcript']

        return transcript

    except Exception as e:
        # Deliberate catch-all so one failed chunk does not stop the listening loop
        print(f"Exception: {e}")
        return ""


def track_interaction_time():
    """
    Print the seconds elapsed since 'iteration_start_time' and reset it.

    Does nothing when the module-level 'iteration_start_time' is falsy.
    """
    global iteration_start_time
    if not iteration_start_time:
        return

    elapsed = datetime.now() - iteration_start_time
    interaction_time = elapsed.total_seconds()
    print(f"🕒 Interaction time: {interaction_time} seconds")
    # Clear the marker so the same interval is not reported twice
    iteration_start_time = None


def record_audio(duration=10, fs=44100):
    """
    Record 'duration' seconds of int16 audio at sample rate 'fs' and return it.

    Reports the previous interaction time before recording, and stamps
    'iteration_start_time' with the current time once the chunk is complete.
    """
    global iteration_start_time

    track_interaction_time()

    print("🔴 Recording...")
    frame_count = int(duration * fs)
    captured = sd.rec(frame_count, samplerate=fs, channels=channels, dtype="int16")
    # Block until the recording started above has finished
    sd.wait()
    print("🎧 Recording Chunk Complete")

    iteration_start_time = datetime.now()
    return captured


def save_audio_file(recording, fs=44100, filename="output.wav"):
    """
    Write 'recording' to 'filename' as a WAV file.

    The file uses the module-level 'channels' count, a 2-byte sample width
    and frame rate 'fs'.
    """
    with wave.open(filename, "wb") as wav_out:
        wav_out.setframerate(fs)
        wav_out.setsampwidth(2)
        wav_out.setnchannels(channels)
        wav_out.writeframes(recording)


def personal_ai_assistant_loop(
    audio_chunk_size=10, activation_keyword=ACTIVATION_KEYWORD, on_keywords=None
):
    """
    Record, transcribe and dispatch audio chunks forever.

    Each iteration records 'audio_chunk_size' seconds, saves them to a
    timestamped wav file, transcribes that file and, when the transcript
    contains 'activation_keyword' (case-insensitive), calls
    'on_keywords(transcript)' if a callback was given.

    The wav file is deleted at the end of every iteration, including when
    transcription or the callback raises, so failed iterations do not leave
    audio files behind. The exception itself still propagates.
    """
    while True:
        recording = record_audio(duration=audio_chunk_size)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"audio_{timestamp}.wav"
        save_audio_file(recording, filename=filename)
        try:
            file_size = os.path.getsize(filename)
            print(f"📁 File {filename} has been saved with a size of {file_size} bytes.")
            transcript = transcribe_audio_file(filename)
            print("📝 transcript was:", transcript)
            if activation_keyword.lower() in transcript.lower():
                if on_keywords:
                    on_keywords(transcript)
        finally:
            # Always clean up the chunk file, even on failure
            os.remove(filename)


def text_after_keyword(transcript: str, keyword: str):
    """
    Return the stripped text following the first case-insensitive match of 'keyword'.

    Returns "" when the keyword does not occur. Any exception is printed and
    also results in "".
    """
    try:
        start = transcript.lower().find(keyword.lower())
        if start != -1:
            # Slice the original transcript so the caller keeps its casing
            return transcript[start + len(keyword):].strip()
        return ""
    except Exception as e:
        print(f"Error extracting text after keyword: {e}")
        return ""


def get_first_keyword_in_prompt(prompt: str):
    """
    Find the first routed keyword that occurs in 'prompt'.

    The router maps comma-separated keyword groups to agents. Groups and
    their keywords are scanned in order; the first keyword contained in the
    lower-cased prompt wins.

    Returns:
      (agent, keyword) for the first match, or (None, None) when nothing matches.
    """
    router = get_simple_keyword_ai_agent_router()
    for group, agent in router.items():
        for candidate in group.split(","):
            if candidate in prompt.lower():
                return agent, candidate
    return None, None


def on_activation_keyword_detected(transcript: str):
    """
    Route a transcript that contains the activation keyword to an agent.

    The text after ACTIVATION_KEYWORD becomes the prompt. The first agent
    whose keyword appears in that prompt is called with it. When no agent
    matches, a message is printed and nothing else happens.
    """
    print("✅ Activation keyword detected!, transcript is: ", transcript)

    prompt = text_after_keyword(transcript, ACTIVATION_KEYWORD)
    print("🔍 prompt is: ", prompt)

    agent_to_run, agent_keyword = get_first_keyword_in_prompt(prompt)

    if agent_to_run:
        print(f"✅ Found agent via keyword '{agent_keyword}'")
        agent_to_run(prompt)
    else:
        print("❌ No agent found for the given prompt.")


# Start the assistant as soon as this module runs: the loop never returns and
# hands every transcript containing the activation keyword to
# on_activation_keyword_detected.
personal_ai_assistant_loop(on_keywords=on_activation_keyword_detected)

## parser.py
import json
import re


def parse_json_from_gemini(json_str: str):
    """Decodes JSON from a model response, with or without a ```json fence.

    Args:
      json_str: Either bare JSON text or text containing a fenced block, e.g.:
        ```json
        {
          "key1": "value1",
          "key2": "value2"
        }
        ```
        When a fenced block is present, only its content is decoded.

    Returns:
      The decoded JSON value, or None if decoding fails or the input has no
      string methods (e.g. None).
    """
    try:
        candidate = json_str.strip()

        # Prefer the content of the first ```json ... ``` block when there is one
        fenced = re.search(r"```json\s*(.*?)\s*```", candidate, re.DOTALL)
        payload = fenced.group(1) if fenced else candidate

        return json.loads(payload)
    except (json.JSONDecodeError, AttributeError):
        return None

## voice_recorder.py
import sounddevice as sd
import numpy as np
import vosk
import queue

class VoiceRecorder:
    """Keyword-driven voice recorder built on a Vosk recognizer.

    Recognized phrases are matched against three lower-cased keywords:
    the activation keyword starts collecting a transcript, the end keyword
    hands the collected transcript to `process_command`, and the stop keyword
    ends `continuous_listen`.
    """

    def __init__(self, model_path='model', device=None, activation_keyword='Hello Ada', end_keyword='thanks', stop_keyword='stop recording'):
        """Load the Vosk model at 'model_path' and store the lower-cased keywords."""
        self.model = vosk.Model(model_path)
        self.device = device
        self.activation_keyword = activation_keyword.lower()
        self.end_keyword = end_keyword.lower()
        self.stop_keyword = stop_keyword.lower()
        self.interaction_transcript = ""
        self.recording = False
        self.q = queue.Queue()

    def callback(self, indata, frames, time, status):
        """Audio-stream callback: queue the raw bytes of each captured buffer."""
        self.q.put(bytes(indata))

    @staticmethod
    def _extract_text(result_json):
        """Return the "text" field of a recognizer result given as a JSON string."""
        # Local import keeps this class self-contained in its module.
        import json

        # json.loads replaces eval(): it never executes the payload and it
        # accepts JSON literals (true/false/null) that eval() would choke on.
        return json.loads(result_json).get("text", "")

    def continuous_listen(self):
        """Read queued audio and process each recognized phrase until told to stop."""
        with sd.RawInputStream(callback=self.callback, device=self.device, dtype='int16',
                               channels=1, samplerate=16000) as stream:
            rec = vosk.KaldiRecognizer(self.model, stream.samplerate)
            while True:
                data = self.q.get()
                if rec.AcceptWaveform(data):
                    transcript = self._extract_text(rec.Result())
                    continue_listening = self.process_result(transcript)
                    if not continue_listening:
                        print("Shutting down the listening process.")
                        break

    def process_result(self, transcript):
        """Update the recording state from 'transcript'.

        Returns:
          False when the stop keyword was heard (and neither of the other two
          branches applied), True otherwise.
        """
        print(f"Detected: {transcript}")
        if self.activation_keyword in transcript and not self.recording:
            self.start_interaction()
        elif self.end_keyword in transcript and self.recording:
            self.stop_interaction()
        elif self.stop_keyword in transcript:
            return False
        if self.recording:
            self.interaction_transcript += " " + transcript
        return True

    def start_interaction(self):
        """Begin collecting recognized phrases."""
        print("Starting interaction ...")
        self.recording = True

    def stop_interaction(self):
        """Process the collected transcript, then reset it and stop collecting."""
        print("Stopping interaction ...")
        self.process_command(self.interaction_transcript)
        self.interaction_transcript = ""
        self.recording = False

    def process_command(self, transcript):
        """Handle a completed interaction; currently only prints it."""
        # Process the recorded audio or perform actions based on the last command
        print(f"Processing command: {transcript}")

# Example usage:
if __name__ == "__main__":
    # Ensure you have a Vosk model directory.
    recorder = VoiceRecorder('./audio_models/vosk-model-en-us-0.22-lgraph')
    recorder.continuous_listen()
	import subprocess
	import os
	import random
	import time

	def edit(contents: str):
	    """
	    Opens TextEdit on macOS and waits until it is closed to proceed.

	    'contents' is written to a temporary .json file in the current working
	    directory, the file is opened in TextEdit, and whatever the file holds
	    once TextEdit exits is returned. The temporary file is removed even if
	    launching the editor or reading the result fails.
	    """
	    # Get the current working directory
	    current_dir = os.getcwd()
	    # Generate a random number to include in the filename
	    random_number = random.randint(1000, 9999)
	    temp_file_path = os.path.join(current_dir, f'tempfile_{random_number}.json')

	    # Create the temporary file; leaving the with-block flushes and closes it
	    with open(temp_file_path, 'w+') as tmp:
	        tmp.write(contents)

	    try:
	        # Change the file permissions to make it readable and writable by everyone
	        os.chmod(temp_file_path, 0o666)

	        # Introduce a delay
	        time.sleep(1)  # Wait for 1 second before opening the file in Editor

	        # Open TextEdit and wait for it to close
	        editor_process = subprocess.Popen(['open', '-W', '-n', '-a', 'TextEdit', temp_file_path])
	        editor_process.wait()

	        # Read the modified content from the file
	        with open(temp_file_path, 'r') as file:
	            modified_content = file.read()
	    finally:
	        # Clean up by removing the temporary file
	        os.remove(temp_file_path)

	    return modified_content

	# Example usage: edit a sample string in TextEdit and print the result.
	if __name__ == "__main__":
	    sample_contents = "How are you doing this. Tell me more about it:"
	    modified_config = edit(sample_contents)
	    print(modified_config)
	import subprocess
	import tkinter as tk
	from tkinter import filedialog
	from modules import editor


	def open_file() -> str:
	    """Opens a file selection dialog and returns the selected file path.

	    Returns:
	      The chosen path, or None when the dialog is cancelled.
	    """
	    root = tk.Tk()
	    root.withdraw()  # Hide the main window
	    try:
	        # askopenfilename yields only the path, so no file handle is left open
	        file_path = filedialog.askopenfilename()
	    finally:
	        # Destroy the hidden root on every path, including a cancelled dialog
	        root.destroy()
	    return file_path or None


	def open_editor() -> str:
	    """Open the editor on empty contents and return what `editor.edit` returns."""
	    return editor.edit(contents="")


	def open_file_in_editor_and_continue(file: str) -> None:
	    """Opens a file in the editor using the 'code' command and allows the user to continue editing.

	    When 'file' is empty or None, nothing is launched and a notice is printed.
	    """
	    if file:
	        subprocess.run(["code", file])
	    else:
	        print("No file provided to open.")
	import base64
	import google.generativeai as genai
	import os
	from dotenv import load_dotenv
	from pydantic import BaseModel
	from modules import parsers
	import openai

	# Load environment variables from .env file
	load_dotenv()
	# GOOGLE_API_KEY is mandatory: indexing os.environ raises KeyError when it is unset
	api_key = os.environ["GOOGLE_API_KEY"]

	# OPENAI_API_KEY is optional at this point: .get() yields None when it is unset
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# Initialize Google API Client
	genai.configure(api_key=api_key)


	def gpro_1_5_prompt(prompt) -> str:
	    """
	    Generates content based on the provided prompt using the Gemini 1.5 API model and returns the text part of the first candidate's content.

	    Args:
	    - prompt (str): The prompt to generate content for.

	    Returns:
	    - str: The text part of the first candidate's content from the generated response.
	    """
	    model_name = "models/gemini-1.5-pro-latest"
	    model = genai.GenerativeModel(model_name=model_name)
	    response = model.generate_content(prompt, request_options={})
	    return response.candidates[0].content.parts[0].text


	def gpro_1_5_prompt_with_model(prompt, pydantic_model: BaseModel) -> BaseModel:
	    """
	    Generates content for the prompt using the Gemini 1.5 API model and validates the first candidate's text against a pydantic model.

	    Args:
	    - prompt (str): The prompt to generate content for.
	    - pydantic_model (BaseModel): The model class used to validate the response.

	    Returns:
	    - BaseModel: The validated 'pydantic_model' instance built from the response text.
	    """
	    model_name = "models/gemini-1.5-pro-latest"
	    model = genai.GenerativeModel(model_name=model_name)
	    response = model.generate_content(prompt, request_options={})
	    response_text = response.candidates[0].content.parts[0].text
	    if "```json" in response_text:
	        # Fenced reply: extract and decode the JSON first, then validate the result
	        return pydantic_model.model_validate(
	            parsers.parse_json_from_gemini(response_text)
	        )
	    else:
	        return pydantic_model.model_validate_json(response_text)


	def gpt4t_w_vision_json_prompt(
	    prompt: str,
	    model: str = "gpt-4-turbo-2024-04-09",
	    instructions: str = "You are a helpful assistant that response in JSON format.",
	    pydantic_model: BaseModel = None,
	) -> BaseModel:
	    """
	    Sends the prompt to an OpenAI chat model in JSON mode and validates the reply against a pydantic model.

	    Args:
	    - prompt (str): The user message.
	    - model (str): The OpenAI model name.
	    - instructions (str): The system message.
	    - pydantic_model (BaseModel): The model class used to validate the JSON reply; required despite the None default.

	    Returns:
	    - BaseModel: The validated 'pydantic_model' instance.

	    Raises:
	    - ValueError: If 'pydantic_model' is None.
	    """
	    # Fail before the API call rather than after it with an AttributeError
	    if pydantic_model is None:
	        raise ValueError("pydantic_model is required to validate the JSON response")

	    response = openai.chat.completions.create(
	        model=model,
	        messages=[
	            {
	                "role": "system",
	                "content": instructions,  # Added instructions as a system message
	            },
	            {
	                "role": "user",
	                "content": prompt,
	            },
	        ],
	        response_format={"type": "json_object"},
	    )

	    response_text = response.choices[0].message.content
	    print(f"Text LLM response: {response_text}")

	    as_model = pydantic_model.model_validate_json(response_text)

	    return as_model


	def gpt4t_w_vision(
	    prompt: str,
	    model: str = "gpt-4-turbo-2024-04-09",
	    instructions: str = "You are a helpful assistant.",
	) -> str:
	    """
	    Sends the prompt to an OpenAI chat model and returns the text of the first choice.

	    Args:
	    - prompt (str): The user message.
	    - model (str): The OpenAI model name.
	    - instructions (str): The system message.

	    Returns:
	    - str: The content of the first choice's message.
	    """
	    response = openai.chat.completions.create(
	        model=model,
	        messages=[
	            {
	                "role": "system",
	                "content": instructions,  # Added instructions as a system message
	            },
	            {
	                "role": "user",
	                "content": prompt,
	            },
	        ],
	    )

	    response_text = response.choices[0].message.content
	    return response_text


	def encode_image(image_path: str) -> str:
	    """Return the contents of the file at 'image_path' as a base64 string."""
	    with open(image_path, "rb") as image_file:
	        return base64.b64encode(image_file.read()).decode("utf-8")


	def gpt4t_w_vision_image_with_model(
	    prompt: str,
	    file_path: str,
	    model: str = "gpt-4-turbo-2024-04-09",
	    instructions: str = "You are a helpful assistant that specializes in image analysis.",
	    pydantic_model: BaseModel = None,
	):
	    """
	    Sends the prompt plus an image to an OpenAI chat model in JSON mode and validates the reply against a pydantic model.

	    The image is embedded in the request as a base64 data URL whose subtype
	    comes from the file extension.

	    Args:
	    - prompt (str): The user text accompanying the image.
	    - file_path (str): Path of the image file to send.
	    - model (str): The OpenAI model name.
	    - instructions (str): The system message.
	    - pydantic_model (BaseModel): The model class used to validate the JSON reply; required despite the None default.

	    Returns:
	    - BaseModel: The validated 'pydantic_model' instance.

	    Raises:
	    - ValueError: If 'pydantic_model' is None.
	    """
	    # Fail before reading the file and calling the API
	    if pydantic_model is None:
	        raise ValueError("pydantic_model is required to validate the JSON response")

	    file_extension = file_path.split(".")[-1].lower()
	    # "jpg" is not a registered image subtype; the MIME type is image/jpeg
	    if file_extension == "jpg":
	        file_extension = "jpeg"

	    base64_image = encode_image(file_path)

	    print("base64_image", base64_image)

	    response = openai.chat.completions.create(
	        model=model,
	        messages=[
	            {
	                "role": "system",
	                "content": instructions,
	            },
	            {
	                "role": "user",
	                "content": [
	                    {
	                        "type": "text",
	                        "text": prompt,
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {
	                            "url": f"data:image/{file_extension};base64,{base64_image}"
	                        },
	                    },
	                ],
	            },
	        ],
	        response_format={"type": "json_object"},
	    )

	    print("response", response)

	    response_text = response.choices[0].message.content

	    print("response_text", response_text)

	    parsed_response = pydantic_model.model_validate_json(response_text)

	    return parsed_response
	import json
	import re


	def parse_json_from_gemini(json_str: str):
	    """Parses a dictionary from a JSON-like object string.

	    Args:
	      json_str: A string representing a JSON-like object, e.g.:
	        ```json
	        {
	          "key1": "value1",
	          "key2": "value2"
	        }
	        ```

	    Returns:
	      A dictionary representing the parsed object, or None if parsing fails.
	    """

	    try:
	        # Remove potential leading/trailing whitespace
	        json_str = json_str.strip()

	        # Extract JSON content from triple backticks and "json" language specifier.
	        # The lazy (.*?) group with DOTALL captures a multi-line body up to the
	        # first closing fence.
	        json_match = re.search(r"```json\s*(.*?)\s*```", json_str, re.DOTALL)

	        if json_match:
	            json_str = json_match.group(1)

	        return json.loads(json_str)
	    except (json.JSONDecodeError, AttributeError):
	        return None
	import sounddevice as sd
	import numpy as np
	import vosk
	import queue

	class VoiceRecorder:
	    """Keyword-driven voice recorder built on a Vosk recognizer.

	    Recognized phrases are matched against three lower-cased keywords:
	    the activation keyword starts collecting a transcript, the end keyword
	    hands the collected transcript to `process_command`, and the stop keyword
	    ends `continuous_listen`.
	    """

	    def __init__(self, model_path='model', device=None, activation_keyword='Hello Ada', end_keyword='thanks', stop_keyword='stop recording'):
	        """Load the Vosk model at 'model_path' and store the lower-cased keywords."""
	        self.model = vosk.Model(model_path)
	        self.device = device
	        self.activation_keyword = activation_keyword.lower()
	        self.end_keyword = end_keyword.lower()
	        self.stop_keyword = stop_keyword.lower()
	        self.interaction_transcript = ""
	        self.recording = False
	        self.q = queue.Queue()

	    def callback(self, indata, frames, time, status):
	        """Audio-stream callback: queue the raw bytes of each captured buffer."""
	        self.q.put(bytes(indata))

	    @staticmethod
	    def _extract_text(result_json):
	        """Return the "text" field of a recognizer result given as a JSON string."""
	        # Local import keeps this class self-contained in its module.
	        import json

	        # json.loads replaces eval(): it never executes the payload and it
	        # accepts JSON literals (true/false/null) that eval() would choke on.
	        return json.loads(result_json).get("text", "")

	    def continuous_listen(self):
	        """Read queued audio and process each recognized phrase until told to stop."""
	        with sd.RawInputStream(callback=self.callback, device=self.device, dtype='int16',
	                               channels=1, samplerate=16000) as stream:
	            rec = vosk.KaldiRecognizer(self.model, stream.samplerate)
	            while True:
	                data = self.q.get()
	                if rec.AcceptWaveform(data):
	                    transcript = self._extract_text(rec.Result())
	                    continue_listening = self.process_result(transcript)
	                    if not continue_listening:
	                        print("Shutting down the listening process.")
	                        break

	    def process_result(self, transcript):
	        """Update the recording state from 'transcript'.

	        Returns:
	          False when the stop keyword was heard (and neither of the other two
	          branches applied), True otherwise.
	        """
	        print(f"Detected: {transcript}")
	        if self.activation_keyword in transcript and not self.recording:
	            self.start_interaction()
	        elif self.end_keyword in transcript and self.recording:
	            self.stop_interaction()
	        elif self.stop_keyword in transcript:
	            return False
	        if self.recording:
	            self.interaction_transcript += " " + transcript
	        return True

	    def start_interaction(self):
	        """Begin collecting recognized phrases."""
	        print("Starting interaction ...")
	        self.recording = True

	    def stop_interaction(self):
	        """Process the collected transcript, then reset it and stop collecting."""
	        print("Stopping interaction ...")
	        self.process_command(self.interaction_transcript)
	        self.interaction_transcript = ""
	        self.recording = False

	    def process_command(self, transcript):
	        """Handle a completed interaction; currently only prints it."""
	        # Process the recorded audio or perform actions based on the last command
	        print(f"Processing command: {transcript}")


	# Example usage:
	if __name__ == "__main__":
	    # Ensure you have a Vosk model directory.
	    recorder = VoiceRecorder('./audio_models/vosk-model-en-us-0.22-lgraph')
	    recorder.continuous_listen()