# 0xrushi/temporary_01_code_without_sockets.py

## temporary_01_code_without_sockets.py
# Requires the OPENAI_API_KEY environment variable (read by the OpenAI client).

import time
import threading
import numpy as np
import sounddevice as sd
from queue import Queue
from rich.console import Console
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import Ollama
from scipy.io.wavfile import write
from openai import OpenAI
from interpreter import interpreter
from langchain_community import llms

# Shared Rich console used by this script for prompts, messages and spinners.
console = Console()

def save_numpy_to_wav(numpy_array, file_path, sample_rate=16000):
    """
    Writes an audio array to a WAV file via ``scipy.io.wavfile.write``.
    Args:
        numpy_array (numpy.ndarray): The samples to store; passed through unchanged.
        file_path (str): Destination of the WAV file.
        sample_rate (int): Sample rate in Hz stored in the file. Defaults to 16000.
    Returns:
        None
    """
    write(filename=file_path, rate=sample_rate, data=numpy_array)


# Prompt text handed to the interpreter. ``{relevant_commands}`` and
# ``{instruction}`` are the two placeholders filled in by ``prompt.format``.
template = """
You are an AI assistant that helps users convert their broken English descriptions into a sequence of tasks.

Analyze if the statement says multiple tasks or a single. If there are multiple tasks, write a plan. **Always recap the plan between each code block** (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).
When you execute code, it will be executed **on the user's machine**. The user has given you **full and complete permission** to execute any code necessary to complete the task. Execute the code.
You can access the internet. Run **any code** to achieve the goal, and if at first you don't succeed, try again and again.
You can install new packages.
When a user refers to a filename, they're likely referring to an existing file in the directory you're currently executing code in.
In general, try to **make plans** with as few steps as possible. As for actually executing code to carry out that plan, for *stateful* languages (like python, javascript, shell, but NOT for html which starts from 0 every time).
You are capable of **any** task.

The conversation transcript is as follows:
{relevant_commands}
And here is the user's follow-up: {instruction}
Your response:
"""

# Template object wrapping the text above together with its placeholder names.
prompt = PromptTemplate(
    template=template,
    input_variables=["instruction", "relevant_commands"],
)

class stt:
    """Speech-to-text helper backed by the OpenAI transcription endpoint."""

    @staticmethod
    def transcribe(audio_array, **kwargs):
        """
        Transcribes an audio array with OpenAI's ``whisper-1`` model.
        Args:
            audio_array (numpy.ndarray): The audio samples; they are written to a
                WAV file with ``save_numpy_to_wav`` (default sample rate) and uploaded.
            **kwargs: Accepted for call compatibility and ignored.
        Returns:
            str: The transcribed text.
        """
        # Local imports keep this block self-contained.
        import os
        import tempfile

        # Stage the upload in a private temporary directory instead of a fixed
        # "output.wav" in the working directory, which would overwrite a user
        # file of that name and be left behind after every call.
        with tempfile.TemporaryDirectory() as scratch_dir:
            wav_path = os.path.join(scratch_dir, "output.wav")
            save_numpy_to_wav(audio_array, wav_path)

            client = OpenAI()
            with open(wav_path, "rb") as audio_file:
                transcription = client.audio.transcriptions.create(
                    model="whisper-1", file=audio_file
                )

        print(transcription.text)
        return transcription.text

def record_audio(stop_event, data_queue):
    """
    Captures audio data from the user's microphone and adds it to a queue for further processing.
    The stream is opened at 16000 Hz, one channel, 16-bit integer samples, and
    stays open until ``stop_event`` is set.
    Args:
        stop_event (threading.Event): An event that, when set, signals the function to stop recording.
        data_queue (queue.Queue): A queue to which the recorded audio data will be added as ``bytes`` chunks.
    Returns:
        None
    """
    def callback(indata, frames, time_info, status):
        # Third parameter renamed from ``time`` so it no longer shadows the
        # ``time`` module inside the callback.
        if status:
            console.print(status)
        # Copy the buffer into an independent bytes object before queueing it.
        data_queue.put(bytes(indata))

    with sd.RawInputStream(
        samplerate=16000, dtype="int16", channels=1, callback=callback
    ):
        # Block until asked to stop; replaces the 0.1 s sleep-polling loop.
        stop_event.wait()

def transcribe(audio_np: np.ndarray) -> str:
    """
    Converts recorded audio to text by delegating to ``stt.transcribe``.
    Args:
        audio_np (numpy.ndarray): The audio data to be transcribed.
    Returns:
        str: The transcribed text.
    """
    return stt.transcribe(audio_np)

def get_llm_response(text: str) -> str:
    """
    Generates a response to the given text by running it through ``interpreter.chat``.
    The text is inserted into the module-level ``prompt`` as the instruction,
    with an empty ``relevant_commands`` section.
    Args:
        text (str): The input text to be processed.
    Returns:
        str: The ``content`` of the first message returned by the interpreter,
            or an empty string when no messages come back.
    """
    filled_prompt = prompt.format(instruction=text, relevant_commands="")

    # SECURITY: auto_run is switched on for every call. NOTE(review): per the
    # Open Interpreter docs this skips the confirmation step before generated
    # code is executed -- confirm this is acceptable for spoken input.
    interpreter.auto_run = True
    messages = interpreter.chat(filled_prompt, display=True, stream=False)

    # An empty reply used to raise IndexError on ``[0]``.
    if not messages:
        return ""
    return messages[0]["content"]

def play_audio(sample_rate, audio_array):
    """
    Plays audio through sounddevice and returns once ``sd.wait()`` does.
    Args:
        sample_rate (int): The sample rate of the audio data.
        audio_array (numpy.ndarray): The audio data to be played.
    Returns:
        None
    """
    sd.play(audio_array, samplerate=sample_rate)
    sd.wait()


# Interactive loop: record between two Enter presses, transcribe the audio,
# send the text to the interpreter and print the reply. Ctrl+C ends the session.
if __name__ == "__main__":
    console.print("[cyan]Assistant started! Press Ctrl+C to exit.")

    try:
        while True:
            console.input(
                "Press Enter to start recording, then press Enter again to stop."
            )

            data_queue = Queue()  # type: ignore[var-annotated]
            stop_event = threading.Event()
            recording_thread = threading.Thread(
                target=record_audio,
                args=(stop_event, data_queue),
            )
            recording_thread.start()

            try:
                input()
            finally:
                # Stop and join the recorder even when Ctrl+C interrupts the
                # wait. Otherwise the non-daemon thread keeps running and the
                # process never exits after "Session ended." is printed.
                stop_event.set()
                recording_thread.join()

            # The recorder thread has finished, so the queue's underlying
            # deque can be read directly.
            audio_data = b"".join(data_queue.queue)
            # int16 samples converted to float32 and divided by 32768.
            audio_np = (
                np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            )

            if audio_np.size > 0:
                with console.status("Transcribing...", spinner="earth"):
                    text = transcribe(audio_np)
                # markup=False: the transcript is arbitrary text, so square
                # brackets in it must not be parsed as Rich markup tags.
                console.print(f"You: {text}", style="yellow", markup=False)

                with console.status("Generating response...", spinner="earth"):
                    response = get_llm_response(text)
                    # sample_rate, audio_array = tts.long_form_synthesize(response)
                    print(response)

                # Same reasoning as above for the model's reply.
                console.print(f"Assistant: {response}", style="cyan", markup=False)
                # play_audio(44000, audio_array)
            else:
                console.print(
                    "[red]No audio recorded. Please ensure your microphone is working."
                )

    except KeyboardInterrupt:
        console.print("\n[red]Exiting...")

    console.print("[blue]Session ended.")
	# Requires the OPENAI_API_KEY environment variable (read by the OpenAI client).

	import time
	import threading
	import numpy as np
	import sounddevice as sd
	from queue import Queue
	from rich.console import Console
	from langchain.memory import ConversationBufferMemory
	from langchain.chains import ConversationChain
	from langchain.prompts import PromptTemplate
	from langchain_community.llms import Ollama
	from scipy.io.wavfile import write
	from openai import OpenAI
	from interpreter import interpreter
	from langchain_community import llms

	# Shared Rich console used by this script for prompts, messages and spinners.
	console = Console()

	def save_numpy_to_wav(numpy_array, file_path, sample_rate=16000):
		"""
		Writes an audio array to a WAV file via ``scipy.io.wavfile.write``.
		Args:
			numpy_array (numpy.ndarray): The samples to store; passed through unchanged.
			file_path (str): Destination of the WAV file.
			sample_rate (int): Sample rate in Hz stored in the file. Defaults to 16000.
		Returns:
			None
		"""
		# Body re-indented under the ``def``; it had lost its indentation.
		write(file_path, sample_rate, numpy_array)


	# Prompt text handed to the interpreter. ``{relevant_commands}`` and
	# ``{instruction}`` are the two placeholders filled in by ``prompt.format``.
	# NOTE(review): each non-blank line of this literal begins with a tab that is
	# part of the string itself -- looks like a paste artifact; confirm before
	# re-indenting, since that would change the prompt text.
	template = """
	You are an AI assistant that helps users convert their broken English descriptions into a sequence of tasks.

	Analyze if the statement says multiple tasks or a single. If there are multiple tasks, write a plan. Always recap the plan between each code block (you have extreme short-term memory loss, so you need to recap the plan between each message block to retain it).
	When you execute code, it will be executed on the user's machine. The user has given you full and complete permission to execute any code necessary to complete the task. Execute the code.
	You can access the internet. Run any code to achieve the goal, and if at first you don't succeed, try again and again.
	You can install new packages.
	When a user refers to a filename, they're likely referring to an existing file in the directory you're currently executing code in.
	In general, try to make plans with as few steps as possible. As for actually executing code to carry out that plan, for stateful languages (like python, javascript, shell, but NOT for html which starts from 0 every time).
	You are capable of any task.

	The conversation transcript is as follows:
	{relevant_commands}
	And here is the user's follow-up: {instruction}
	Your response:
	"""

	# Template object wrapping the text above together with its placeholder names.
	prompt = PromptTemplate(input_variables=["instruction", "relevant_commands"], template=template)

	class stt:
		"""Speech-to-text helper backed by the OpenAI transcription endpoint."""

		@staticmethod
		def transcribe(audio_array, **kwargs):
			"""
			Transcribes an audio array with OpenAI's ``whisper-1`` model.
			Args:
				audio_array (numpy.ndarray): The audio samples; they are written to a
					WAV file with ``save_numpy_to_wav`` (default sample rate) and uploaded.
				**kwargs: Accepted for call compatibility and ignored.
			Returns:
				str: The transcribed text.
			"""
			# Local imports keep this block self-contained.
			import os
			import tempfile

			# Stage the upload in a private temporary directory instead of a fixed
			# "output.wav" in the working directory, which would overwrite a user
			# file of that name and be left behind after every call.
			with tempfile.TemporaryDirectory() as scratch_dir:
				wav_path = os.path.join(scratch_dir, "output.wav")
				save_numpy_to_wav(audio_array, wav_path)

				client = OpenAI()
				with open(wav_path, "rb") as audio_file:
					transcription = client.audio.transcriptions.create(
						model="whisper-1", file=audio_file
					)

			print(transcription.text)
			return transcription.text

	def record_audio(stop_event, data_queue):
		"""
		Captures audio data from the user's microphone and adds it to a queue for further processing.
		The stream is opened at 16000 Hz, one channel, 16-bit integer samples, and
		stays open until ``stop_event`` is set.
		Args:
			stop_event (threading.Event): An event that, when set, signals the function to stop recording.
			data_queue (queue.Queue): A queue to which the recorded audio data will be added as ``bytes`` chunks.
		Returns:
			None
		"""
		def callback(indata, frames, time_info, status):
			# Third parameter renamed from ``time`` so it no longer shadows the
			# ``time`` module inside the callback.
			if status:
				console.print(status)
			# Copy the buffer into an independent bytes object before queueing it.
			data_queue.put(bytes(indata))

		with sd.RawInputStream(
			samplerate=16000, dtype="int16", channels=1, callback=callback
		):
			# Block until asked to stop; replaces the 0.1 s sleep-polling loop.
			stop_event.wait()

	def transcribe(audio_np: np.ndarray) -> str:
		"""
		Converts recorded audio to text by delegating to ``stt.transcribe``.
		Args:
			audio_np (numpy.ndarray): The audio data to be transcribed.
		Returns:
			str: The transcribed text.
		"""
		# Body re-indented under the ``def``; it had lost its indentation.
		return stt.transcribe(audio_np)

	def get_llm_response(text: str) -> str:
		"""
		Generates a response to the given text by running it through ``interpreter.chat``.
		The text is inserted into the module-level ``prompt`` as the instruction,
		with an empty ``relevant_commands`` section.
		Args:
			text (str): The input text to be processed.
		Returns:
			str: The ``content`` of the first message returned by the interpreter,
				or an empty string when no messages come back.
		"""
		filled_prompt = prompt.format(instruction=text, relevant_commands="")

		# SECURITY: auto_run is switched on for every call. NOTE(review): per the
		# Open Interpreter docs this skips the confirmation step before generated
		# code is executed -- confirm this is acceptable for spoken input.
		interpreter.auto_run = True
		messages = interpreter.chat(filled_prompt, display=True, stream=False)

		# An empty reply used to raise IndexError on ``[0]``.
		if not messages:
			return ""
		return messages[0]["content"]

	def play_audio(sample_rate, audio_array):
		"""
		Plays audio through sounddevice and returns once ``sd.wait()`` does.
		Args:
			sample_rate (int): The sample rate of the audio data.
			audio_array (numpy.ndarray): The audio data to be played.
		Returns:
			None
		"""
		# Body re-indented under the ``def``; it had lost its indentation.
		sd.play(audio_array, sample_rate)
		sd.wait()


	# Interactive loop: record between two Enter presses, transcribe the audio,
	# send the text to the interpreter and print the reply. Ctrl+C ends the session.
	# Nesting restored; the block had lost its indentation.
	if __name__ == "__main__":
		console.print("[cyan]Assistant started! Press Ctrl+C to exit.")

		try:
			while True:
				console.input(
					"Press Enter to start recording, then press Enter again to stop."
				)

				data_queue = Queue()  # type: ignore[var-annotated]
				stop_event = threading.Event()
				recording_thread = threading.Thread(
					target=record_audio,
					args=(stop_event, data_queue),
				)
				recording_thread.start()

				try:
					input()
				finally:
					# Stop and join the recorder even when Ctrl+C interrupts the
					# wait. Otherwise the non-daemon thread keeps running and the
					# process never exits after "Session ended." is printed.
					stop_event.set()
					recording_thread.join()

				# The recorder thread has finished, so the queue's underlying
				# deque can be read directly.
				audio_data = b"".join(data_queue.queue)
				# int16 samples converted to float32 and divided by 32768.
				audio_np = (
					np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
				)

				if audio_np.size > 0:
					with console.status("Transcribing...", spinner="earth"):
						text = transcribe(audio_np)
					# markup=False: the transcript is arbitrary text, so square
					# brackets in it must not be parsed as Rich markup tags.
					console.print(f"You: {text}", style="yellow", markup=False)

					with console.status("Generating response...", spinner="earth"):
						response = get_llm_response(text)
						# sample_rate, audio_array = tts.long_form_synthesize(response)
						print(response)

					# Same reasoning as above for the model's reply.
					console.print(f"Assistant: {response}", style="cyan", markup=False)
					# play_audio(44000, audio_array)
				else:
					console.print(
						"[red]No audio recorded. Please ensure your microphone is working."
					)

		except KeyboardInterrupt:
			console.print("\n[red]Exiting...")

		console.print("[blue]Session ended.")