samilkorkmaz/ui_with_ai.py

## ui_with_ai.py
# Listen to user speech and convert it to UI commands
# Şamil Korkmaz, 20.10.2024
import speech_recognition as sr
import openai
import time
import getpass
from datetime import datetime
import json

def record_audio(duration):
    """Capture microphone input for a fixed length of time.

    Args:
        duration: Length of the recording in seconds; forwarded to
            ``Recognizer.record``.

    Returns:
        The audio object produced by ``Recognizer.record``.
    """
    listener = sr.Recognizer()
    with sr.Microphone() as mic:
        print(f"Recording for {duration} seconds...")
        captured = listener.record(mic, duration=duration)
        print("Recording complete!")
    return captured

def transcribe_audio(audio):
    """Transcribe recorded audio with the OpenAI ``whisper-1`` model.

    The audio is written to a timestamped WAV file in the current working
    directory because the transcription call is given an open file. The
    file is removed again before returning, whether or not the call
    succeeded.

    Args:
        audio: Object exposing ``get_wav_data()``, whose result is written
            to the file in binary mode.

    Returns:
        The transcript text, or ``None`` if any step failed (the error is
        printed rather than raised).
    """
    import os  # local import: only needed for temp-file cleanup

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    temp_filename = f"temp_audio_{timestamp}.wav"
    try:
        # Save audio to temporary file
        with open(temp_filename, "wb") as f:
            f.write(audio.get_wav_data())

        # Use OpenAI client for Whisper
        with open(temp_filename, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
        return transcript.text
    except Exception as e:
        print(f"Error in transcription: {str(e)}")
        return None
    finally:
        # Best-effort cleanup so repeated runs do not litter the working
        # directory; the file may not exist if writing failed early.
        try:
            os.remove(temp_filename)
        except OSError:
            pass

# Function to get the user command and process it using the chat endpoint
def get_user_command_action(user_command):
    """Ask the chat model which UI action a user command maps to.

    Args:
        user_command: Text of the user's command; it is embedded verbatim
            in the prompt.

    Returns:
        The message content of the model's first choice. The prompt asks
        for ``{"action": "<action_name>"}``, but the reply is returned
        as-is, without parsing or validation.
    """
    system_prompt = 'You are an assistant that provides structured JSON responses based on user commands.'
    user_prompt = f"Interpret the following user command: '{user_command}' and provide the action as a structured JSON response. The available actions are:\n1. log_in\n2. sign_up\n3. view_products\n4. search_products\n5. add_to_cart\n6. checkout\n7. view_orders\n8. log_out\n9. contact_support\nRespond with the appropriate action as: {{\"action\": \"<action_name>\"}}"

    completion = openai.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': user_prompt},
        ],
    )
    return completion.choices[0].message.content

def process_action(action_data):
    """Parse the model's JSON reply and print the matching UI step.

    Args:
        action_data: JSON text, expected to be an object of the form
            ``{"action": "<action_name>"}``.

    Raises:
        json.JSONDecodeError: If ``action_data`` is not valid JSON.
        TypeError: If ``action_data`` is not a str, bytes or bytearray.

    Anything that parses but does not name a known action (missing key,
    unrecognised name, non-object JSON, non-string action value) prints
    "Unknown action!".
    """
    payload = json.loads(action_data)

    # Valid JSON is not necessarily an object (it may be a list, string or
    # number); only a dict can carry an "action" key.
    action = payload.get('action', 'unknown') if isinstance(payload, dict) else 'unknown'

    # Action name -> message shown for that UI step.
    messages = {
        'log_in': "Redirecting to Log In page...",
        'sign_up': "Redirecting to Sign Up page...",
        'view_products': "Showing product listings...",
        'search_products': "Initiating product search...",
        'add_to_cart': "Adding product to cart...",
        'checkout': "Proceeding to checkout...",
        'view_orders': "Showing order history...",
        'log_out': "Logging out...",
        'contact_support': "Redirecting to support page...",
    }

    # Non-string values can never match and may be unhashable (e.g. a
    # list), so they must not be used as a dict key.
    if isinstance(action, str) and action in messages:
        print(messages[action])
    else:
        print("Unknown action!")

def main():
    """Run one record -> transcribe -> interpret -> dispatch cycle.

    Records five seconds of audio, transcribes it, asks the chat model for
    the matching action and prints the corresponding UI step. Any
    exception raised along the way is printed instead of propagating.
    """
    import os  # local import: only needed for the API-key lookup

    # Prefer a key supplied through the environment so it does not have to
    # be edited into the source; the placeholder remains the fallback.
    openai.api_key = os.environ.get("OPENAI_API_KEY") or "YOUR OPEN AI KEY"
    try:
        audio = record_audio(5)
        # Transcribe audio
        print("\nTranscribing audio...")
        transcript = transcribe_audio(audio)
        if transcript:
            print(f"\nTranscript: {transcript}")
            print("\nGetting response from GPT...")
            response = get_user_command_action(transcript)
            if response:
                print("\nGPT Response:")
                print(response)
                # Dispatch only when there is a reply to parse; an empty
                # or None reply would otherwise fail inside json.loads.
                process_action(response)
            else:
                print("\nNo response received from GPT.")

    except Exception as e:
        print(f"An error occurred: {str(e)}")

# Run the voice-command pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
	# Listen to user speech and convert it to UI commands
	# Şamil Korkmaz, 20.10.2024
	import speech_recognition as sr
	import openai
	import time
	import getpass
	from datetime import datetime
	import json

	def record_audio(duration):
	    """Record audio from the microphone for the specified duration.

	    Args:
	        duration: Length of the recording in seconds; forwarded to
	            ``Recognizer.record``.

	    Returns:
	        The audio object produced by ``Recognizer.record``.
	    """
	    recognizer = sr.Recognizer()
	    with sr.Microphone() as source:
	        print(f"Recording for {duration} seconds...")
	        audio = recognizer.record(source, duration=duration)
	        print("Recording complete!")
	    return audio

	def transcribe_audio(audio):
	    """Transcribe recorded audio with the OpenAI ``whisper-1`` model.

	    The audio is written to a timestamped WAV file in the current
	    working directory and removed again before returning, whether or
	    not the transcription call succeeded.

	    Args:
	        audio: Object exposing ``get_wav_data()``, whose result is
	            written to the file in binary mode.

	    Returns:
	        The transcript text, or ``None`` if any step failed (the error
	        is printed rather than raised).
	    """
	    import os  # local import: only needed for temp-file cleanup

	    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	    temp_filename = f"temp_audio_{timestamp}.wav"
	    try:
	        # Save audio to temporary file
	        with open(temp_filename, "wb") as f:
	            f.write(audio.get_wav_data())

	        # Use OpenAI client for Whisper
	        with open(temp_filename, "rb") as audio_file:
	            transcript = openai.audio.transcriptions.create(
	                model="whisper-1",
	                file=audio_file
	            )
	        return transcript.text
	    except Exception as e:
	        print(f"Error in transcription: {str(e)}")
	        return None
	    finally:
	        # Best-effort cleanup; the file may not exist if writing failed.
	        try:
	            os.remove(temp_filename)
	        except OSError:
	            pass

	# Function to get the user command and process it using the chat endpoint
	def get_user_command_action(user_command):
	    """Ask the chat model which UI action a user command maps to.

	    Args:
	        user_command: Text of the user's command; it is embedded
	            verbatim in the prompt.

	    Returns:
	        The message content of the model's first choice, returned
	        as-is without parsing or validation.
	    """
	    messages = [
	        {'role': 'system', 'content': 'You are an assistant that provides structured JSON responses based on user commands.'},
	        {'role': 'user', 'content': f"Interpret the following user command: '{user_command}' and provide the action as a structured JSON response. The available actions are:\n1. log_in\n2. sign_up\n3. view_products\n4. search_products\n5. add_to_cart\n6. checkout\n7. view_orders\n8. log_out\n9. contact_support\nRespond with the appropriate action as: {{\"action\": \"<action_name>\"}}"}
	    ]

	    response = openai.chat.completions.create(
	        model='gpt-3.5-turbo',
	        messages=messages
	    )

	    action = response.choices[0].message.content

	    return action

	def process_action(action_data):
	    """Parse the model's JSON reply and print the matching UI step.

	    Args:
	        action_data: JSON text, expected to be an object of the form
	            ``{"action": "<action_name>"}``.

	    Raises:
	        json.JSONDecodeError: If ``action_data`` is not valid JSON.
	        TypeError: If ``action_data`` is not a str, bytes or bytearray.

	    Anything that parses but does not name a known action prints
	    "Unknown action!".
	    """
	    payload = json.loads(action_data)

	    # Valid JSON is not necessarily an object; only a dict can carry an
	    # "action" key.
	    action = payload.get('action', 'unknown') if isinstance(payload, dict) else 'unknown'

	    # Action name -> message shown for that UI step.
	    messages = {
	        'log_in': "Redirecting to Log In page...",
	        'sign_up': "Redirecting to Sign Up page...",
	        'view_products': "Showing product listings...",
	        'search_products': "Initiating product search...",
	        'add_to_cart': "Adding product to cart...",
	        'checkout': "Proceeding to checkout...",
	        'view_orders': "Showing order history...",
	        'log_out': "Logging out...",
	        'contact_support': "Redirecting to support page...",
	    }

	    # Non-string values can never match and may be unhashable, so they
	    # must not be used as a dict key.
	    if isinstance(action, str) and action in messages:
	        print(messages[action])
	    else:
	        print("Unknown action!")

	def main():
	    """Run one record -> transcribe -> interpret -> dispatch cycle.

	    Records five seconds of audio, transcribes it, asks the chat model
	    for the matching action and prints the corresponding UI step. Any
	    exception raised along the way is printed instead of propagating.
	    """
	    import os  # local import: only needed for the API-key lookup

	    # Prefer a key supplied through the environment; the placeholder
	    # remains the fallback.
	    openai.api_key = os.environ.get("OPENAI_API_KEY") or "YOUR OPEN AI KEY"
	    try:
	        audio = record_audio(5)
	        # Transcribe audio
	        print("\nTranscribing audio...")
	        transcript = transcribe_audio(audio)
	        if transcript:
	            print(f"\nTranscript: {transcript}")
	            print("\nGetting response from GPT...")
	            response = get_user_command_action(transcript)
	            if response:
	                print("\nGPT Response:")
	                print(response)
	                # Dispatch only when there is a reply to parse; an
	                # empty or None reply would fail inside json.loads.
	                process_action(response)
	            else:
	                print("\nNo response received from GPT.")

	    except Exception as e:
	        print(f"An error occurred: {str(e)}")

	# Run the voice-command pipeline only when this file is executed as a script.
	if __name__ == "__main__":
	    main()