Skip to content

Instantly share code, notes, and snippets.

@samilkorkmaz
Last active November 11, 2024 19:33
Listen to user speech and convert it to UI commands
# Listen to user speech and convert it to UI commands
# Şamil Korkmaz, 20.10.2024
import speech_recognition as sr
import openai
import time
import getpass
from datetime import datetime
import json
def record_audio(duration):
    """Capture microphone input for a fixed length of time.

    Args:
        duration: Recording length in seconds; forwarded unchanged to
            ``Recognizer.record``.

    Returns:
        The object produced by ``Recognizer.record`` for the captured audio.
    """
    listener = sr.Recognizer()
    # The microphone is held open only for the duration of the capture.
    with sr.Microphone() as mic:
        print(f"Recording for {duration} seconds...")
        captured = listener.record(mic, duration=duration)
        print("Recording complete!")
    return captured
def transcribe_audio(audio):
    """Transcribe recorded audio with the OpenAI Whisper endpoint.

    The audio is written to a uniquely named temporary WAV file, uploaded
    to the ``whisper-1`` model, and the temporary file is always removed
    afterwards, whether or not the transcription succeeded.

    Args:
        audio: Object exposing ``get_wav_data()`` that returns the WAV bytes
            to transcribe.

    Returns:
        The transcribed text, or ``None`` if any step failed (the error is
        printed rather than raised).
    """
    # Local imports keep this block self-contained; both are stdlib.
    import os
    import tempfile

    temp_path = None
    try:
        # mkstemp guarantees a unique name, so two recordings made within
        # the same second can no longer overwrite each other.
        fd, temp_path = tempfile.mkstemp(prefix="temp_audio_", suffix=".wav")
        with os.fdopen(fd, "wb") as wav_file:
            wav_file.write(audio.get_wav_data())

        # Reopen for reading so the upload starts from the first byte.
        with open(temp_path, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
        return transcript.text
    except Exception as e:
        print(f"Error in transcription: {str(e)}")
        return None
    finally:
        # Never leave the scratch file behind, even on failure.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
# Ask the chat endpoint to map a spoken command onto one of the known UI actions
def get_user_command_action(user_command):
    """Request a JSON-formatted action for a user command from the chat model.

    Args:
        user_command: Text of the command (interpolated into the prompt).

    Returns:
        The content of the first choice's message as returned by the model.
        The prompt asks for ``{"action": "<action_name>"}``, but the reply
        is not validated here.
    """
    system_message = {'role': 'system', 'content': 'You are an assistant that provides structured JSON responses based on user commands.'}
    user_message = {'role': 'user', 'content': f"Interpret the following user command: '{user_command}' and provide the action as a structured JSON response. The available actions are:\n1. log_in\n2. sign_up\n3. view_products\n4. search_products\n5. add_to_cart\n6. checkout\n7. view_orders\n8. log_out\n9. contact_support\nRespond with the appropriate action as: {{\"action\": \"<action_name>\"}}"}

    completion = openai.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[system_message, user_message],
    )
    # Only the first candidate reply is used.
    first_choice = completion.choices[0]
    return first_choice.message.content
def process_action(action_data):
    """Print the UI message for the action named in a JSON payload.

    Args:
        action_data: JSON text expected to decode to an object with an
            ``"action"`` key, e.g. ``'{"action": "log_in"}'``.

    Anything that cannot be resolved to a known action is reported as
    ``Unknown action!`` instead of raising: malformed JSON, a non-text
    payload, JSON that is not an object, or a missing, non-string or
    unrecognised ``"action"`` value.
    """
    action_messages = {
        'log_in': "Redirecting to Log In page...",
        'sign_up': "Redirecting to Sign Up page...",
        'view_products': "Showing product listings...",
        'search_products': "Initiating product search...",
        'add_to_cart': "Adding product to cart...",
        'checkout': "Proceeding to checkout...",
        'view_orders': "Showing order history...",
        'log_out': "Logging out...",
        'contact_support': "Redirecting to support page...",
    }

    try:
        payload = json.loads(action_data)
    except (TypeError, ValueError):
        # TypeError: payload was not str/bytes (e.g. None).
        # ValueError: covers json.JSONDecodeError for malformed text.
        payload = None

    # Valid JSON may still be a list, number, etc. with no .get().
    action = payload.get('action', 'unknown') if isinstance(payload, dict) else 'unknown'

    # A non-string action (e.g. a list) could be unhashable and break the
    # table lookup, so it is treated as unknown up front.
    if not isinstance(action, str):
        action = 'unknown'

    print(action_messages.get(action, "Unknown action!"))
def main():
    """Run one record -> transcribe -> interpret -> act cycle.

    Records five seconds of audio, transcribes it, asks the chat model for
    an action, and hands the reply to ``process_action``. The cycle stops
    quietly if the transcript or the model reply is empty; any exception is
    printed rather than propagated.
    """
    # Placeholder credential: replace with a real key before running.
    openai.api_key = "YOUR OPEN AI KEY"
    try:
        recording = record_audio(5)

        # Speech -> text
        print("\nTranscribing audio...")
        transcript = transcribe_audio(recording)
        if not transcript:
            return
        print(f"\nTranscript: {transcript}")

        # Text -> action
        print("\nGetting response from GPT...")
        reply = get_user_command_action(transcript)
        if not reply:
            return
        print("\nGPT Response:")
        print(reply)
        process_action(reply)
    except Exception as e:
        print(f"An error occurred: {str(e)}")
# Run the voice-command cycle only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment