Last active
November 11, 2024 19:33
Listen to user speech and convert it to UI commands
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Listen to user speech and convert it to UI commands
# Şamil Korkmaz, 20.10.2024
import getpass
import json
import os
import time
from datetime import datetime

import openai
import speech_recognition as sr
def record_audio(duration):
    """Capture microphone input for a fixed length of time.

    Args:
        duration: Recording length in seconds; forwarded unchanged to
            ``Recognizer.record``.

    Returns:
        The audio object returned by ``Recognizer.record``.
    """
    listener = sr.Recognizer()
    with sr.Microphone() as mic:
        # Announce before the recording call starts so the user knows when to speak.
        print(f"Recording for {duration} seconds...")
        captured = listener.record(mic, duration=duration)
        print("Recording complete!")
    return captured
def transcribe_audio(audio):
    """Transcribe recorded audio with the OpenAI ``whisper-1`` model.

    The audio is written to a timestamped WAV file in the current working
    directory, uploaded for transcription, and the file is removed again
    whether or not the request succeeds.

    Args:
        audio: Object exposing ``get_wav_data()`` that returns the WAV
            bytes to upload.

    Returns:
        The transcribed text, or ``None`` if any step fails. Errors are
        printed rather than raised, so callers only need a truthiness check.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    temp_filename = f"temp_audio_{timestamp}.wav"
    try:
        # The API call takes an open file object, so stage the bytes on disk first.
        with open(temp_filename, "wb") as f:
            f.write(audio.get_wav_data())
        with open(temp_filename, "rb") as audio_file:
            transcript = openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
            )
        return transcript.text
    except Exception as e:
        print(f"Error in transcription: {str(e)}")
        return None
    finally:
        # Always clean up the staging file; previously one WAV file was
        # left behind per call. Removal is best-effort because the file
        # may never have been created if the failure happened early.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
def get_user_command_action(user_command):
    """Ask the chat model which UI action a user command corresponds to.

    Args:
        user_command: Text of the user's command; it is embedded verbatim
            in the prompt sent to the model.

    Returns:
        The content of the first choice in the chat completion. The prompt
        asks for a JSON string of the form ``{"action": "<action_name>"}``,
        but the reply is returned as-is without validation.
    """
    system_prompt = 'You are an assistant that provides structured JSON responses based on user commands.'
    user_prompt = f"Interpret the following user command: '{user_command}' and provide the action as a structured JSON response. The available actions are:\n1. log_in\n2. sign_up\n3. view_products\n4. search_products\n5. add_to_cart\n6. checkout\n7. view_orders\n8. log_out\n9. contact_support\nRespond with the appropriate action as: {{\"action\": \"<action_name>\"}}"
    completion = openai.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': user_prompt},
        ],
    )
    return completion.choices[0].message.content
def process_action(action_data):
    """Print the UI message for the action named in a JSON response.

    Args:
        action_data: JSON text expected to decode to an object such as
            ``{"action": "log_in"}``.

    Returns:
        None. The outcome is reported on standard output.

    Input that is not valid JSON, is ``None``, does not decode to an
    object, or names an action that is not recognised prints
    "Unknown action!" instead of raising.
    """
    messages = {
        'log_in': "Redirecting to Log In page...",
        'sign_up': "Redirecting to Sign Up page...",
        'view_products': "Showing product listings...",
        'search_products': "Initiating product search...",
        'add_to_cart': "Adding product to cart...",
        'checkout': "Proceeding to checkout...",
        'view_orders': "Showing order history...",
        'log_out': "Logging out...",
        'contact_support': "Redirecting to support page...",
    }

    try:
        payload = json.loads(action_data)
    except (TypeError, ValueError):
        # TypeError: input was not str/bytes (e.g. None).
        # ValueError: covers json.JSONDecodeError for malformed text.
        payload = None

    action = payload.get('action', 'unknown') if isinstance(payload, dict) else 'unknown'
    # Guard against non-string values (e.g. a list), which are unhashable
    # and would raise on the dictionary lookup.
    message = messages.get(action) if isinstance(action, str) else None
    print(message if message is not None else "Unknown action!")
def main():
    """Record a short voice command, transcribe it, and act on it.

    The OpenAI API key is taken from the ``OPENAI_API_KEY`` environment
    variable; if that is unset or empty the user is prompted for it without
    echo. Five seconds of audio are recorded, transcribed, mapped to an
    action by the chat model, and the action is dispatched. Any exception
    is reported on standard output instead of propagating.
    """
    try:
        # Keep the secret out of the source file: prefer the environment,
        # fall back to an interactive prompt that does not echo.
        openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

        audio = record_audio(5)

        print("\nTranscribing audio...")
        transcript = transcribe_audio(audio)
        if not transcript:
            # Nothing usable was transcribed.
            return

        print(f"\nTranscript: {transcript}")
        print("\nGetting response from GPT...")
        response = get_user_command_action(transcript)
        if not response:
            return

        print("\nGPT Response:")
        print(response)
        process_action(response)
    except Exception as e:
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment