Skip to content

Instantly share code, notes, and snippets.

@jckw
Last active September 13, 2023 20:38
Show Gist options
  • Save jckw/c0e3c0fe42bdeeb1999f2d5fec247e39 to your computer and use it in GitHub Desktop.
Save jckw/c0e3c0fe42bdeeb1999f2d5fec247e39 to your computer and use it in GitHub Desktop.
Dictate to OpenAI's Whisper API from the command line
#!/bin/bash
# @author: github.com/jckw
# @dependencies:
# - sox (brew install sox), which provides the `rec` command
# - curl
# - pbcopy (ships with macOS)
# Define constants
# Path of the temporary recording that is uploaded for transcription.
readonly TMP_FILE="/tmp/openai_recording.mp3"
# API key: read from the OPENAI_API_KEY environment variable when it is set,
# so the secret does not have to be stored in this file; otherwise fall back
# to the placeholder below (replace it with a real key).
readonly OPENAI_KEY="${OPENAI_API_KEY:-YOUR_OPENAI_API_KEY}"
# Function to handle Ctrl+C interruption and stop recording.
# Globals:   SOX_PID (read) - PID of the background recording process.
# Outputs:   a status banner via display_message.
# Returns:   0; a missing or already-exited recorder is not an error.
interrupt_recording() {
  display_message "Stopping recording..."
  # SOX_PID is empty when Ctrl+C arrives before the recorder was started;
  # a bare `kill` would then only print a usage error.
  if [[ -n "${SOX_PID:-}" ]]; then
    # Send the default TERM signal; stay quiet if the process already exited.
    kill "$SOX_PID" 2>/dev/null || true
  fi
  return 0
}
# Function to display formatted messages: a blank line, a horizontal rule,
# the message, another rule, and a trailing blank line.
# Arguments: $1 - message text, printed verbatim.
# Outputs:   the framed message on stdout.
display_message() {
  local rule="----------------------------------"
  # A fixed printf format prints $1 literally; `echo "$1"` would treat
  # values such as -n or -e as options and drop them.
  printf '\n%s\n%s\n%s\n\n' "$rule" "${1-}" "$rule"
}
# Fail early with a clear message when the recorder is not installed.
if ! command -v rec >/dev/null 2>&1; then
  echo "rec (from sox) not found; install it with: brew install sox" >&2
  exit 1
fi
# Set the trap for interruption
trap interrupt_recording SIGINT
# Start the audio recording
display_message "Recording... Press Ctrl+C to stop."
rec "$TMP_FILE" &
# Get the process ID of sox for interruption handling
SOX_PID=$!
# Wait for the recording to finish. When Ctrl+C fires the trap, bash returns
# from this wait right away, possibly before sox has actually exited.
wait "$SOX_PID"
# Wait once more so sox has really exited (and finished writing the file)
# before the file is read; this replaces a fixed one-second sleep. The error
# for an already-reaped PID is silenced.
wait "$SOX_PID" 2>/dev/null
# Recording is over: give Ctrl+C its default behaviour back.
trap - SIGINT
# Nothing to transcribe if the recorder produced no data.
if [[ ! -s "$TMP_FILE" ]]; then
  echo "No audio was recorded to $TMP_FILE." >&2
  exit 1
fi
# Cleanup the temporary recording file on every exit path, including failures.
trap 'rm -f -- "$TMP_FILE"' EXIT
# Send the recording to OpenAI; with response_format=text the response body
# is the transcription itself.
display_message "Sending the recording to OpenAI..."
# --fail makes curl exit non-zero on HTTP errors (e.g. a bad API key), so an
# error response is never mistaken for a transcription. --form already sets
# the multipart Content-Type header, so no explicit header is passed.
if ! RESPONSE_TEXT=$(
  curl --silent --show-error --fail \
    --request POST \
    --url https://api.openai.com/v1/audio/transcriptions \
    --header "Authorization: Bearer $OPENAI_KEY" \
    --form "file=@$TMP_FILE" \
    --form model=whisper-1 \
    --form response_format=text
); then
  echo "Failed to send recording or parse response." >&2
  exit 1
fi
# Copy the transcription to the clipboard when pbcopy (macOS) is available.
# Quoted printf keeps the text verbatim: no glob expansion of * or ?, and no
# collapsing of whitespace or newlines.
if command -v pbcopy >/dev/null 2>&1; then
  printf '%s\n' "$RESPONSE_TEXT" | pbcopy
  display_message "Transcription copied to clipboard!"
else
  display_message "pbcopy not found; transcription was not copied to the clipboard."
fi
printf '%s\n' "$RESPONSE_TEXT"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment