Skip to content

Instantly share code, notes, and snippets.

@maledorak
Last active May 23, 2023 21:48
Show Gist options
  • Save maledorak/1f84681434e427188685d07c1816a287 to your computer and use it in GitHub Desktop.
Save maledorak/1f84681434e427188685d07c1816a287 to your computer and use it in GitHub Desktop.
Microphone voice transcription and translation with OpenAI Whisper
#!/bin/bash
# ====================================
# author: https://github.com/maledorak
# email: maledorak@gmail.com
# version: 0.1
# description: Script for recording voice from microphone and sending to Whisper OpenAI
#
# how to use:
# 1. run it - this will start recording sound: ./whisper.sh -c transcribe -l 30
# 2. stop recording by pressing Ctrl+C, or run it again with the same arguments to cut the recording short
#
# tips:
# The best way to use it is to bind it to a key combination in your window manager
# and then just press it to start recording and stop recording with the same key combination
# ====================================
# Abort the script with a generic failure status (exit code 1).
fail() {
  exit 1
}
# Whisper actions this script understands
TRANSCRIBE='transcribe'
TRANSLATE='translate'
# The same actions collected in an array ...
COMMANDS_ARRAY=("$TRANSCRIBE" "$TRANSLATE")
# ... and joined with "|" (IFS is changed only inside the subshell), giving a
# string that serves both as help text and as a regex alternation.
COMMANDS_PIPED=$(
  IFS='|'
  printf '%s\n' "${COMMANDS_ARRAY[*]}"
)
# Print the usage text and terminate the script.
# Globals:   COMMANDS_PIPED (read) - "|"-joined list of supported commands
# Outputs:   usage/help text on stdout
# Exits:     always, through fail (status 1), also when help was requested
function show_help_and_exit
{
  # Values are passed as printf arguments rather than spliced into the format
  # string, so a stray '%' in them can never be interpreted as a directive.
  printf '\nUsage: %s -c [%s] -l [30] \n\n' "$(basename "$0")" "${COMMANDS_PIPED}"
  printf 'Script for recording voice from microphone and sending to Whisper OpenAI \n\n'
  printf 'Options:\n'
  printf ' %-20s - %s\n' "-h|--help" "Show this help message"
  printf ' %-20s - %s\n' "-c" "Whisper command to run [${COMMANDS_PIPED}]"
  printf ' %-20s - %s\n' "-l" "Record length in seconds (default: 30)"
  printf ' %-20s - %s\n' "-k" "Kill all whisper processes"
  printf '\n'
  fail
}
#====== Check that the required external commands are installed ====
# curl is listed because it performs the Whisper API request; the `openai`
# CLI is never invoked by this script, so it is not required.
for required_cmd in ffmpeg jq xclip curl notify-send; do
  if ! command -v "$required_cmd" &> /dev/null; then
    echo "$required_cmd could not be found"
    fail
  fi
done
unset required_cmd
#=======================================
# ----------------------------------------------------------------------------------------------------------------------
COMMAND=
LENGTH=30
# Parse the command-line options into the COMMAND and LENGTH globals.
# Globals:   COMMAND (written) - value of -c
#            LENGTH (written)  - value of -l
# Arguments: the script's command-line arguments
# Exits:     through show_help_and_exit for -h, an unknown option or a missing
#            option argument; with status 0 after handling -k
function parse_args {
  local opt OPTARG
  local OPTIND=1
  # The leading ':' puts getopts in silent mode: problems are reported through
  # opt/OPTARG instead of a built-in message, and are handled in the last two
  # arms of the case statement.
  while getopts ":hkc:l:" opt; do
    case "${opt}" in
      h)
        show_help_and_exit
        ;;
      c)
        COMMAND=${OPTARG}
        ;;
      l)
        LENGTH=${OPTARG}
        ;;
      k)
        notify-send "Whisper" "Killing all processes" &
        # NOTE(review): -f matches against full command lines, so this
        # presumably also signals this script itself when its file name
        # contains "whisper" - confirm that is intended.
        pkill -f whisper
        exit 0
        ;;
      :)
        echo "Option -${OPTARG} requires an argument" >&2
        show_help_and_exit
        ;;
      \?)
        echo "Unknown option: -${OPTARG}" >&2
        show_help_and_exit
        ;;
    esac
  done
}
parse_args "$@"
# Succeed only when $1 consists solely of digits, i.e. is a non-negative
# whole number of seconds. A '-' is rejected, so "-5", "--" and "3-4" fail.
function is_valid_length {
  [[ $1 =~ ^[[:digit:]]+$ ]]
}
# Validate command argument: it is mandatory
if [[ -z $COMMAND ]]; then
  show_help_and_exit
fi
# Validate command argument, only transcribe or translate are supported
if [[ ! "$COMMAND" =~ ^($COMMANDS_PIPED)$ ]]; then
  echo "You can use only [$COMMANDS_PIPED] as a command [-c] argument, but '${COMMAND}' was used!"
  show_help_and_exit
fi
# Validate length argument
if ! is_valid_length "$LENGTH"; then
  echo "Length [-l] argument must be a number, but '${LENGTH}' was used!"
  fail
fi
# Validate that the OpenAI API key is available in the environment
if [[ -z $OPENAI_API_KEY ]]; then
  echo "OPENAI_API_KEY environment variable is not set!"
  echo "Use the following link to set it up: https://platform.openai.com/account/api-keys"
  fail
fi
# Upload an audio file to the OpenAI Whisper API and print the recognised text.
# Globals:   TRANSCRIBE, TRANSLATE (read) - supported command names
#            OPENAI_API_KEY (read)        - bearer token sent to the API
# Arguments: $1 - command, one of $TRANSCRIBE / $TRANSLATE
#            $2 - path of the audio file to upload
# Outputs:   the recognised text on stdout, or a "Whisper API error ..." line
# Returns:   0 on success; 1 when the command is unknown, curl fails, or the
#            response has no usable "text" field
function openai_request {
  local cmd=$1
  local file_path=$2
  local -A endpoints=( ["$TRANSCRIBE"]="transcriptions" ["$TRANSLATE"]="translations" )
  local url_path=
  # An empty subscript is an error for associative arrays, so look up only
  # when a command was actually given.
  if [[ -n $cmd ]]; then
    url_path=${endpoints[$cmd]}
  fi
  if [[ -z $url_path ]]; then
    printf '%s\n' "Whisper API error unsupported command '$cmd'"
    return 1
  fi
  # https://platform.openai.com/docs/api-reference/audio/create
  # `response` is declared before the assignment so that curl's exit status is
  # not masked by `local`. -sS hides the progress meter but keeps real errors.
  local response
  if ! response=$(curl -sS "https://api.openai.com/v1/audio/$url_path" \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: multipart/form-data" \
    -F file="@$file_path" \
    -F model="whisper-1"); then
    printf '%s\n' "Whisper API error $response"
    return 1
  fi
  # jq -e exits non-zero when .text is null/absent, and jq also fails on empty
  # or non-JSON input, so every unusable response lands in the error branch.
  # The response is fed quoted, which keeps whitespace intact and prevents
  # characters such as '*' from being glob-expanded.
  local text
  if ! text=$(jq -er '.text' <<< "$response" 2> /dev/null); then
    printf '%s\n' "Whisper API error $response"
    return 1
  fi
  printf '%s\n' "$text"
  return 0
}
# Record audio from the ALSA device hw:0 into a file.
# Globals:   LENGTH (read) - value handed to ffmpeg's -t option
# Arguments: $1 - path of the output file
# Outputs:   one status line on stdout
# Returns:   0 when ffmpeg exits with 0 or 255, otherwise ffmpeg's exit code
function ffmpeg_record {
  local file_path=$1
  # Both expansions are quoted so a path containing spaces stays one argument.
  ffmpeg \
    -f alsa \
    -channels 2 \
    -sample_rate 44100 \
    -i hw:0 \
    -t "$LENGTH" \
    "$file_path"
  local code=$?
  case $code in
    255)
      # ffmpeg returns 255 when Ctrl+C is pressed or SIGINT (-2) is received
      echo "FFmpeg has stopped with SIGINT"
      return 0
      ;;
    0)
      echo "FFmpeg has stopped"
      return 0
      ;;
    *)
      echo "FFmpeg error $code"
      return "$code"
      ;;
  esac
}
# Main flow. The recording file name starts with FILE_PREFIX, so a process
# whose command line contains that prefix is treated as an already running
# recording for the same command.
FILE_PREFIX=whisper_$COMMAND
# PIDs (one array element each) of processes whose command line matches.
mapfile -t OTHER_INSTANCE_PIDS < <(pgrep -f "$FILE_PREFIX")
if (( ${#OTHER_INSTANCE_PIDS[@]} == 0 )); then
  echo "No process, FFmpeg recording"
  notify-send "Whisper" "$COMMAND is listening..." &
  # The name must keep FILE_PREFIX because the detection above relies on it.
  FFMPEG_OUT="/tmp/${FILE_PREFIX}_$(date '+%Y%m%d%H%M%S').mp3"
  # Remove the recording on every exit path, including ffmpeg/API failures.
  trap 'rm -f -- "$FFMPEG_OUT"' EXIT
  ffmpeg_record "$FFMPEG_OUT"
  ffmpeg_record_code=$?
  if [[ $ffmpeg_record_code -ne 0 ]]; then
    echo "FFmpeg error $ffmpeg_record_code"
    notify-send "Whisper" "FFmpeg error $ffmpeg_record_code" &
    exit "$ffmpeg_record_code"
  fi
  echo "FFmpeg has stopped, sending request to OpenAI"
  declare -A notify_labels=( ["$TRANSCRIBE"]="Transcribing" ["$TRANSLATE"]="Translating" )
  notify_string=${notify_labels[$COMMAND]}
  notify-send "Whisper" "$notify_string..." &
  text=$(openai_request "$COMMAND" "$FFMPEG_OUT")
  openai_code=$?
  if [[ $openai_code -ne 0 ]]; then
    notify-send "Whisper" "OpenAI error..." &
    exit "$openai_code"
  fi
  echo "OpenAI response: $text"
  # printf with a quoted argument copies the text verbatim; an unquoted echo
  # would collapse whitespace and glob-expand characters such as '*'.
  printf '%s' "$text" | xclip -selection clipboard
  notify-send "Whisper" "'$text' copied to clipboard." &
  echo "Clean up file $FFMPEG_OUT"
  exit 0
else
  echo "Process found, killing and trimming audio"
  # SIGINT asks the matching processes to stop; the instance that started the
  # recording then carries on with the request once its ffmpeg has exited.
  kill -SIGINT "${OTHER_INSTANCE_PIDS[@]}"
  # kill -0 only probes for existence; loop until none of the PIDs is left.
  while kill -0 "${OTHER_INSTANCE_PIDS[@]}" 2> /dev/null; do
    echo 'Waiting for ffmpeg to finish...'
    sleep 0.01
  done
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment