mvasin/transcribe.sh

## transcribe.sh
#!/bin/sh

# A Mac utility to transcribe audio files using OpenAI's API (Whisper)
#
# Usage:
#
# OPENAI_API_KEY='your-key-here' sh ./transcribe.sh [/path/to/the/dir/with/the/audio/files]
#
# If the path to the files is not specified, the utility will work in the current directory

# Abort early when the API key is missing: every request sends it in the
# Authorization header, so continuing without it cannot succeed.
if [ -z "$OPENAI_API_KEY" ]; then
  echo "OPENAI_API_KEY environment variable is not set" >&2
  exit 1
fi

MAX_SIZE=26214400 # size in bytes equivalent to 25MB
SEGMENT=1200 # if the file is too big, it will be split into 20 min segments

# Dependency checks. '>/dev/null 2>&1' is used instead of '&>' because a
# strict POSIX sh parses 'cmd &> file' as "run cmd in the background", which
# makes the check meaningless. A missing tool is reported with a non-zero
# exit status.
if ! command -v jq > /dev/null 2>&1; then
  echo "the 'jq' utility could not be found, install it with 'brew install jq'" >&2
  exit 1
fi

if ! command -v ffmpeg > /dev/null 2>&1; then
  echo "the 'ffmpeg' utility could not be found, install it with 'brew install ffmpeg'" >&2
  exit 1
fi

# Work in the directory given as the first argument, or in the current one.
WORK_DIR=${1:-$PWD}
INITIAL_PWD=$PWD
if ! cd "$WORK_DIR"; then
  # Without this check the rest of the script would silently process the
  # wrong directory.
  echo "cannot change directory to '$WORK_DIR'" >&2
  exit 1
fi

# if there are any mp4 files in the working directory, they will have the audio extracted to an m4a file
for file in *.mp4; do
  # An unmatched glob leaves the literal '*.mp4' behind; skip it.
  [ -e "$file" ] || continue

  audio="${file%.mp4}.m4a"
  # The audio was already extracted (or supplied by the user) - nothing to do.
  [ -e "$audio" ] && continue

  # -vn drops the video stream, -acodec copy keeps the audio stream as is.
  # Only stdout is discarded: '-loglevel error' already limits stderr to real
  # errors, and those should reach the user.
  if ! ffmpeg -hide_banner -loglevel error -i "$file" -vn -acodec copy "$audio" > /dev/null; then
    echo "Failed to extract audio from '$file'" >&2
    # Remove a partial output so the next run retries the extraction and a
    # truncated file is never picked up for transcription.
    rm -f -- "$audio"
  fi
done

# Upload one audio file to OpenAI's transcription endpoint and store the
# returned text next to it as '<name>.txt'.
#
# Arguments: $1 - optional path of the audio file; defaults to the global
#                 $file_to_transcribe
# Globals:   OPENAI_API_KEY (read), file_to_transcribe (read when $1 is absent)
# Outputs:   progress messages on stdout, failure details on stderr
# Exits:     with status 1 when the response has no "text" field. The callers
#            in this script start the function with '&', so only the
#            background job's subshell terminates.
transcribe(){
  local audio_file response output
  audio_file="${1:-$file_to_transcribe}"

  echo "Transcribing '$audio_file'..."
  response=$(curl -s https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: multipart/form-data" \
    -F "file=@${audio_file}" \
    -F "language=en" \
    -F "model=whisper-1")

  # Let jq decide whether the reply is a JSON object with a "text" key. A plain
  # substring search for "text" would also accept error replies that merely
  # mention the word. An empty or non-JSON reply makes jq fail as well.
  if printf '%s' "$response" | jq -e 'has("text")' > /dev/null 2>&1; then
    output="${audio_file%.*}.txt"
    printf '%s' "$response" | jq -j .text > "$output"
    echo "'$audio_file' has been transcribed successfully ✅"
  else
    echo "Failed to transcribe '$audio_file' ❌" >&2
    # Print the raw reply verbatim to help diagnose the failure.
    printf '%s\n' "$response" >&2
    exit 1
  fi
}

# Transcribe one split segment and delete it afterwards.
# Arguments: $1 - path of the segment file
# The segment is kept when transcribe fails, whether it returns non-zero or
# exits the background subshell.
transcribe_and_rm() {
  file_to_transcribe=$1
  transcribe || return 1
  rm -- "$1" # remove the split file after transcribing
}

# Transcribe every m4a/mp3 file of the working directory in a background job.
# Files larger than MAX_SIZE are first cut into SEGMENT-second pieces.
for file in *; do
  # 'case' instead of '[[ ... ]]' so the loop also works under a plain POSIX sh.
  case "$file" in
    *.m4a | *.mp3) ;;
    *) continue ;;
  esac
  [ -f "$file" ] || continue

  # 'wc -c' reports the byte count on both BSD/macOS and GNU systems; the
  # arithmetic expansion strips the padding some implementations print.
  FILE_SIZE=$(( $(wc -c < "$file") ))
  if [ "$FILE_SIZE" -gt "$MAX_SIZE" ]; then
    echo "File '$file' is too big ($FILE_SIZE bytes). Splitting into smaller segments 🪚..."
    if ! ffmpeg -hide_banner -loglevel error -i "$file" -f segment -segment_time "$SEGMENT" -c copy "${file%.*}-split-%03d.m4a"; then
      echo "Failed to split '$file' ❌" >&2
      continue
    fi
    for split_file in "${file%.*}-split-"*.m4a; do
      # An unmatched glob leaves the literal pattern behind; skip it.
      [ -e "$split_file" ] || continue
      transcribe_and_rm "$split_file" &
    done
  else
    file_to_transcribe="$file"
    transcribe &
  fi
done

# Quoted so that a starting directory containing spaces is handled correctly.
cd "$INITIAL_PWD"

# Block until every background transcription has finished.
wait
	#!/bin/sh

	# A Mac utility to transcribe audio files using OpenAI's API (Whisper)
	#
	# Usage:
	#
	# OPENAI_API_KEY='your-key-here' sh ./transcribe.sh [/path/to/the/dir/with/the/audio/files]
	#
	# If the path to the files is not specified, the utility will work in the current directory

	# Abort early when the API key is missing: every request sends it in the
	# Authorization header, so continuing without it cannot succeed.
	if [ -z "$OPENAI_API_KEY" ]; then
	  echo "OPENAI_API_KEY environment variable is not set" >&2
	  exit 1
	fi

	MAX_SIZE=26214400 # size in bytes equivalent to 25MB
	SEGMENT=1200 # if the file is too big, it will be split into 20 min segments

	# Dependency checks. '>/dev/null 2>&1' is used instead of '&>' because a
	# strict POSIX sh parses 'cmd &> file' as "run cmd in the background", which
	# makes the check meaningless. A missing tool is reported with a non-zero
	# exit status.
	if ! command -v jq > /dev/null 2>&1; then
	  echo "the 'jq' utility could not be found, install it with 'brew install jq'" >&2
	  exit 1
	fi

	if ! command -v ffmpeg > /dev/null 2>&1; then
	  echo "the 'ffmpeg' utility could not be found, install it with 'brew install ffmpeg'" >&2
	  exit 1
	fi

	# Work in the directory given as the first argument, or in the current one.
	WORK_DIR=${1:-$PWD}
	INITIAL_PWD=$PWD
	if ! cd "$WORK_DIR"; then
	  # Without this check the rest of the script would silently process the
	  # wrong directory.
	  echo "cannot change directory to '$WORK_DIR'" >&2
	  exit 1
	fi

	# if there are any mp4 files in the working directory, they will have the audio extracted to an m4a file
	for file in *.mp4; do
	  # An unmatched glob leaves the literal '*.mp4' behind; skip it.
	  [ -e "$file" ] || continue

	  audio="${file%.mp4}.m4a"
	  # The audio was already extracted (or supplied by the user) - nothing to do.
	  [ -e "$audio" ] && continue

	  # -vn drops the video stream, -acodec copy keeps the audio stream as is.
	  # Only stdout is discarded: '-loglevel error' already limits stderr to real
	  # errors, and those should reach the user.
	  if ! ffmpeg -hide_banner -loglevel error -i "$file" -vn -acodec copy "$audio" > /dev/null; then
	    echo "Failed to extract audio from '$file'" >&2
	    # Remove a partial output so the next run retries the extraction and a
	    # truncated file is never picked up for transcription.
	    rm -f -- "$audio"
	  fi
	done

	# Upload one audio file to OpenAI's transcription endpoint and store the
	# returned text next to it as '<name>.txt'.
	#
	# Arguments: $1 - optional path of the audio file; defaults to the global
	#                 $file_to_transcribe
	# Globals:   OPENAI_API_KEY (read), file_to_transcribe (read when $1 is absent)
	# Outputs:   progress messages on stdout, failure details on stderr
	# Exits:     with status 1 when the response has no "text" field. The callers
	#            in this script start the function with '&', so only the
	#            background job's subshell terminates.
	transcribe(){
	  local audio_file response output
	  audio_file="${1:-$file_to_transcribe}"

	  echo "Transcribing '$audio_file'..."
	  response=$(curl -s https://api.openai.com/v1/audio/transcriptions \
	    -H "Authorization: Bearer $OPENAI_API_KEY" \
	    -H "Content-Type: multipart/form-data" \
	    -F "file=@${audio_file}" \
	    -F "language=en" \
	    -F "model=whisper-1")

	  # Real pipes are required here: an escaped '\|' is passed to echo as a
	  # literal argument, so nothing would ever be filtered through jq.
	  # jq decides whether the reply is a JSON object with a "text" key; an empty
	  # or non-JSON reply makes jq fail as well.
	  if printf '%s' "$response" | jq -e 'has("text")' > /dev/null 2>&1; then
	    output="${audio_file%.*}.txt"
	    printf '%s' "$response" | jq -j .text > "$output"
	    echo "'$audio_file' has been transcribed successfully ✅"
	  else
	    echo "Failed to transcribe '$audio_file' ❌" >&2
	    # Print the raw reply verbatim to help diagnose the failure.
	    printf '%s\n' "$response" >&2
	    exit 1
	  fi
	}

	# Transcribe one split segment and delete it afterwards.
	# Arguments: $1 - path of the segment file
	# The segment is kept when transcribe fails, whether it returns non-zero or
	# exits the background subshell.
	transcribe_and_rm() {
	  file_to_transcribe=$1
	  transcribe || return 1
	  rm -- "$1" # remove the split file after transcribing
	}

	# Transcribe every m4a/mp3 file of the working directory in a background job.
	# Files larger than MAX_SIZE are first cut into SEGMENT-second pieces.
	for file in *; do
	  # Match on the extension with wildcard patterns ('*.m4a', not '.m4a').
	  # 'case' is used so the loop also works under a plain POSIX sh.
	  case "$file" in
	    *.m4a | *.mp3) ;;
	    *) continue ;;
	  esac
	  [ -f "$file" ] || continue

	  # 'wc -c' reports the byte count on both BSD/macOS and GNU systems; the
	  # arithmetic expansion strips the padding some implementations print.
	  FILE_SIZE=$(( $(wc -c < "$file") ))
	  if [ "$FILE_SIZE" -gt "$MAX_SIZE" ]; then
	    echo "File '$file' is too big ($FILE_SIZE bytes). Splitting into smaller segments 🪚..."
	    if ! ffmpeg -hide_banner -loglevel error -i "$file" -f segment -segment_time "$SEGMENT" -c copy "${file%.*}-split-%03d.m4a"; then
	      echo "Failed to split '$file' ❌" >&2
	      continue
	    fi
	    # '${file%.*}' strips the extension; the '*' globs over the segment numbers.
	    for split_file in "${file%.*}-split-"*.m4a; do
	      # An unmatched glob leaves the literal pattern behind; skip it.
	      [ -e "$split_file" ] || continue
	      transcribe_and_rm "$split_file" &
	    done
	  else
	    file_to_transcribe="$file"
	    transcribe &
	  fi
	done

	# Quoted so that a starting directory containing spaces is handled correctly.
	cd "$INITIAL_PWD"

	# Block until every background transcription has finished.
	wait