Skip to content

Instantly share code, notes, and snippets.

@dreness
Created February 11, 2023 08:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dreness/2ca0bbd16402ff00621974e7815c51ca to your computer and use it in GitHub Desktop.
Save dreness/2ca0bbd16402ff00621974e7815c51ca to your computer and use it in GitHub Desktop.
Batch-transcribe audio files with whisper.cpp
#!/bin/zsh
#
# Batch transcribe audio files with whisper.cpp and ffmpeg
# This script requires:
# - ffmpeg installed and accessible via $PATH
# - whisper.cpp built locally:
#     git clone https://github.com/ggerganov/whisper.cpp
#     cd whisper.cpp
#     make
#   The compiled binary will be called 'main' in the whisper.cpp directory.
#   Set that path in the 'whisper' variable below.
# Path to the whisper.cpp binary.
# Defaults to the author's local build; export WHISPER_CPP_BIN to point at a
# different binary without editing this script.
whisper="${WHISPER_CPP_BIN:-/Users/andre/work/whisper.cpp/main}"
# uncomment for debugging
# set -x
# command line arguments are:
# - target directory where output files will be written
# - shell glob pattern to match audio input files
# Check for required arguments: a target directory plus at least one input
# file. On misuse, print the usage text to stderr (it is a diagnostic, not
# program output) and exit with status 1.
if [ "$#" -lt 2 ]; then
  {
    echo "Usage: $0 target_dir pattern"
    echo "e.g. $0 ~/transcriptions ~/podcasts/*.mp3"
  } >&2
  exit 1
fi
# The first argument is the directory that receives the output files.
# Inside it, processed.txt lists the files that were transcribed previously.
target_dir="${1}"
processed_file="${target_dir}/processed.txt"
# Drop the directory from the positional parameters so that only the input
# files remain in $@.
shift
# Check whether a file has already been processed.
# Globals:   processed_file (read) - list of processed paths, one per line
# Arguments: $1 - path to look up
# Outputs:   "Skipping <path>" on stdout when the path is listed
# Returns:   0 if the path is listed, 1 otherwise (including when the list
#            has not been created yet)
already_processed() {
  # On the very first run the list does not exist; nothing is processed yet,
  # and there is no point letting grep complain about the missing file.
  [ -f "$processed_file" ] || return 1
  # -F: treat the path literally, not as a regex.
  # -x: match whole lines only, so "a.mp3.wav" is not mistaken for an
  #     already-recorded "a.mp3.wav.wav".
  # -e: keep a path starting with '-' from being parsed as an option.
  if grep -F -x -q -e "$1" -- "$processed_file"; then
    echo "Skipping $1"
    return 0
  fi
  return 1
}
# Convert an audio file to WAV (16-bit signed PCM, mono, 16 kHz) with ffmpeg.
# Arguments: $1 - input audio file
#            $2 - WAV file to write
# Outputs:   a progress line on stdout, plus whatever ffmpeg prints
# Returns:   ffmpeg's exit status
convert_to_wav() {
  # Keep these local: the calling loop has its own variable named "file".
  local file=$1
  local out_file=$2
  echo "Converting $file to $out_file"
  # -nostdin: never wait for keyboard input in the middle of a batch run.
  # -y:       overwrite a stale WAV left behind by an interrupted run instead
  #           of stopping at ffmpeg's "Overwrite? [y/N]" prompt.
  ffmpeg -nostdin -y -i "$file" -acodec pcm_s16le -ac 1 -ar 16000 "${out_file}"
}
# Transcribe a WAV file with whisper.cpp and, on success, record it in the
# processed list.
# Globals:   whisper (read)          - path to the whisper.cpp binary
#            processed_file (append) - list of processed paths, one per line
# Arguments: $1 - WAV file to transcribe
# Outputs:   a progress line on stdout; an error line on stderr on failure
# Returns:   0 if the file was transcribed and recorded, otherwise the
#            non-zero status of the step that failed
transcribe_with_whisper_cpp() {
  local wav_file=$1
  local rc=0
  # transcribe file
  echo "Transcribing ${wav_file}"
  # NOTE(review): per whisper.cpp's --help, -t sets the thread count and the
  # -o* flags select the output formats (txt, vtt, srt, wts, csv) - confirm
  # against the version you built.
  "${whisper}" -t 8 -otxt -ovtt -osrt -owts -ocsv -f "${wav_file}" || rc=$?
  if [ "$rc" -ne 0 ]; then
    # Do not mark a failed transcription as done; otherwise the file would be
    # skipped on every later run.
    echo "Transcription failed (exit $rc): ${wav_file}" >&2
    return "$rc"
  fi
  # printf rather than echo: zsh's echo would interpret backslashes in a path.
  printf '%s\n' "${wav_file}" >> "$processed_file"
}
# The pushd below changes the working directory, which would silently
# re-anchor every relative path (target directory, processed list, input
# files) inside the whisper.cpp directory. Pin them to the directory the
# script was started from before moving.
start_dir=$PWD
case $target_dir in
  /*) ;;
  *) target_dir=$start_dir/$target_dir ;;
esac
case $processed_file in
  /*) ;;
  *) processed_file=$start_dir/$processed_file ;;
esac

# move into whisper directory to run whisper-cpp
whisper_dir=$(dirname -- "${whisper}")
echo "Temporarily moving into ${whisper_dir}"
# Running from the wrong directory would only produce confusing failures
# later, so stop right away if the directory cannot be entered.
pushd "${whisper_dir}" || exit 1

# loop over all files matching the pattern
for file in "$@"; do
  # ignore empty arguments
  [ -n "$file" ] || continue
  # input paths were given relative to the starting directory
  case $file in
    /*) ;;
    *) file=$start_dir/$file ;;
  esac
  # temporary WAV file, named after the input file and placed in target_dir
  wav_file="${target_dir}/${file##*/}.wav"
  echo "looking for $wav_file..."
  already_processed "$wav_file" && continue
  # convert file to WAV; without a WAV there is nothing to transcribe
  if ! convert_to_wav "$file" "${wav_file}"; then
    echo "Conversion failed, skipping $file" >&2
    rm -f -- "${wav_file}"
    continue
  fi
  # transcribe file
  transcribe_with_whisper_cpp "${wav_file}" \
    || echo "Transcription failed, will retry on next run: $file" >&2
  # clean up wav file
  rm -f -- "${wav_file}"
done
popd
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment