Skip to content

Instantly share code, notes, and snippets.

@geerlingguy
Created April 26, 2023 18:04
Show Gist options
  • Save geerlingguy/94eb0fee814e4168552d62fa1386dccd to your computer and use it in GitHub Desktop.
Save geerlingguy/94eb0fee814e4168552d62fa1386dccd to your computer and use it in GitHub Desktop.
AI scripts for vlog footage batch transcription
#!/bin/bash
# Append the contents of one text file to the combined output file.
#
# Arguments:
#   $1 - path of the text file to append.
#
# For each file this writes, in order: the file's base name as a Markdown
# "## " header, a blank line, the file's contents with one leading space
# stripped from every line, and three trailing newlines as a separator.
file_compile() {
  local file=$1
  local filename
  local output_file="txt-file-output.txt"
  filename=$(basename "$file")

  # The output file also ends in .txt, so a search of the current directory
  # will hand it to this function. Appending a file to itself would duplicate
  # earlier entries, so skip it (-ef: same device and inode).
  if [[ "$file" -ef "$output_file" ]]; then
    return 0
  fi

  # The name is passed as an argument rather than embedded in the format
  # string, so '%' and backslashes in file names are printed literally.
  printf 'Adding contents of: %s\n' "$filename"

  # Markdown header followed by a blank line.
  printf '## %s\n\n' "$filename" >>"$output_file"

  # File body, minus a single leading space on each line.
  sed -e 's/^ //' <"$file" >>"$output_file"

  # Separator before the next entry.
  printf '\n\n\n' >>"$output_file"
}
# Export the function so the bash processes spawned by xargs can call it.
export -f file_compile

# Locate every regular file under the current directory whose name ends in
# '.txt' (any letter case), sort the NUL-delimited paths so they are handled
# in alphabetical order, and pass them one at a time to file_compile.
find . -type f -iname '*.txt' -print0 \
  | sort -z \
  | xargs -0 -r -n 1 bash -c 'file_compile "$0"'
#!/usr/bin/env python3
# First make sure Whisper is installed:
#
# pip3 install git+https://github.com/openai/whisper.git
#
# On my Mac, I'm using pyenv to run Python 3.10 (currently) since 3.11 is too
# new for some of the dependencies.
import os
import whisper
from tqdm import tqdm
# Directory containing this script (with symlinks resolved); the search for
# video files starts from here.
current_dir = os.path.dirname(os.path.realpath(__file__))
# Number of matching video files; computed below before transcription starts.
num_files = 0
# Load the Whisper model.
print("Loading whisper model...")
model = whisper.load_model("small.en")
# Collect the files to process in a single directory walk, so the progress
# bar total and the set of files actually transcribed always agree (files
# added or removed while a long transcription runs cannot skew the count).
video_paths = []
for dirpath, dirnames, filenames in os.walk(current_dir):
    for filename in filenames:
        # The extension check is case-insensitive; the "vlog" check is not.
        if filename.lower().endswith((".mov", ".mp4")) and "vlog" in filename:
            video_paths.append(os.path.join(dirpath, filename))

num_files = len(video_paths)
print("Number of files to process: ", num_files)

# Transcribe the files, displaying a progress bar.
with tqdm(total=num_files, desc="Transcribing Files") as progress_bar:
    for filepath in video_paths:
        # Show the file currently being worked on next to the bar.
        progress_bar.set_postfix_str(os.path.basename(filepath))

        # Transcribe with whisper.
        result = model.transcribe(filepath, language="English", fp16=False)
        transcription = result['text']

        # Write the transcription next to the video, same name with '.txt'.
        # The encoding is explicit so the write does not depend on the
        # platform's locale default.
        txt_path = os.path.splitext(filepath)[0] + '.txt'
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(transcription)

        # Update the progress bar.
        progress_bar.update(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment