Skip to content

Instantly share code, notes, and snippets.

@JoeShanahan
Created May 11, 2025 22:04
Show Gist options
  • Save JoeShanahan/2c3fbde748352f7a2881beed50e85435 to your computer and use it in GitHub Desktop.
Save JoeShanahan/2c3fbde748352f7a2881beed50e85435 to your computer and use it in GitHub Desktop.
Batch Transcribe
import os
import subprocess
from faster_whisper import WhisperModel
# Whisper model variant to load; "medium.en" is the English-only medium model.
model_size = "medium.en"

# Videos are read from, and results written under, the current working directory.
input_directory = os.getcwd()

# Run on GPU with FP16
# model = WhisperModel(model_size, device="cuda", compute_type="float16")
# or run on GPU with INT8
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
# or run on CPU with INT8
model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Make sure the output directories exist.
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence check and the makedirs call.
text_dir = os.path.join(input_directory, "Text Files")
mp3_dir = os.path.join(input_directory, "Audio Files")
os.makedirs(text_dir, exist_ok=True)
os.makedirs(mp3_dir, exist_ok=True)
def _format_timestamp(total_seconds):
    """Return *total_seconds* formatted as ``H:MM:SS`` (fractions truncated)."""
    hours = int(total_seconds // 3600)
    minutes = int((total_seconds % 3600) // 60)
    seconds = int(total_seconds % 60)
    return f"{hours}:{minutes:02}:{seconds:02}"


def transcribe_file(in_audio_file, out_text_file):
    """Transcribe an audio file and write a timestamped transcript.

    Uses the module-level ``model`` to transcribe ``in_audio_file`` and writes
    one ``[H:MM:SS] text`` paragraph per segment to ``out_text_file``.
    A segment whose text is identical to the previous segment's text is
    skipped. Progress is printed as each segment is consumed.

    Args:
        in_audio_file: Path of the audio file to transcribe.
        out_text_file: Path of the transcript file to create or overwrite.
    """
    segments, info = model.transcribe(in_audio_file, beam_size=5)
    duration = info.duration
    print(f"Transcribing file: {in_audio_file} ({_format_timestamp(duration)})")

    paragraphs = []
    previous_text = ""
    for segment in segments:
        if segment.text == previous_text:
            continue
        previous_text = segment.text
        paragraphs.append(f"[{_format_timestamp(segment.start)}] {segment.text}\n\n")

        # A zero-length (or unreadable) file reports a duration of 0; skip the
        # percentage instead of dividing by zero, and never report above 100%.
        if duration > 0:
            percent_done = min(100, int((segment.end / duration) * 100))
            print(f"{percent_done}% complete...")

    # The file is written in the platform's default encoding, which is also
    # what the summary step uses to read it back. errors="replace" keeps one
    # unencodable character (e.g. a music-note symbol on a legacy code page)
    # from raising after the whole transcription has already been computed.
    with open(out_text_file, "w", errors="replace") as file_handle:
        file_handle.write("".join(paragraphs))
    print("Done!")
def extract_audio(in_video_file, out_audio_file):
    """Extract the audio track of a video into an MP3 file using ffmpeg.

    Runs the ``ffmpeg`` executable found on PATH with its output suppressed.
    Failures are reported on stdout rather than raised; callers detect
    success by checking whether ``out_audio_file`` exists afterwards.

    Args:
        in_video_file: Path of the source video.
        out_audio_file: Path of the MP3 file to create.
    """
    cmd = [
        "ffmpeg",
        "-i", in_video_file,
        "-vn",  # No video
        "-acodec", "libmp3lame",
        "-q:a", "2",  # Good quality
        out_audio_file,
    ]
    # Remember whether the target was already there so that cleanup after a
    # failure never deletes a file this call did not create.
    existed_before = os.path.exists(out_audio_file)

    print(f"Extracting audio from: {in_video_file}")
    try:
        subprocess.run(
            cmd,
            check=True,
            # ffmpeg's messages are discarded, so any interactive prompt would
            # be invisible; detach stdin so it can never block waiting on one.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        # The ffmpeg executable itself is missing from PATH.
        print("Error converting! (ffmpeg executable not found)")
        return
    except subprocess.CalledProcessError:
        print("Error converting!")
        # A failed run can leave a truncated MP3 behind; remove it so a later
        # existence check does not mistake it for a finished conversion.
        if not existed_before and os.path.exists(out_audio_file):
            try:
                os.remove(out_audio_file)
            except OSError:
                pass  # Best-effort cleanup only.
        return
    print("Done!")
# Extract text and audio from all video files
for f in os.listdir(input_directory):
    input_file = os.path.join(input_directory, f)
    if not os.path.isfile(input_file):
        continue
    if not f.lower().endswith(".m4v"):
        continue

    # os.path.splitext removes only the final extension. str.strip() treats
    # its argument as a set of characters and would also eat matching
    # characters from both ends of the name ("video4.m4v" -> "ideo"), which
    # mangles output names and can make different videos collide.
    f_no_ext = os.path.splitext(f)[0]
    text_file = os.path.join(text_dir, f_no_ext + ".txt")
    mp3_file = os.path.join(mp3_dir, f_no_ext + ".mp3")

    # Reuse an MP3 from a previous run; otherwise try to create it now.
    if not os.path.exists(mp3_file):
        extract_audio(input_file, mp3_file)
    # extract_audio reports failure only by printing, so re-check the file.
    if not os.path.exists(mp3_file):
        print("Cannot transcribe because couldn't convert to MP3!")
        continue

    if os.path.exists(text_file):
        print(f"Text file already exists, skipping: {f}")
    else:
        transcribe_file(mp3_file, text_file)
# Merge every per-file transcript into one combined document
print("Writing summary file...")
summary_file = os.path.join(text_dir, "ALL-TEXT.TXT")
with open(summary_file, "w") as out_file_handle:
    # Only names ending in lower-case ".txt" are merged; the combined file's
    # own ".TXT" suffix fails this case-sensitive test, so it is left out.
    all_files = sorted(
        name for name in os.listdir(text_dir) if name.endswith(".txt")
    )
    for name in all_files:
        # Banner shows the transcript's name without its ".txt" suffix.
        banner = f"\n\n[ - - - - - - - - File: {name[:-4]} - - - - - - - - ]\n\n"
        out_file_handle.write(banner)
        with open(os.path.join(text_dir, name), "r") as in_file_handle:
            out_file_handle.write(in_file_handle.read())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment