Skip to content

Instantly share code, notes, and snippets.

@mill1000
Last active April 8, 2023 22:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mill1000/4cc4fc0de1eec2a29080c3b6d0b074b7 to your computer and use it in GitHub Desktop.
Save mill1000/4cc4fc0de1eec2a29080c3b6d0b074b7 to your computer and use it in GitHub Desktop.
Bulk audio file transcription using Whisper.
#!/usr/bin/env python3
import whisper
import argparse
import os
import math
import datetime
import sys
if __name__ == "__main__":
    # Argument parsing: model/device selection, optional renaming, the report
    # destination and the list of audio files to process.
    parser = argparse.ArgumentParser(
        description="Transcribe files using Whisper and output a summary.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--model", help="Whisper model to use.", default="base.en")
    parser.add_argument(
        "--device",
        help="Whisper device to use.",
        default="cuda",
        choices=["cuda", "cpu"],
    )
    parser.add_argument(
        "--rename",
        help="Rename files with the transcribed text.",
        action="store_true",
    )
    # Open the report file explicitly as UTF-8: with the platform default
    # encoding, transcripts containing non-ASCII characters can fail to be
    # written on systems whose locale is not UTF-8.
    parser.add_argument(
        "--output",
        help="File to output to.",
        type=argparse.FileType("w", encoding="utf-8"),
        default=sys.stdout,
    )
    parser.add_argument("files", help="Audio files to transcribe.", nargs="+")
    args = parser.parse_args()
def convert_time(time):
    """Convert a float representation of seconds to a time object.

    The value is rounded to the nearest microsecond and then split into
    hours, minutes, seconds and microseconds.

    Args:
        time: Offset in seconds. Must be non-negative and shorter than
            24 hours.

    Returns:
        datetime.time: The offset expressed as hour/minute/second/microsecond.

    Raises:
        ValueError: If *time* is negative or is 24 hours or longer, because
            ``datetime.time`` cannot represent such a value.
    """
    # Work in whole microseconds. Truncating the fractional part instead of
    # rounding would turn e.g. 2.3 s (stored as 2.2999...) into 2.299999 s.
    total_microseconds = int(round(float(time) * 1_000_000))
    total_seconds, microseconds = divmod(total_microseconds, 1_000_000)
    total_minutes, seconds = divmod(total_seconds, 60)
    # Carry whole hours out of the minute count; datetime.time rejects any
    # minute value of 60 or more.
    hours, minutes = divmod(total_minutes, 60)
    return datetime.time(
        hour=hours, minute=minutes, second=seconds, microsecond=microseconds
    )
def eprint(*values, **options):
    """Write a message to standard error.

    Takes the same positional and keyword arguments as ``print``, except
    ``file``, which is always ``sys.stderr``.
    """
    stream = sys.stderr
    print(*values, file=stream, **options)
def oprint(*values, **options):
    """Write a message to the output file selected on the command line.

    Takes the same positional and keyword arguments as ``print``, except
    ``file``, which is always ``args.output``.
    """
    destination = args.output
    print(*values, file=destination, **options)
def _renamed_path(file, text, max_prefix=32):
    """Return the path *file* should be renamed to, or None to skip renaming.

    The new name is the first *max_prefix* characters of *text*, then " - ",
    then the original file name, placed in the original directory.

    Path separators, control characters and characters commonly rejected in
    file names are replaced with "_" so transcript text such as "and/or"
    cannot break the rename or redirect it into another directory. None is
    returned when no usable prefix remains (for example an empty transcript).
    """
    unsafe = '<>:"/\\|?*'
    cleaned = "".join(
        "_" if ch in unsafe or ord(ch) < 32 else ch for ch in text[:max_prefix]
    ).strip()
    if not cleaned:
        return None
    directory, name = os.path.split(file)
    # os.path.join keeps a bare file name relative; formatting the path as
    # "{directory}/{...}" with an empty directory would point at the
    # filesystem root instead.
    new_name = f"{os.path.normcase(cleaned)} - {name}"
    return os.path.normpath(os.path.join(directory, new_name))


# Everything below needs the parsed command-line `args`, which only exist
# when the file is executed as a script.
if __name__ == "__main__":
    eprint(f"Transcribing {len(args.files)} files.")

    # Load the selected Whisper model
    eprint(f"Loading model '{args.model}' using device '{args.device}'.")
    model = whisper.load_model(args.model, device=args.device)

    # Suppress warnings from Whisper by selecting the proper FP16 mode:
    # fp16 is enabled only for the "cuda" device.
    fp16 = args.device == "cuda"

    # Convert each file
    for file in args.files:
        eprint(f"Transcribing '{file}'.")
        oprint(f"{file}")

        if not os.path.exists(file):
            oprint("File does not exist.")
            eprint(f"File '{file}' does not exist.")
            continue

        # Transcribe the file
        result = model.transcribe(file, fp16=fp16)

        # Print the complete text
        complete_text = result["text"].strip()
        oprint("Full Text:")
        oprint(f" {complete_text}")

        # Print each segment with timestamps
        oprint("Segments:")
        for segment in result["segments"]:
            text = segment["text"].strip()
            start = convert_time(segment["start"]).isoformat(timespec="milliseconds")
            end = convert_time(segment["end"]).isoformat(timespec="milliseconds")
            oprint(f" [{start} -> {end}]: {text}")
        oprint("")

        if args.rename:
            new_file = _renamed_path(file, complete_text)
            if new_file is None:
                eprint(f"Not renaming '{file}': no usable transcribed text.")
            else:
                # Rename first and report afterwards, so the message is only
                # printed for renames that actually happened. A failure is
                # reported and the remaining files are still processed.
                try:
                    os.rename(file, new_file)
                except OSError as error:
                    eprint(f"Failed to rename '{file}' to '{new_file}': {error}")
                else:
                    eprint(f"Renamed '{file}' to '{new_file}'.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment