Skip to content

Instantly share code, notes, and snippets.

@JorianWoltjer
Created November 28, 2023 17:32
Show Gist options
  • Save JorianWoltjer/c5c96ea7b9af795b129201193353e3a9 to your computer and use it in GitHub Desktop.
Save JorianWoltjer/c5c96ea7b9af795b129201193353e3a9 to your computer and use it in GitHub Desktop.
import argparse
from pathlib import Path
# Command-line interface: two required positional paths plus an optional
# model-size switch. Parsing happens before the heavy imports further down.
parser = argparse.ArgumentParser()
parser.add_argument(
    "input",
    type=Path,
    help="Input audio file (.mp3, .wav, .m4a, etc.)",
)
parser.add_argument(
    "output",
    type=Path,
    help="Output text file (.txt)",
)
parser.add_argument(
    "-m",
    "--model",
    choices=["small", "medium", "large"],
    default="medium",
    help="Model size",
)
args = parser.parse_args()
from faster_whisper import WhisperModel
from tqdm import tqdm
# Load the requested Whisper model on the GPU with half-precision weights.
print("Loading model...")
model = WhisperModel(args.model, device="cuda", compute_type="float16")
# Hand the path to transcribe() rather than an open file object: the previous
# inline open(...) was never closed, leaking the handle for the whole run.
# `segments` is consumed lazily by the loop below; `info` carries the audio
# duration used for the progress bar total.
segments, info = model.transcribe(str(args.input), beam_size=5)
print(f"Reading from {args.input}")
def format_time(seconds: float) -> str:
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Fractional seconds are truncated toward zero. The minutes field is not
    wrapped at 60, so one hour renders as ``60:00``.

    Args:
        seconds: Offset in seconds (int or float).

    Returns:
        The offset as ``MM:SS``.
    """
    minutes, secs = divmod(int(seconds), 60)
    return f"{minutes:02d}:{secs:02d}"
# Write one "[MM:SS] text" line per segment as it is produced, while a
# progress bar measured in seconds of audio tracks how far we have got.
with tqdm(total=round(info.duration), unit='sec') as pbar:
    # Explicit UTF-8: transcripts can contain non-ASCII characters that the
    # platform's default encoding may be unable to represent.
    with open(args.output, "w", encoding="utf-8") as f:
        position = 0.0  # audio offset (seconds) the bar has reached so far
        for segment in segments:
            start, text = segment.start, segment.text.strip()
            f.write(f"[{format_time(start)}] {text}\n")
            # Flush after every segment so the lines written so far reach the
            # file even if the run is interrupted.
            f.flush()
            # Advance to this segment's end timestamp rather than adding its
            # length: gaps between segments would otherwise leave the bar
            # short of the total. Never move backwards.
            advance = segment.end - position
            if advance > 0:
                pbar.update(advance)
                position = segment.end
print(f"Transcription saved to {args.output}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment