JorianWoltjer/faster-whisper.py

## faster-whisper.py
import argparse
from pathlib import Path

# Command-line interface: two positional paths and an optional model size.
parser = argparse.ArgumentParser()
parser.add_argument(
    "input",
    type=Path,
    help="Input audio file (.mp3, .wav, .m4a, etc.)",
)
parser.add_argument(
    "output",
    type=Path,
    help="Output text file (.txt)",
)
parser.add_argument(
    "-m",
    "--model",
    default="medium",
    choices=["small", "medium", "large"],
    help="Model size",
)
# Parsed at module level; argparse exits the process on invalid arguments.
args = parser.parse_args()

from faster_whisper import WhisperModel
from tqdm import tqdm

print("Loading model...")

# The model is placed on a CUDA device with float16 computation.
model = WhisperModel(args.model, device="cuda", compute_type="float16")
# Pass the path itself instead of an open() handle: the previous code opened
# the file in binary mode and never closed it, leaking the descriptor.
# `segments` is iterated later in the file; `info.duration` sizes the
# progress bar there.
segments, info = model.transcribe(str(args.input), beam_size=5)

print(f"Reading from {args.input}")

def format_time(seconds):
    """Render a duration in seconds as a zero-padded ``MM:SS`` string.

    The value is truncated to a whole number of seconds with ``int()``.
    Minutes are not wrapped at 60, so one hour is rendered as ``60:00``.
    """
    minutes, remainder = divmod(int(seconds), 60)
    return f"{minutes:02d}:{remainder:02d}"

# Write each transcribed segment to the output file as "[MM:SS] text" while
# a progress bar tracks how many seconds of audio have been covered.
with tqdm(total=round(info.duration), unit='sec') as pbar:
    # Explicit UTF-8 so non-ASCII transcripts do not depend on the platform's
    # default text encoding.
    with open(args.output, "w", encoding="utf-8") as f:
        covered = 0.0  # audio position (seconds) already reported to the bar
        for segment in segments:
            f.write(f"[{format_time(segment.start)}] {segment.text.strip()}\n")
            # Flushed per segment; presumably so the partial transcript can be
            # read while transcription is still running.
            f.flush()
            # Advance the bar to the segment's end rather than by its length,
            # so gaps between segments are counted and the bar can reach the
            # total.
            if segment.end > covered:
                pbar.update(segment.end - covered)
                covered = segment.end

print(f"Transcription saved to {args.output}")
	import argparse
	from pathlib import Path

	# Command-line interface: two positional paths and an optional model size.
	parser = argparse.ArgumentParser()
	parser.add_argument(
	    "input",
	    type=Path,
	    help="Input audio file (.mp3, .wav, .m4a, etc.)",
	)
	parser.add_argument(
	    "output",
	    type=Path,
	    help="Output text file (.txt)",
	)
	parser.add_argument(
	    "-m",
	    "--model",
	    default="medium",
	    choices=["small", "medium", "large"],
	    help="Model size",
	)
	# Parsed immediately; argparse exits the process on invalid arguments.
	args = parser.parse_args()

	from faster_whisper import WhisperModel
	from tqdm import tqdm

	print("Loading model...")

	# The model is placed on a CUDA device with float16 computation.
	model = WhisperModel(args.model, device="cuda", compute_type="float16")
	# Pass the path itself instead of an open() handle: the previous code
	# opened the file in binary mode and never closed it, leaking the
	# descriptor.
	segments, info = model.transcribe(str(args.input), beam_size=5)

	print(f"Reading from {args.input}")

	def format_time(seconds):
	    """Render a duration in seconds as a zero-padded ``MM:SS`` string.

	    The value is truncated to a whole number of seconds with ``int()``.
	    Minutes are not wrapped at 60, so one hour is rendered as ``60:00``.
	    """
	    # The body must be nested under the ``def``; the flattened indentation
	    # of the previous text was a syntax error.
	    seconds = int(seconds)
	    return f"{seconds // 60:02d}:{seconds % 60:02d}"

	# Write each transcribed segment to the output file as "[MM:SS] text"
	# while a progress bar tracks how many seconds of audio have been covered.
	# Nesting of the with/with/for statements is restored here; the flattened
	# indentation of the previous text was a syntax error.
	with tqdm(total=round(info.duration), unit='sec') as pbar:
	    # Explicit UTF-8 so non-ASCII transcripts do not depend on the
	    # platform's default text encoding.
	    with open(args.output, "w", encoding="utf-8") as f:
	        covered = 0.0  # audio position (seconds) already reported to the bar
	        for segment in segments:
	            f.write(f"[{format_time(segment.start)}] {segment.text.strip()}\n")
	            # Flushed per segment; presumably so the partial transcript
	            # can be read while transcription is still running.
	            f.flush()
	            # Advance the bar to the segment's end rather than by its
	            # length, so gaps between segments are counted too.
	            if segment.end > covered:
	                pbar.update(segment.end - covered)
	                covered = segment.end

	print(f"Transcription saved to {args.output}")