# JoeShanahan/batch_transcribe.py

## batch_transcribe.py
import os
import subprocess

from faster_whisper import WhisperModel

# Name of the Whisper model handed to faster-whisper.
model_size = "medium.en"
# Videos are looked up in, and outputs written under, the current working directory.
input_directory = os.getcwd()

# Run on GPU with FP16
# model = WhisperModel(model_size, device="cuda", compute_type="float16")

# or run on GPU with INT8
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")

# or run on CPU with INT8
model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Make sure the output directories exist
text_dir = os.path.join(input_directory, "Text Files")
mp3_dir = os.path.join(input_directory, "Audio Files")

# exist_ok=True replaces the separate exists() check: there is no window
# between checking and creating, and a plain file squatting on either name
# fails here with FileExistsError instead of later during a write.
os.makedirs(text_dir, exist_ok=True)
os.makedirs(mp3_dir, exist_ok=True)


def _format_timestamp(total_seconds):
    """Return *total_seconds* formatted as ``H:MM:SS`` (fraction dropped)."""
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}:{minutes:02}:{seconds:02}"


def transcribe_file(in_audio_file, out_text_file):
    """Transcribe an audio file and write a timestamped transcript.

    The module-level ``model`` is asked to transcribe ``in_audio_file`` with
    ``beam_size=5``. Every segment is written as ``[H:MM:SS] text`` followed
    by a blank line; a segment whose text equals the previously written
    segment's text is skipped. Progress is printed after each written segment.

    Args:
        in_audio_file: Path of the audio file passed to ``model.transcribe``.
        out_text_file: Path of the transcript to create (overwritten if it
            already exists). Written as UTF-8.
    """
    segments, info = model.transcribe(in_audio_file, beam_size=5)
    duration = info.duration

    print(f"Transcribing file: {in_audio_file} ({_format_timestamp(duration)})")

    # Collect the pieces in a list and join once at the end instead of
    # growing a string with += for every segment.
    lines = []
    previous_text = ""

    for segment in segments:
        # Drop a segment that repeats the previous one verbatim.
        if segment.text == previous_text:
            continue

        lines.append(f"[{_format_timestamp(segment.start)}] {segment.text}\n\n")

        # Skip the percentage when the reported duration is zero (would
        # divide by zero), and cap it at 100 should a segment end past the
        # reported duration.
        if duration > 0:
            percent_done = min(100, int((segment.end / duration) * 100))
            print(f"{percent_done}% complete...")
        previous_text = segment.text

    # Explicit UTF-8: with the platform default encoding, a character the
    # locale cannot represent would raise UnicodeEncodeError here, after all
    # the transcription work has already been done.
    with open(out_text_file, "w", encoding="utf-8") as file_handle:
        file_handle.write("".join(lines))

    print("Done!")


def extract_audio(in_video_file, out_audio_file):
    """Extract the audio track of a video into an MP3 file using ffmpeg.

    ffmpeg is run with ``-vn`` (no video), the ``libmp3lame`` codec and
    ``-q:a 2``. Its output is discarded; a status line is printed instead.

    Args:
        in_video_file: Path of the video to read.
        out_audio_file: Path of the MP3 file to create.

    Returns:
        True if ffmpeg exited successfully, False otherwise. On failure, an
        output file that did not exist before the call is removed so that a
        partial MP3 is never mistaken for a finished one.
    """
    cmd = [
        "ffmpeg",
        "-i", in_video_file,
        "-vn",  # No video
        "-acodec", "libmp3lame",
        "-q:a", "2",  # Good quality
        out_audio_file,
    ]

    # Remember whether the target was already there: only a file created by
    # this call may be cleaned up after a failure.
    existed_before = os.path.exists(out_audio_file)

    print(f"Extracting audio from: {in_video_file}")
    try:
        subprocess.run(
            cmd,
            check=True,
            # ffmpeg's output is hidden, so it must not wait on the terminal
            # for an answer to a prompt nobody can see.
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        print("Error converting!")
    except OSError as exc:
        # Raised when the ffmpeg executable cannot be started at all
        # (for example, it is not installed or not on PATH).
        print(f"Error converting! Could not run ffmpeg: {exc}")
    else:
        print("Done!")
        return True

    if not existed_before:
        try:
            os.remove(out_audio_file)
        except OSError:
            # Best-effort cleanup: nothing was written, or it cannot be removed.
            pass
    return False

# Extract text and audio from all video files
for f in os.listdir(input_directory):
    input_file = os.path.join(input_directory, f)

    # Only regular files with an .m4v extension (any letter case) are handled.
    if not os.path.isfile(input_file) or not f.lower().endswith(".m4v"):
        continue

    # splitext() removes exactly the final extension. The previous
    # f.strip("." + ext) treated ".m4v" as a *set of characters* to strip
    # from both ends, so "movie4.m4v" became "ovie" and different videos
    # could collide on the same output name.
    f_no_ext = os.path.splitext(f)[0]

    text_file = os.path.join(text_dir, f_no_ext + ".txt")
    mp3_file = os.path.join(mp3_dir, f_no_ext + ".mp3")

    # Reuse an MP3 left by an earlier run; otherwise extract it now.
    if not os.path.exists(mp3_file):
        extract_audio(input_file, mp3_file)

    # Still missing means the extraction failed.
    if not os.path.exists(mp3_file):
        print("Cannot transcribe because couldn't convert to MP3!")
        continue

    if os.path.exists(text_file):
        print(f"Text file already exists, skipping: {f}")
    else:
        transcribe_file(mp3_file, text_file)

def _read_transcript(path):
    """Return the text of *path*, trying UTF-8 first, then the platform default."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except UnicodeDecodeError:
        # Probably written with the platform default encoding; read it that
        # way and replace anything undecodable rather than abort the summary.
        with open(path, "r", errors="replace") as handle:
            return handle.read()


# Combine all text files into a single file
print("Writing summary file...")
summary_file = os.path.join(text_dir, "ALL-TEXT.TXT")

# Explicit UTF-8 so any character found in a transcript can be written,
# whatever the platform default encoding is.
with open(summary_file, "w", encoding="utf-8") as out_file_handle:
    # The filter is case-sensitive, so the summary's own upper-case ".TXT"
    # name does not match and the summary is not fed into itself.
    all_files = sorted(f for f in os.listdir(text_dir) if f.endswith(".txt"))

    for f in all_files:
        # Header names the transcript without its ".txt" suffix.
        out_file_handle.write(f"\n\n[ - - - - - - - - File: {f[:-4]} - - - - - - - - ]\n\n")

        in_file = os.path.join(text_dir, f)
        out_file_handle.write(_read_transcript(in_file))
	import os
	import subprocess

	from faster_whisper import WhisperModel

	# Name of the Whisper model handed to faster-whisper.
	model_size = "medium.en"
	# Videos are looked up in, and outputs written under, the current working directory.
	input_directory = os.getcwd()

	# Run on GPU with FP16
	# model = WhisperModel(model_size, device="cuda", compute_type="float16")

	# or run on GPU with INT8
	# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")

	# or run on CPU with INT8
	model = WhisperModel(model_size, device="cpu", compute_type="int8")

	# Make sure the output directories exist
	text_dir = os.path.join(input_directory, "Text Files")
	mp3_dir = os.path.join(input_directory, "Audio Files")

	# exist_ok=True replaces the separate exists() check: there is no window
	# between checking and creating, and a plain file squatting on either name
	# fails here with FileExistsError instead of later during a write.
	os.makedirs(text_dir, exist_ok=True)
	os.makedirs(mp3_dir, exist_ok=True)


	def _format_timestamp(total_seconds):
		"""Return *total_seconds* formatted as ``H:MM:SS`` (fraction dropped)."""
		hours, remainder = divmod(int(total_seconds), 3600)
		minutes, seconds = divmod(remainder, 60)
		return f"{hours}:{minutes:02}:{seconds:02}"


	def transcribe_file(in_audio_file, out_text_file):
		"""Transcribe an audio file and write a timestamped transcript.

		The module-level ``model`` is asked to transcribe ``in_audio_file`` with
		``beam_size=5``. Every segment is written as ``[H:MM:SS] text`` followed
		by a blank line; a segment whose text equals the previously written
		segment's text is skipped. Progress is printed after each written segment.

		Args:
			in_audio_file: Path of the audio file passed to ``model.transcribe``.
			out_text_file: Path of the transcript to create (overwritten if it
				already exists). Written as UTF-8.
		"""
		segments, info = model.transcribe(in_audio_file, beam_size=5)
		duration = info.duration

		print(f"Transcribing file: {in_audio_file} ({_format_timestamp(duration)})")

		# Collect the pieces in a list and join once at the end instead of
		# growing a string with += for every segment.
		lines = []
		previous_text = ""

		for segment in segments:
			# Drop a segment that repeats the previous one verbatim.
			if segment.text == previous_text:
				continue

			lines.append(f"[{_format_timestamp(segment.start)}] {segment.text}\n\n")

			# Skip the percentage when the reported duration is zero (would
			# divide by zero), and cap it at 100 should a segment end past the
			# reported duration.
			if duration > 0:
				percent_done = min(100, int((segment.end / duration) * 100))
				print(f"{percent_done}% complete...")
			previous_text = segment.text

		# Explicit UTF-8: with the platform default encoding, a character the
		# locale cannot represent would raise UnicodeEncodeError here, after all
		# the transcription work has already been done.
		with open(out_text_file, "w", encoding="utf-8") as file_handle:
			file_handle.write("".join(lines))

		print("Done!")


	def extract_audio(in_video_file, out_audio_file):
		"""Extract the audio track of a video into an MP3 file using ffmpeg.

		ffmpeg is run with ``-vn`` (no video), the ``libmp3lame`` codec and
		``-q:a 2``. Its output is discarded; a status line is printed instead.

		Args:
			in_video_file: Path of the video to read.
			out_audio_file: Path of the MP3 file to create.

		Returns:
			True if ffmpeg exited successfully, False otherwise. On failure, an
			output file that did not exist before the call is removed so that a
			partial MP3 is never mistaken for a finished one.
		"""
		cmd = [
			"ffmpeg",
			"-i", in_video_file,
			"-vn",  # No video
			"-acodec", "libmp3lame",
			"-q:a", "2",  # Good quality
			out_audio_file,
		]

		# Remember whether the target was already there: only a file created by
		# this call may be cleaned up after a failure.
		existed_before = os.path.exists(out_audio_file)

		print(f"Extracting audio from: {in_video_file}")
		try:
			subprocess.run(
				cmd,
				check=True,
				# ffmpeg's output is hidden, so it must not wait on the terminal
				# for an answer to a prompt nobody can see.
				stdin=subprocess.DEVNULL,
				stdout=subprocess.DEVNULL,
				stderr=subprocess.DEVNULL,
			)
		except subprocess.CalledProcessError:
			print("Error converting!")
		except OSError as exc:
			# Raised when the ffmpeg executable cannot be started at all
			# (for example, it is not installed or not on PATH).
			print(f"Error converting! Could not run ffmpeg: {exc}")
		else:
			print("Done!")
			return True

		if not existed_before:
			try:
				os.remove(out_audio_file)
			except OSError:
				# Best-effort cleanup: nothing was written, or it cannot be removed.
				pass
		return False


	# Extract text and audio from all video files
	for f in os.listdir(input_directory):
		input_file = os.path.join(input_directory, f)

		# Only regular files with an .m4v extension (any letter case) are handled.
		if not os.path.isfile(input_file) or not f.lower().endswith(".m4v"):
			continue

		# splitext() removes exactly the final extension. The previous
		# f.strip("." + ext) treated ".m4v" as a *set of characters* to strip
		# from both ends, so "movie4.m4v" became "ovie" and different videos
		# could collide on the same output name.
		f_no_ext = os.path.splitext(f)[0]

		text_file = os.path.join(text_dir, f_no_ext + ".txt")
		mp3_file = os.path.join(mp3_dir, f_no_ext + ".mp3")

		# Reuse an MP3 left by an earlier run; otherwise extract it now.
		if not os.path.exists(mp3_file):
			extract_audio(input_file, mp3_file)

		# Still missing means the extraction failed.
		if not os.path.exists(mp3_file):
			print("Cannot transcribe because couldn't convert to MP3!")
			continue

		if os.path.exists(text_file):
			print(f"Text file already exists, skipping: {f}")
		else:
			transcribe_file(mp3_file, text_file)


	def _read_transcript(path):
		"""Return the text of *path*, trying UTF-8 first, then the platform default."""
		try:
			with open(path, "r", encoding="utf-8") as handle:
				return handle.read()
		except UnicodeDecodeError:
			# Probably written with the platform default encoding; read it that
			# way and replace anything undecodable rather than abort the summary.
			with open(path, "r", errors="replace") as handle:
				return handle.read()


	# Combine all text files into a single file
	print("Writing summary file...")
	summary_file = os.path.join(text_dir, "ALL-TEXT.TXT")

	# Explicit UTF-8 so any character found in a transcript can be written,
	# whatever the platform default encoding is.
	with open(summary_file, "w", encoding="utf-8") as out_file_handle:
		# The filter is case-sensitive, so the summary's own upper-case ".TXT"
		# name does not match and the summary is not fed into itself.
		all_files = sorted(f for f in os.listdir(text_dir) if f.endswith(".txt"))

		for f in all_files:
			# Header names the transcript without its ".txt" suffix.
			out_file_handle.write(f"\n\n[ - - - - - - - - File: {f[:-4]} - - - - - - - - ]\n\n")

			in_file = os.path.join(text_dir, f)
			out_file_handle.write(_read_transcript(in_file))