alexpovel/Dockerfile

## convert.py
#!/usr/bin/env python3

import logging
import sys
from pathlib import Path
from subprocess import run

import speech_recognition as sr

# The language is the first positional argument. Fail fast with a usage hint
# instead of an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: <list of files on stdin> | convert.py LANGUAGE")
target_language = sys.argv[1]

# Recognizer instance shared by all calls to process().
RECOGNIZER = sr.Recognizer()

# Input files arrive on stdin, one path per line. Blank lines are dropped and
# duplicates collapse because the paths are collected into a set.
files = {Path(line.strip()) for line in sys.stdin.read().split("\n") if line.strip()}

def convert_to_audio(file: Path, to_suffix: str) -> Path:
    """Extract the audio track of `file` into a sibling file with suffix `to_suffix`.

    The conversion is skipped when the target file already exists. Otherwise
    ffmpeg writes to a temporary sibling file that is renamed onto the target
    only after ffmpeg succeeded, so a failed or interrupted run never leaves a
    truncated target behind that a later run would mistake for a finished one.

    Args:
        file: Path of the input media file.
        to_suffix: Suffix of the audio file to create, including the dot
            (e.g. ".flac").

    Returns:
        Path of the audio file (pre-existing or freshly converted).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    new_file = file.with_suffix(to_suffix)
    if new_file.exists():
        logging.warning(f"File {new_file} exists, skipping audio conversion.")
        return new_file

    logging.info(f"Converting {file} to {new_file}...")
    # Keep the real suffix last so the output file extension stays unchanged.
    partial_file = new_file.with_name(f"{new_file.stem}.partial{new_file.suffix}")
    try:
        run(
            [
                "ffmpeg",
                "-y",  # A stale partial file of an aborted run may be overwritten.
                "-i",
                str(file.absolute()),
                "-vn",
                str(partial_file.absolute()),
            ],
            check=True,
        )
        partial_file.replace(new_file)
    except BaseException:
        # Also covers KeyboardInterrupt: never keep a half-written file around.
        partial_file.unlink(missing_ok=True)
        raise
    logging.info(f"Converted {file} to {new_file}.")
    return new_file


def process(file: Path) -> None:
    """Transcribe `file` and write the text next to it as `<name>.transcribed.txt`.

    Nothing is done when the transcript already exists. Otherwise the audio
    track is extracted to FLAC via `convert_to_audio`, read with
    `RECOGNIZER.record` and transcribed by `RECOGNIZER.recognize_whisper` using
    the module-level `target_language`.

    Args:
        file: Path of the input media file.
    """
    text_file = file.with_suffix(".transcribed.txt")
    if text_file.exists():
        # Checked first: no point in running ffmpeg for a finished transcript.
        logging.warning(f"File {text_file} exists, skipping audio recognition.")
        return

    audio_file = convert_to_audio(file, to_suffix=".flac")
    logging.warning(f"Starting audio processing on {audio_file}...")

    logging.warning(f"Getting audio of {audio_file}...")
    with sr.AudioFile(str(audio_file)) as source:
        audio = RECOGNIZER.record(source)
    logging.warning(f"Got audio of {audio_file}, starting recognition...")

    text = RECOGNIZER.recognize_whisper(audio, language=target_language)
    logging.warning(f"Finished audio processing on {audio_file}.")

    # Explicit UTF-8: transcripts routinely contain non-ASCII characters and the
    # platform default encoding is not guaranteed to be able to represent them.
    text_file.write_text(text, encoding="utf-8")
    logging.warning(f"Wrote results to {text_file}.")


def main() -> None:
    """Transcribe every input file, one after another.

    Files are handled in sorted order so that runs and their logs are
    reproducible; iterating the `files` set directly yields an arbitrary order.
    """
    # Sequential on purpose: the recognition library itself seems to be doing
    # well utilizing multiple cores, so fanning out over the files (e.g. with
    # `multiprocessing.Pool(...).map(process, files)`) isn't really necessary.
    for file in sorted(files):
        process(file)


if __name__ == "__main__":
    # Without configuration the root logger only emits WARNING and above, which
    # silently drops the `logging.info` progress messages of `convert_to_audio`.
    logging.basicConfig(level=logging.INFO)
    main()

## Dockerfile
# Image for convert.py: Python plus ffmpeg (audio extraction) and the speech
# recognition packages.
FROM python:3.10.8-bullseye

# Install the system packages and drop the apt package lists in the same layer,
# so the lists do not end up in the image.
# NOTE(review): python3-pyaudio is a Debian package; presumably it targets the
# distribution's own Python rather than the one this image runs - confirm it is needed.
RUN apt-get update && apt-get install --yes --no-install-recommends \
    python3-pyaudio=0.2.11-1.3+b1 \
    ffmpeg=7:4.3.5-0+deb11u1 \
    && rm -rf /var/lib/apt/lists/*

# Alternatively, use git+https://github.com/openai/whisper.git
RUN pip install --no-cache-dir \
    speechrecognition==3.9.0 \
    whisper.ai==1.0.0.1

# The run scripts invoke the script as `python /convert.py`, so it has to end up
# in the root directory (no WORKDIR is set in this file).
COPY convert.py .

## run.ps1
# Build the image, then transcribe every .mp4 below ./videos (language: german).
docker build --tag transcript .
# Only start a container when the build succeeded; otherwise a stale image would run.
if ($LASTEXITCODE -eq 0) {
    # --rm removes the container afterwards instead of keeping one stopped
    # container per run; the quotes keep the mount working for paths with spaces.
    docker run --rm -v "${PWD}/videos:/videos" transcript bash -c 'ls /videos/*.mp4 | python /convert.py german'
}

## run.sh
#!/bin/bash

# Build the image, then transcribe every .mp4 below ./videos (language: english).
# --rm removes the container afterwards instead of keeping one stopped container
# per run; the quoted volume argument keeps working when the current directory
# contains spaces.
docker build --tag transcript . && docker run --rm -v "$(pwd)/videos:/videos" transcript bash -c 'ls /videos/*.mp4 | python /convert.py english'
	#!/usr/bin/env python3

	import logging
	import sys
	from pathlib import Path
	from subprocess import run

	import speech_recognition as sr

	# The language is the first positional argument. Fail fast with a usage hint
	# instead of an IndexError traceback when it is missing.
	if len(sys.argv) < 2:
	    sys.exit("usage: <list of files on stdin> | convert.py LANGUAGE")
	target_language = sys.argv[1]

	# Recognizer instance shared by all calls to process().
	RECOGNIZER = sr.Recognizer()

	# Input files arrive on stdin, one path per line. Blank lines are dropped and
	# duplicates collapse because the paths are collected into a set.
	files = {Path(line.strip()) for line in sys.stdin.read().split("\n") if line.strip()}

	def convert_to_audio(file: Path, to_suffix: str) -> Path:
	    """Extract the audio track of `file` into a sibling file with suffix `to_suffix`.

	    The conversion is skipped when the target file already exists. Otherwise
	    ffmpeg writes to a temporary sibling file that is renamed onto the target
	    only after ffmpeg succeeded, so a failed or interrupted run never leaves a
	    truncated target behind that a later run would mistake for a finished one.

	    Args:
	        file: Path of the input media file.
	        to_suffix: Suffix of the audio file to create, including the dot
	            (e.g. ".flac").

	    Returns:
	        Path of the audio file (pre-existing or freshly converted).

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	    """
	    new_file = file.with_suffix(to_suffix)
	    if new_file.exists():
	        logging.warning(f"File {new_file} exists, skipping audio conversion.")
	        return new_file

	    logging.info(f"Converting {file} to {new_file}...")
	    # Keep the real suffix last so the output file extension stays unchanged.
	    partial_file = new_file.with_name(f"{new_file.stem}.partial{new_file.suffix}")
	    try:
	        run(
	            [
	                "ffmpeg",
	                "-y",  # A stale partial file of an aborted run may be overwritten.
	                "-i",
	                str(file.absolute()),
	                "-vn",
	                str(partial_file.absolute()),
	            ],
	            check=True,
	        )
	        partial_file.replace(new_file)
	    except BaseException:
	        # Also covers KeyboardInterrupt: never keep a half-written file around.
	        partial_file.unlink(missing_ok=True)
	        raise
	    logging.info(f"Converted {file} to {new_file}.")
	    return new_file


	def process(file: Path) -> None:
	    """Transcribe `file` and write the text next to it as `<name>.transcribed.txt`.

	    Nothing is done when the transcript already exists. Otherwise the audio
	    track is extracted to FLAC via `convert_to_audio`, read with
	    `RECOGNIZER.record` and transcribed by `RECOGNIZER.recognize_whisper` using
	    the module-level `target_language`.

	    Args:
	        file: Path of the input media file.
	    """
	    text_file = file.with_suffix(".transcribed.txt")
	    if text_file.exists():
	        # Checked first: no point in running ffmpeg for a finished transcript.
	        logging.warning(f"File {text_file} exists, skipping audio recognition.")
	        return

	    audio_file = convert_to_audio(file, to_suffix=".flac")
	    logging.warning(f"Starting audio processing on {audio_file}...")

	    logging.warning(f"Getting audio of {audio_file}...")
	    with sr.AudioFile(str(audio_file)) as source:
	        audio = RECOGNIZER.record(source)
	    logging.warning(f"Got audio of {audio_file}, starting recognition...")

	    text = RECOGNIZER.recognize_whisper(audio, language=target_language)
	    logging.warning(f"Finished audio processing on {audio_file}.")

	    # Explicit UTF-8: transcripts routinely contain non-ASCII characters and the
	    # platform default encoding is not guaranteed to be able to represent them.
	    text_file.write_text(text, encoding="utf-8")
	    logging.warning(f"Wrote results to {text_file}.")


	def main() -> None:
	    """Transcribe every input file, one after another.

	    Files are handled in sorted order so that runs and their logs are
	    reproducible; iterating the `files` set directly yields an arbitrary order.
	    """
	    # Sequential on purpose: the recognition library itself seems to be doing
	    # well utilizing multiple cores, so fanning out over the files (e.g. with
	    # `multiprocessing.Pool(...).map(process, files)`) isn't really necessary.
	    for file in sorted(files):
	        process(file)


	if __name__ == "__main__":
	    # Without configuration the root logger only emits WARNING and above, which
	    # silently drops the `logging.info` progress messages of `convert_to_audio`.
	    logging.basicConfig(level=logging.INFO)
	    main()
	# Image for convert.py: Python plus ffmpeg (audio extraction) and the speech
	# recognition packages.
	FROM python:3.10.8-bullseye

	# Install the system packages and drop the apt package lists in the same layer,
	# so the lists do not end up in the image.
	# NOTE(review): python3-pyaudio is a Debian package; presumably it targets the
	# distribution's own Python rather than the one this image runs - confirm it is needed.
	RUN apt-get update && apt-get install --yes --no-install-recommends \
	    python3-pyaudio=0.2.11-1.3+b1 \
	    ffmpeg=7:4.3.5-0+deb11u1 \
	    && rm -rf /var/lib/apt/lists/*

	# Alternatively, use git+https://github.com/openai/whisper.git
	RUN pip install --no-cache-dir \
	    speechrecognition==3.9.0 \
	    whisper.ai==1.0.0.1

	# The run scripts invoke the script as `python /convert.py`, so it has to end up
	# in the root directory (no WORKDIR is set in this file).
	COPY convert.py .
	# Build the image, then transcribe every .mp4 below ./videos (language: german).
	docker build --tag transcript .
	# Only start a container when the build succeeded; otherwise a stale image would run.
	if ($LASTEXITCODE -eq 0) {
	    # --rm removes the container afterwards instead of keeping one stopped
	    # container per run; the quotes keep the mount working for paths with spaces.
	    # The pipe must be a plain `|`: an escaped `\|` reaches `ls` as a literal argument.
	    docker run --rm -v "${PWD}/videos:/videos" transcript bash -c 'ls /videos/*.mp4 | python /convert.py german'
	}
	#!/bin/bash

	# Build the image, then transcribe every .mp4 below ./videos (language: english).
	# --rm removes the container afterwards instead of keeping one stopped container
	# per run; the quoted volume argument keeps working when the current directory
	# contains spaces. The pipe must be a plain `|`: an escaped `\|` reaches `ls` as
	# a literal argument instead of piping the file list into the script.
	docker build --tag transcript . && docker run --rm -v "$(pwd)/videos:/videos" transcript bash -c 'ls /videos/*.mp4 | python /convert.py english'