# jlmalone/whisperpy.py

## whisperpy.py
# Usage
# python3 whisperpy.py [filename_input or youtube url]
# The script will transcribe the audio and create an SRT file with the same name as the input file
# (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.


import sys
import whisper
import hashlib
from pytube import YouTube
from datetime import timedelta
import os

def download_video(url):
    """Download a YouTube video into the current working directory.

    The local file is named after the MD5 hex digest of the video title,
    with an ``.mp4`` extension.

    Args:
        url: URL of the video; passed unchanged to ``pytube.YouTube``.

    Returns:
        A dict with two keys: ``"file_name"`` (name of the downloaded file)
        and ``"title"`` (the video title reported by pytube).

    Raises:
        RuntimeError: If pytube offers no stream to download for this video.
    """
    print("Start downloading", url)
    yt = YouTube(url)
    title = yt.title  # read once; used for both the hash and the return value

    # MD5 is used only to derive a file name made of hex digits, not for security.
    file_name = f"{hashlib.md5(title.encode()).hexdigest()}.mp4"

    stream = yt.streams.first()
    if stream is None:
        # An empty stream query yields None; fail with a clear message instead
        # of an AttributeError on the chained ``.download`` call.
        raise RuntimeError(f"No downloadable stream found for {url}")

    stream.download("", file_name)
    print("Downloaded to", file_name)

    return {"file_name": file_name, "title": title}

def _is_youtube_url(path):
    """Return True if *path* starts with a known YouTube URL prefix."""
    return path.startswith((
        "https://www.youtube.com",
        "https://youtube.com",
        "https://m.youtube.com",
        "https://youtu.be",
    ))


def _format_srt_timestamp(seconds):
    """Format an offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _safe_file_stem(title):
    """Replace path separators and other unsafe characters with underscores."""
    cleaned = "".join(
        "_" if (ch in '\\/:*?"<>|' or ord(ch) < 32) else ch
        for ch in title
    ).strip()
    return cleaned or "transcript"


def transcribe_audio(path, model_name="large"):
    """Transcribe a media file or YouTube video and write the result as SRT.

    Args:
        path: Path of a local media file, or a YouTube URL. URLs are
            downloaded first via ``download_video``.
        model_name: Name of the Whisper model to load. Defaults to
            ``"large"``.

    Returns:
        Path of the SRT file written to the current directory. For a local
        file it is named after the input file (extension removed); for a
        YouTube URL it is named after the video title with unsafe characters
        replaced by underscores.
    """
    model = whisper.load_model(model_name)
    print("Whisper model loaded.")

    if _is_youtube_url(path):
        video = download_video(path)
        media_file = video["file_name"]
        # The URL's basename ("watch?v=...") is not a meaningful file name,
        # so the subtitle file is named after the video title instead.
        stem = _safe_file_stem(video["title"])
    else:
        media_file = path
        stem = os.path.splitext(os.path.basename(path))[0]

    segments = model.transcribe(media_file)["segments"]

    srt_filename = os.path.join(".", f"{stem}.srt")
    # Open once in write mode: re-running replaces the previous transcript
    # rather than appending duplicate cues, and an input with no segments
    # still produces a (empty) file and a valid return value.
    with open(srt_filename, "w", encoding="utf-8") as srt_file:
        for segment in segments:
            segment_id = segment["id"] + 1  # SRT cue numbers start at 1
            print(f"Processing segment: {segment_id}")

            text = segment["text"].strip()
            print(f"Segment text: {text}")

            start_time = _format_srt_timestamp(segment["start"])
            end_time = _format_srt_timestamp(segment["end"])
            srt_file.write(
                f"{segment_id}\n{start_time} --> {end_time}\n{text}\n\n"
            )

    return srt_filename

if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, a local media
    # file path or a YouTube URL.
    if len(sys.argv) != 2:
        print("Usage: python whisperpy.py [system path of webm or mp4 file OR YouTube URL]", file=sys.stderr)
        sys.exit(1)

    input_path = sys.argv[1]
    try:
        result = transcribe_audio(input_path)
    except Exception as e:
        # Top-level boundary: report the failure on stderr and exit non-zero
        # so shells and calling scripts can detect that no SRT was produced.
        print("Error:", str(e), file=sys.stderr)
        sys.exit(1)
    print("SRT file created:", result)


# this is derived from https://github.com/openai/whisper/discussions/98#discussioncomment-4583593
	# Usage
	# python3 whisperpy.py [filename_input or youtube url]
	# The script will transcribe the audio and create an SRT file with the same name as the input file
	# (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.


	import sys
	import whisper
	import hashlib
	from pytube import YouTube
	from datetime import timedelta
	import os

	def download_video(url):
		"""Download a YouTube video into the current working directory.

		The local file is named after the MD5 hex digest of the video title,
		with an ``.mp4`` extension.

		Args:
			url: URL of the video; passed unchanged to ``pytube.YouTube``.

		Returns:
			A dict with two keys: ``"file_name"`` (name of the downloaded file)
			and ``"title"`` (the video title reported by pytube).

		Raises:
			RuntimeError: If pytube offers no stream to download for this video.
		"""
		print("Start downloading", url)
		yt = YouTube(url)
		title = yt.title  # read once; used for both the hash and the return value

		# MD5 is used only to derive a file name made of hex digits, not for security.
		file_name = f"{hashlib.md5(title.encode()).hexdigest()}.mp4"

		stream = yt.streams.first()
		if stream is None:
			# An empty stream query yields None; fail with a clear message instead
			# of an AttributeError on the chained ``.download`` call.
			raise RuntimeError(f"No downloadable stream found for {url}")

		stream.download("", file_name)
		print("Downloaded to", file_name)

		return {"file_name": file_name, "title": title}

	def _is_youtube_url(path):
		"""Return True if *path* starts with a known YouTube URL prefix."""
		return path.startswith((
			"https://www.youtube.com",
			"https://youtube.com",
			"https://m.youtube.com",
			"https://youtu.be",
		))


	def _format_srt_timestamp(seconds):
		"""Format an offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
		total_ms = max(0, int(round(seconds * 1000)))
		hours, remainder = divmod(total_ms, 3_600_000)
		minutes, remainder = divmod(remainder, 60_000)
		secs, millis = divmod(remainder, 1000)
		return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


	def _safe_file_stem(title):
		"""Replace path separators and other unsafe characters with underscores."""
		cleaned = "".join(
			"_" if (ch in '\\/:*?"<>|' or ord(ch) < 32) else ch
			for ch in title
		).strip()
		return cleaned or "transcript"


	def transcribe_audio(path, model_name="large"):
		"""Transcribe a media file or YouTube video and write the result as SRT.

		Args:
			path: Path of a local media file, or a YouTube URL. URLs are
				downloaded first via ``download_video``.
			model_name: Name of the Whisper model to load. Defaults to
				``"large"``.

		Returns:
			Path of the SRT file written to the current directory. For a local
			file it is named after the input file (extension removed); for a
			YouTube URL it is named after the video title with unsafe characters
			replaced by underscores.
		"""
		model = whisper.load_model(model_name)
		print("Whisper model loaded.")

		if _is_youtube_url(path):
			video = download_video(path)
			media_file = video["file_name"]
			# The URL's basename ("watch?v=...") is not a meaningful file name,
			# so the subtitle file is named after the video title instead.
			stem = _safe_file_stem(video["title"])
		else:
			media_file = path
			stem = os.path.splitext(os.path.basename(path))[0]

		segments = model.transcribe(media_file)["segments"]

		srt_filename = os.path.join(".", f"{stem}.srt")
		# Open once in write mode: re-running replaces the previous transcript
		# rather than appending duplicate cues, and an input with no segments
		# still produces a (empty) file and a valid return value.
		with open(srt_filename, "w", encoding="utf-8") as srt_file:
			for segment in segments:
				segment_id = segment["id"] + 1  # SRT cue numbers start at 1
				print(f"Processing segment: {segment_id}")

				text = segment["text"].strip()
				print(f"Segment text: {text}")

				start_time = _format_srt_timestamp(segment["start"])
				end_time = _format_srt_timestamp(segment["end"])
				srt_file.write(
					f"{segment_id}\n{start_time} --> {end_time}\n{text}\n\n"
				)

		return srt_filename

	if __name__ == '__main__':
		# Command-line entry point: expects exactly one argument, a local media
		# file path or a YouTube URL.
		if len(sys.argv) != 2:
			print("Usage: python whisperpy.py [system path of webm or mp4 file OR YouTube URL]", file=sys.stderr)
			sys.exit(1)

		input_path = sys.argv[1]
		try:
			result = transcribe_audio(input_path)
		except Exception as e:
			# Top-level boundary: report the failure on stderr and exit non-zero
			# so shells and calling scripts can detect that no SRT was produced.
			print("Error:", str(e), file=sys.stderr)
			sys.exit(1)
		print("SRT file created:", result)


	# this is derived from https://github.com/openai/whisper/discussions/98#discussioncomment-4583593