Skip to content

Instantly share code, notes, and snippets.

@jlmalone
Last active June 22, 2023 08:54
Show Gist options
  • Save jlmalone/448f712733a9c6312fddc704db457fbe to your computer and use it in GitHub Desktop.
Save jlmalone/448f712733a9c6312fddc704db457fbe to your computer and use it in GitHub Desktop.
The script will transcribe the audio and create an SRT file with the same name as the input file (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.
# Usage
# python3 whisperpy.py [path to a local webm/mp4 file OR YouTube URL]
# The script will transcribe the audio and create an SRT file with the same name as the input file
# (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.
import sys
import whisper
import hashlib
from pytube import YouTube
from datetime import timedelta
import os
def download_video(url):
    """Download the first stream pytube lists for *url* into the current directory.

    The local file is named ``<md5 hex digest of the video title>.mp4`` so the
    name never contains characters taken from the title itself.

    Args:
        url: Video URL passed to pytube's ``YouTube``.

    Returns:
        A dict with two keys: ``"file_name"`` (the name of the downloaded
        file) and ``"title"`` (the title reported by pytube).
    """
    print("Start downloading", url)
    video = YouTube(url)

    # Hash the title to obtain a predictable, filesystem-safe file name.
    title_digest = hashlib.md5(video.title.encode()).hexdigest()
    target_name = f'{title_digest}.mp4'

    video.streams.first().download("", target_name)
    print("Downloaded to", target_name)

    return dict(file_name=target_name, title=video.title)
# URL prefixes that are treated as YouTube links and downloaded first.
_YOUTUBE_PREFIXES = (
    "https://www.youtube.com",
    "https://youtube.com",
    "https://m.youtube.com",
    "https://youtu.be",
)


def _format_srt_timestamp(seconds):
    """Return *seconds* (int or float) as an SRT timestamp ``HH:MM:SS,mmm``."""
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _safe_srt_stem(title):
    """Return *title* with characters that are unsafe in file names replaced."""
    cleaned = "".join(
        "_" if ch in '<>:"/\\|?*' or ord(ch) < 32 else ch for ch in title
    ).strip(" .")
    # Never produce an empty stem (e.g. a title made only of dots/spaces).
    return cleaned or "transcript"


def transcribe_audio(path):
    """Transcribe a media file or YouTube video and write the result as SRT.

    Args:
        path: Path of a local media file, or a YouTube URL (any prefix listed
            in ``_YOUTUBE_PREFIXES``). URLs are downloaded with
            ``download_video`` before transcription.

    Returns:
        The path of the SRT file written in the current directory. It is
        named after the input file's base name (without extension) for local
        files, or after the sanitized video title for YouTube URLs.

    Raises:
        Whatever ``whisper``, ``download_video`` or ``open`` raise; nothing is
        caught here.
    """
    model = whisper.load_model("large")
    print("Whisper model loaded.")

    if path.startswith(_YOUTUBE_PREFIXES):
        video = download_video(path)
        media_file = video["file_name"]
        # Name the SRT after the video title; the URL's basename
        # ("watch?v=...") is not a meaningful or portable file name.
        srt_stem = _safe_srt_stem(video["title"])
    else:
        media_file = path
        srt_stem = os.path.splitext(os.path.basename(path))[0]

    segments = model.transcribe(media_file)['segments']

    # Computed before the loop so it is defined even when there are no
    # segments (silent input).
    srt_filename = os.path.join(".", f"{srt_stem}.srt")

    # Open once in write mode: appending per segment duplicated every cue
    # when the script was re-run against an existing SRT file.
    with open(srt_filename, 'w', encoding='utf-8') as srt_file:
        for segment in segments:
            segment_id = segment['id'] + 1  # SRT cue numbers are 1-based
            print(f"Processing segment: {segment_id}")

            text = segment['text']
            if text.startswith(' '):
                text = text[1:]
            print(f"Segment text: {text}")

            # Keep millisecond precision instead of truncating to whole
            # seconds with a fixed ",000" suffix.
            start_time = _format_srt_timestamp(segment['start'])
            end_time = _format_srt_timestamp(segment['end'])
            srt_file.write(
                f"{segment_id}\n{start_time} --> {end_time}\n{text}\n\n"
            )

    return srt_filename
if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, either a local
    # media file path or a YouTube URL.
    if len(sys.argv) != 2:
        print("Usage: python whisperpy.py [system path of webm or mp4 file OR YouTube URL]")
        sys.exit(1)

    input_path = sys.argv[1]
    try:
        result = transcribe_audio(input_path)
    except Exception as e:
        # Top-level boundary: report the failure on stderr and exit non-zero
        # so shells and calling scripts can detect that no SRT was produced.
        print("Error:", str(e), file=sys.stderr)
        sys.exit(1)
    else:
        print("SRT file created:", result)
# this is derived from https://github.com/openai/whisper/discussions/98#discussioncomment-4583593
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment