Last active
December 20, 2023 15:07
-
-
Save greg-randall/6b8e910ef0b3c79fa4996b81b69055b9 to your computer and use it in GitHub Desktop.
Generate a timeline of screenshots from a video 'vid.mp4' and subtitles 'subs.vtt'. Use yt-dlp or similar to collect your subs and video.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import html
import os
from collections import defaultdict
from pprint import pprint

import cv2
import webvtt
# Locate the subtitle file to use: any .srt in the working directory is
# preferred, otherwise the first .vtt. Globs are sorted so the choice is
# deterministic when several candidates exist.
sub_files = sorted(glob.glob('*.srt'))
# extend(), not append(): append() would insert the whole result list as a
# single nested element, which then breaks os.path.splitext() below.
sub_files.extend(sorted(glob.glob('*.vtt')))
if not sub_files:
    raise SystemExit("No .srt or .vtt subtitle file found in the current directory.")
sub_file = sub_files[0]

# SRT input is converted to WebVTT first and saved next to the original
# (same base name, .vtt extension); the rest of the script reads that file.
sub_stem, sub_ext = os.path.splitext(sub_file)
if sub_ext == '.srt':
    vtt_file = webvtt.from_srt(sub_file)
    sub_file = sub_stem + '.vtt'
    vtt_file.save(sub_file)

# Open the WebVTT file
vtt = webvtt.read(sub_file)
# Break every caption into its individual non-blank lines.
#
# Keys are the caption's start time with the line's index (within that
# caption) appended, e.g. "00:00:05.123" + "0"; values are the stripped line
# text. The index suffix only exists to keep lines from the same caption under
# distinct keys. Insertion order is the order the lines appear in the file,
# which the duplicate-removal step below relies on.
subtitles = {}
for caption in vtt:
    k = 0
    for raw_line in caption.text.split("\n"):
        line = raw_line.strip()
        if not line:
            # Whitespace-only lines carry no text and get no entry.
            continue
        subtitles[f"{caption.start}{k}"] = line
        k += 1
# Collapse runs of identical consecutive lines: only the first entry of each
# run is kept (with its timestamp key), so text repeated from one caption to
# the next produces a single screenshot.
subtitles_clean = {}
last = ""
for timestamp, sub in subtitles.items():
    if sub == last:
        continue
    subtitles_clean[timestamp] = sub
    last = sub
# Locate the video to sample frames from: the first .mp4 in the working
# directory, or failing that the first .webm. Globs are sorted so the choice
# is deterministic when several candidates exist.
video_files = sorted(glob.glob('*.mp4'))
# extend(), not append(): append() would add the .webm result list as one
# nested element, so video_files[0] could end up being a list, not a path.
video_files.extend(sorted(glob.glob('*.webm')))
print(video_files)
if not video_files:
    raise SystemExit("No .mp4 or .webm video file found in the current directory.")
video_file = video_files[0]

cap = cv2.VideoCapture(video_file)
# VideoCapture does not raise on failure; check explicitly so a bad file is
# reported here rather than as a confusing error on the first frame read.
if not cap.isOpened():
    raise SystemExit(f"Could not open video file {video_file!r}.")
# Start index.html: document head (Bootstrap CSS plus the caption overlay
# style) and the opening container/row that the per-frame figures go into.
# Opening with "w" truncates any index.html left over from a previous run.
output_string = '''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta content="width=device-width, initial-scale=1" name="viewport">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
<title></title>
<style>figcaption{text-shadow: 0px 0px 10px #000000, 0px 0px 10px #000000, 0px 0px 10px #000000, 0px 0px 10px #000000; font-size:1.25em !important; position: absolute; bottom: 0; width: 100%; background: rgba(0, 0, 0, 0.25); color: white !important;}</style>
</head>
<body>
<div class="container mt-3">
<div class="row">
'''
# The page declares charset utf-8, so write it as UTF-8 explicitly instead of
# relying on the platform's default encoding.
with open("index.html", "w", encoding="utf-8") as f:
    f.write(output_string)
# For every de-duplicated subtitle line: seek the video to the line's
# timestamp, save that frame as a numbered JPEG (000001.jpg, 000002.jpg, ...)
# and append a captioned <figure> for it to index.html.
i = 1
try:
    for timestamp, sub in subtitles_clean.items():
        print(f"generating frame at {timestamp} with text \"{sub}\".")

        # Convert the timestamp to milliseconds.
        # NOTE(review): the key is the caption start time with a line-index
        # digit appended (presumably after an HH:MM:SS.mmm value); the extra
        # digit only affects the sub-millisecond part, which int() drops.
        hours, minutes, seconds = map(float, timestamp.split(':'))
        milliseconds = int((hours * 3600 + minutes * 60 + seconds) * 1000)

        # Set the frame position to the timestamp and read the frame there.
        cap.set(cv2.CAP_PROP_POS_MSEC, milliseconds)
        ret, frame = cap.read()
        if not ret or frame is None:
            # Seeking past the end of the video (or a decode error) yields no
            # frame; skip this line instead of crashing on frame.shape.
            print(f"could not read a frame at {timestamp}; skipping.")
            continue

        # Scale frames that are 720 px wide or wider to exactly 720 px wide,
        # keeping the aspect ratio. frame.shape is (height, width, ...).
        if frame.shape[1] >= 720:
            aspect_ratio = frame.shape[1] / frame.shape[0]
            height = int(720 / aspect_ratio)
            frame = cv2.resize(frame, (720, height))

        # Save the frame as a JPEG at quality 50.
        cv2.imwrite(f'{i:06}.jpg', frame, [int(cv2.IMWRITE_JPEG_QUALITY), 50])

        # Subtitle text comes from an external file: normalise any character
        # references it already contains, then escape it, so text such as
        # "a < b" or "Tom & Jerry" cannot break or inject markup.
        caption_html = html.escape(html.unescape(sub))

        output_string = f'''
<div class="col-md-4">
<figure class="figure" style="position: relative;">
<img loading="lazy" alt="" class="figure-img img-fluid rounded mb-0" src="{i:06}.jpg" >
<figcaption class="figure-caption text-center rounded pb-1">
{caption_html}
</figcaption>
</figure>
</div>
'''
        # Appending per frame keeps index.html usable up to the last frame
        # written if the run is interrupted. UTF-8 matches the page's
        # declared charset and lets non-ASCII subtitles be written on any
        # platform.
        with open("index.html", "a", encoding="utf-8") as f:
            f.write(output_string)
        i += 1
finally:
    # Release the capture handle even if a frame fails part-way through.
    cap.release()
# Finish index.html: close the row and container opened in the page header,
# load the Bootstrap JS bundle, and close the document.
output_string = '''</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/js/bootstrap.bundle.min.js" integrity="sha384-MrcW6ZMFYlzcLA8Nl+NtUVF0sA7MsXsP1UyJoMp4YLEuNSfAP+JcXn/tWtIaxVXM" crossorigin="anonymous"></script>
</body>
</html>
'''
# Append as UTF-8 explicitly, matching the charset the page declares, rather
# than relying on the platform's default encoding.
with open("index.html", "a", encoding="utf-8") as f:
    f.write(output_string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment