Skip to content

Instantly share code, notes, and snippets.

@zeitiger
Created November 23, 2023 21:36
Show Gist options
  • Save zeitiger/71d394c47606cc082907d71fe7d2ec0c to your computer and use it in GitHub Desktop.
Save zeitiger/71d394c47606cc082907d71fe7d2ec0c to your computer and use it in GitHub Desktop.
Python script for splitting a video based on detected speakers: This script utilizes frame analysis to identify speakers in a video, comparing frames against known speaker images and creating separate video clips for each speaker found. The code employs similarity calculations and MoviePy for video processing, offering a starting point for speak…
import cv2
import numpy as np
from moviepy.editor import VideoFileClip
from moviepy.video.io.ffmpeg_writer import FFMPEG_VideoWriter as VideoFileClipWriter
# Most recent frame written for each speaker slot (list index == writer index).
# An entry stays None until that speaker has been the best match at least once;
# detect_and_write_frame reads and updates this list on every frame.
lastest_speaker_frames = [None for _ in range(6)]
# TODO find a better way to calculate ssim
def calculate_ssim(x, y):
    """Return a similarity score between two colour images.

    Despite the name, this is not a true SSIM: both images are reduced to
    grayscale and compared with OpenCV template matching using the
    normalised correlation coefficient (``cv2.TM_CCOEFF_NORMED``).

    Args:
        x: Image passed to ``cv2.matchTemplate`` as the search image.
        y: Image passed to ``cv2.matchTemplate`` as the template.

    Returns:
        The raw ``cv2.matchTemplate`` result array. Per the OpenCV docs this
        holds a single element when both images have the same size, which is
        what callers in this file rely on when they call ``.item()`` on it.
    """
    # Both inputs are converted with the same BGR->gray conversion code.
    search_gray, template_gray = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (x, y)
    )
    return cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
def detect_and_write_frame(frame, known_speakers, writers):
    """Route one video frame to the writer of the best-matching speaker.

    The frame is scored against every reference image; the writer whose index
    has the highest score receives the frame itself. Every other writer
    receives the last frame previously stored for its slot, or a solid green
    frame if nothing has been stored for it yet.

    Args:
        frame: The current video frame.
        known_speakers: Reference images, one per speaker, scored with
            ``calculate_ssim``.
        writers: Objects exposing ``write_frame``; index ``i`` corresponds to
            ``known_speakers[i]`` and ``lastest_speaker_frames[i]``.
    """
    responses = [calculate_ssim(frame, reference) for reference in known_speakers]
    scores = [response.item() for response in responses]
    winner = np.argmax(scores)

    for slot, sink in enumerate(writers):
        if slot == winner:
            sink.write_frame(frame)
            # Remember this frame so the slot can repeat it while inactive.
            lastest_speaker_frames[winner] = frame
            continue

        held_frame = lastest_speaker_frames[slot]
        if held_frame is None:
            # Nothing stored for this slot yet: emit a green placeholder.
            sink.write_frame(green_frame(frame))
        else:
            sink.write_frame(held_frame)
def green_frame(frame):
    """Return a solid green image with the same shape and dtype as ``frame``.

    A new zero-filled array is created and channel index 1 (the green
    channel) is set to 255; the input array is not modified.
    """
    placeholder = np.zeros_like(frame)
    placeholder[:, :, 1] = 255
    return placeholder
def split_video(input_file):
    """Split a video into one output clip per known speaker.

    Six reference images (``tales-from-the-loop/speaker1.png`` ..
    ``speaker6.png``) are loaded, and every frame of ``input_file`` is passed
    to ``detect_and_write_frame`` together with six writers targeting
    ``output_speaker_1.mp4`` .. ``output_speaker_6.mp4``.

    Args:
        input_file: Path of the source video.

    Raises:
        FileNotFoundError: If one of the speaker reference images cannot be
            read.
    """
    speaker_count = 6
    video = VideoFileClip(input_file)
    video_writers = []
    try:
        known_speakers = []
        for index in range(speaker_count):
            image_path = f"tales-from-the-loop/speaker{index + 1}.png"
            image = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here rather than with an obscure cvtColor error later on.
            if image is None:
                raise FileNotFoundError(
                    f"Could not read speaker image: {image_path}"
                )
            # cv2.imread yields BGR while MoviePy frames are RGB. Bring the
            # references into the frames' channel order so both sides of the
            # comparison are converted to grayscale with the same weights.
            known_speakers.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Writers are created one by one inside the try block so that any
        # already-opened writer is still closed if a later one fails to open.
        for index in range(speaker_count):
            video_writers.append(
                VideoFileClipWriter(
                    f"output_speaker_{index + 1}.mp4", video.size, video.fps
                )
            )

        print(f"All frame count {video.reader.nframes}")
        for i, frame in enumerate(video.iter_frames(), start=1):
            detect_and_write_frame(frame, known_speakers, video_writers)
            if i % 100 == 0:
                print(f"Processing frame {i} of {video.reader.nframes}")
    finally:
        # Always finalise the outputs and release the source clip, even when
        # processing stops early because of an error.
        for video_writer in video_writers:
            video_writer.close()
        video.close()
# Script entry point: runs the split on the bundled sample video as soon as
# this module is executed (note: this also runs on import).
split_video("tales-from-the-loop/source.mp4")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment