@0187773933
Created June 22, 2023 14:32
Extracts Transition / PowerPoint Frames from Video
#!/usr/bin/env python3
import cv2
import imagehash
import sys
from PIL import Image
import numpy as np
from pathlib import Path
from natsort import humansorted
# pip install opencv-python ImageHash pillow natsort
ALLOWED_EXTENSIONS = [ ".mp4" , ".webm" ]
HASH_DIFF_THRESHOLD = 10

def get_frame_hash(frame):
    """Convert the frame to an image and return its hash."""
    img = Image.fromarray(frame)
    return imagehash.average_hash(img)

def save_frame(frame, count, output_base_path):
    """Save the given frame as an image file."""
    print(f"Saving Frame - {count}")
    save_path = str(output_base_path.joinpath(f"{str(count).zfill(3)}.jpeg"))
    cv2.imwrite(save_path, frame)

def process_video(video_path, output_base_path):
    """Process the video and save transition frames as images."""
    # Open the video file.
    video = cv2.VideoCapture(str(video_path))
    # Get the total number of frames in the video.
    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_count = 0
    prev_frame_hash = None
    total_frames_seen = 0
    while True:
        # Read the next frame.
        ret, frame = video.read()
        if not ret:
            # We have reached the end of the video.
            break
        total_frames_seen += 1
        print(f"Processing Frame - {total_frames_seen}/{total_frames} ({100 * total_frames_seen / total_frames:.2f}%) - Total Transitions - {frame_count}")
        # Convert the frame to grayscale. This simplifies the hash calculation and is
        # good enough for detecting scene transitions.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Calculate the perceptual hash of the current frame.
        frame_hash = get_frame_hash(gray)
        if prev_frame_hash is not None:
            # Hash subtraction gives the Hamming distance between the current and previous frames.
            hash_diff = frame_hash - prev_frame_hash
            # If the difference is large enough, assume a transition has occurred.
            # Note: the default threshold of 10 is arbitrary; you may need to adjust it for your videos.
            if hash_diff > HASH_DIFF_THRESHOLD:
                # Save the frame.
                frame_count += 1
                save_frame(frame, frame_count, output_base_path)
        # Remember this frame's hash so the next frame is compared against it.
        prev_frame_hash = frame_hash
    # Release the video file.
    video.release()

if __name__ == "__main__":
    # Get the base directory path from the command line argument, or use the current directory if not specified.
    base_directory_posix_path = Path(sys.argv[1] if len(sys.argv) > 1 else Path.cwd())
    # Get all video files in the base directory and its subdirectories.
    files_posix_in_base_directory = base_directory_posix_path.glob("**/*")
    files_posix_in_base_directory = [x for x in files_posix_in_base_directory if x.is_file()]
    files_posix_in_base_directory = [x for x in files_posix_in_base_directory if x.suffix in ALLOWED_EXTENSIONS]
    video_files = humansorted(files_posix_in_base_directory)
    # Process each video file.
    for video_file in video_files:
        print(f"Processing Video - {video_file}")
        # Create the output directory for this video's frames.
        output_base_path = video_file.parent.joinpath(f"{video_file.stem}-frames")
        output_base_path.mkdir(parents=True, exist_ok=True)
        # Process the video.
        process_video(video_file, output_base_path)
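
For reference, subtracting two ImageHash objects returns the Hamming distance between the hashes, which is exactly what gets compared against HASH_DIFF_THRESHOLD. The minimal sketch below assumes two hypothetical slide screenshots, slide_a.png and slide_b.png, exist in the working directory; visually identical slides should give a distance near 0, while a slide change will usually land well above the default threshold of 10.

#!/usr/bin/env python3
import imagehash
from PIL import Image

# Average-hash each image; the hashes are 8x8 = 64 bits by default.
hash_a = imagehash.average_hash(Image.open("slide_a.png"))
hash_b = imagehash.average_hash(Image.open("slide_b.png"))

# Subtracting two ImageHash objects yields the Hamming distance
# (0 = identical bits, 64 = all bits differ).
diff = hash_a - hash_b
print(f"Hash difference: {diff}")

# The script above saves a frame whenever this distance to the previous frame
# exceeds HASH_DIFF_THRESHOLD (10 by default).
if diff > 10:
    print("Would be treated as a transition frame.")

To run the script itself, pass the directory containing your videos as the first argument (for example: python3 extract_transition_frames.py /path/to/lectures, where the file name is whatever you saved the gist as); extracted frames are written next to each video in a <video-stem>-frames folder.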