Skip to content

Instantly share code, notes, and snippets.

@HaydenFaulkner
Last active March 19, 2024 16:37
Show Gist options
  • Save HaydenFaulkner/54318fd3e9b9bdb66c5440c44e4e08b8 to your computer and use it in GitHub Desktop.
Save HaydenFaulkner/54318fd3e9b9bdb66c5440c44e4e08b8 to your computer and use it in GitHub Desktop.
Fast frame extraction from videos using Python and OpenCV
from concurrent.futures import ProcessPoolExecutor, as_completed
import cv2
import multiprocessing
import os
import sys
def print_progress(iteration, total, prefix='', suffix='', decimals=3, bar_length=100):
    """
    Call in a loop to create standard out progress bar

    The bar is redrawn in place: the output starts with a carriage return and
    has no trailing newline.

    :param iteration: current iteration
    :param total: total iterations; a total of 0 is rendered as 100% complete
    :param prefix: prefix string
    :param suffix: suffix string
    :param decimals: positive number of decimals in percent complete
    :param bar_length: character length of bar
    :return: None
    """
    if total:
        fraction = iteration / float(total)  # the fraction done
        filled_length = int(round(bar_length * iteration / float(total)))  # calculate the filled bar length
    else:
        # Nothing to iterate over: avoid dividing by zero and show a full bar.
        fraction = 1.0
        filled_length = bar_length

    format_str = "{0:." + str(decimals) + "f}"  # format the % done number string
    percents = format_str.format(100 * fraction)  # calculate the % done
    bar = '#' * filled_length + '-' * (bar_length - filled_length)  # generate the bar string
    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))  # write out the bar
    sys.stdout.flush()  # flush to stdout
def extract_frames(video_path, frames_dir, overwrite=False, start=-1, end=-1, every=1):
    """
    Extract frames from a video using OpenCVs VideoCapture

    Frames are written as JPEGs named by their zero-padded frame index into the
    sub directory ``<frames_dir>/<video filename>``, which is created if missing.

    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param overwrite: to overwrite frames that already exist?
    :param start: start frame (a negative value means frame 0)
    :param end: end frame, exclusive (a negative value means the frame count reported by the capture)
    :param every: frame spacing, only frames whose index is a multiple of this are written
    :return: count of images saved
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)  # get the video filename from the path

    assert os.path.exists(video_path), "video file not found: {}".format(video_path)

    # make sure the destination exists so direct callers don't have every write fail
    output_dir = os.path.join(frames_dir, video_filename)
    os.makedirs(output_dir, exist_ok=True)

    capture = cv2.VideoCapture(video_path)  # open the video using OpenCV
    try:
        if start < 0:  # if start isn't specified lets assume 0
            start = 0
        if end < 0:  # if end isn't specified assume the end of the video
            end = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        capture.set(cv2.CAP_PROP_POS_FRAMES, start)  # set the starting frame of the capture
        frame = start  # keep track of which frame we are up to, starting from start
        while_safety = 0  # counts consecutive failed reads so we can't loop forever
        saved_count = 0  # a count of how many frames we have saved

        while frame < end:  # lets loop through the frames until the end
            # read() must run every iteration, it is what advances the capture
            _, image = capture.read()

            if while_safety > 500:  # break the while if our safety maxs out at 500
                break

            # sometimes OpenCV reads None's during a video, in which case we want to just skip
            if image is None:
                while_safety += 1  # we skip before incrementing frame, so count the miss
                continue

            if frame % every == 0:  # a frame we want to write out based on the 'every' argument
                while_safety = 0  # reset the safety count
                save_path = os.path.join(output_dir, "{:010d}.jpg".format(frame))  # create the save path
                if not os.path.exists(save_path) or overwrite:
                    # only count the frame when imwrite reports that it was written
                    if cv2.imwrite(save_path, image):
                        saved_count += 1

            frame += 1  # increment our frame count
    finally:
        capture.release()  # always close the capture, even if reading or writing raised

    return saved_count  # the count of the images we saved
def video_to_frames(video_path, frames_dir, overwrite=False, every=1, chunk_size=1000):
    """
    Extracts the frames from a video using multiprocessing

    The video is split into consecutive frame ranges of at most chunk_size frames,
    and each range is submitted to a process pool as one extract_frames call.

    :param video_path: path to the video
    :param frames_dir: directory to save the frames
    :param overwrite: overwrite frames if they exist?
    :param every: extract every this many frames
    :param chunk_size: how many frames to split into chunks (one chunk per cpu core process)
    :return: path to the directory where the frames were saved, or None if fails
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)  # get the video filename from the path

    # make directory to save frames, its a sub dir in the frames_dir with the video name
    output_dir = os.path.join(frames_dir, video_filename)
    os.makedirs(output_dir, exist_ok=True)

    capture = cv2.VideoCapture(video_path)  # load the video
    total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))  # get its total frame count
    capture.release()  # release the capture straight away

    if total < 1:  # if video has no frames, might be an opencv error
        print("Video has no frames. Check your OpenCV + ffmpeg installation")
        return None

    # split the frames into (start, end) chunks; end is exclusive in extract_frames,
    # so the last chunk is clamped to total (not total-1) to keep the final frame
    frame_chunks = [(first, min(first + chunk_size, total)) for first in range(0, total, chunk_size)]

    prefix_str = "Extracting frames from {}".format(video_filename)  # a prefix string to be printed in progress bar

    # execute across multiple cpu cores to speed up processing, get the count automatically
    with ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
        futures = [executor.submit(extract_frames, video_path, frames_dir, overwrite, first, last, every)
                   for first, last in frame_chunks]  # submit the processes: extract_frames(...)

        # report completed/total so a single-chunk video never yields a zero denominator
        for completed, _ in enumerate(as_completed(futures), start=1):
            print_progress(completed, len(futures), prefix=prefix_str, suffix='Complete')

    return output_dir  # when done return the directory containing the frames
if __name__ == '__main__':
    # Smoke test: extract every 5th frame of test.mp4 into test_frames,
    # keeping any frames that are already on disk.
    demo_options = dict(
        video_path='test.mp4',
        frames_dir='test_frames',
        overwrite=False,
        every=5,
        chunk_size=1000,
    )
    video_to_frames(**demo_options)
@sronilsson
Copy link

This is great @HaydenFaulkner! frame_chunks[-1][-1] = min(frame_chunks[-1][-1], total-1) Had me losing the final frame though, anything I should be aware of, or can I just drop the -1?

@HaydenFaulkner
Copy link
Author

HaydenFaulkner commented Jul 2, 2020

Hey @sronilsson

This is great @HaydenFaulkner! frame_chunks[-1][-1] = min(frame_chunks[-1][-1], total-1) Had me losing the final frame though, anything I should be aware of, or can I just drop the -1?

Oh I didn't realise that cut off the last frame, sure drop the -1 and see if it outputs all. :)

@Worthingtravis
Copy link

Potential performance boost?

Being that there is an option to skip frames, you can move the lines

_, image = capture.read()

if image is None: 
     while_safety += 1 
     continue  # skip

below

if frame % every == 0:

Because if you aren't using that frame, then there is no point to read the image from the capture.

@HaydenFaulkner
Copy link
Author

Potential performance boost?

Being that there is an option to skip frames, you can move the lines

_, image = capture.read()

if image is None: 
     while_safety += 1 
     continue  # skip

below

if frame % every == 0:

Because if you aren't using that frame, then there is no point to read the image from the capture.

We still need to perform capture.read() every iteration as that is what moves the video reader forwards, so we want to continue it moving forward even when we aren't writing out the frames. You are correct that the if image is None can go within the if frame % every == 0 block though, as long as it is checked before trying to write out the image as image = None will error. I suppose there is an argument to keep it before the if frame % every == 0 for the case that the capture.read() keeps giving Nones, so it will break quicker, however this is likely the rarer case.

Thanks for your suggestions :)

@1chimaruGin
Copy link

How to terminate ProcessPoolExecutor?

@detkov
Copy link

detkov commented Feb 9, 2021

I'd like to mention that if the number of frames in the video is less than chunk_size, the code will raise a ZeroDivisionError on the 22nd line, since total equals zero.

@zuliani99
Copy link

I have two questions:

  1. Can the executor skip returning anything, since I only need to store the frames? Like the following:
for f in frame_chunks:
       executor.submit(extract_frames, video_path, frames_dir, overwrite, f[0], f[1], every) 
  2. Is it necessary to implement a function for the as_completed pass, or can I skip this step since my extract_frames has no return values?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment