Skip to content

Instantly share code, notes, and snippets.

@HaydenFaulkner
Created July 15, 2020 01:09
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save HaydenFaulkner/3aa69130017d6405a8c0580c63bee8e6 to your computer and use it in GitHub Desktop.
Save HaydenFaulkner/3aa69130017d6405a8c0580c63bee8e6 to your computer and use it in GitHub Desktop.
decord version of video_to_frame.py
import cv2 # still used to save images out
import os
import numpy as np
from decord import VideoReader
from decord import cpu, gpu
def extract_frames(video_path, frames_dir, overwrite=False, start=-1, end=-1, every=1):
    """
    Extract frames from a video using decord's VideoReader

    Frames are written as JPEGs to ``<frames_dir>/<video filename>/<index>.jpg``
    where ``<index>`` is the zero-padded (10 digit) frame index in the video.
    The saved frames are those in ``[start, end)`` whose index is a multiple
    of ``every``. The same rule is applied whichever decoding strategy is
    used, so splitting a video into several ``start``/``end`` chunks yields
    the same set of frames as a single call over the whole video.

    :param video_path: path of the video
    :param frames_dir: the directory to save the frames
    :param overwrite: to overwrite frames that already exist?
    :param start: start frame (inclusive), a negative value means frame 0
    :param end: end frame (exclusive), a negative value or a value past the
        end of the video means the end of the video
    :param every: frame spacing, must be >= 1
    :return: count of images saved
    :raises ValueError: if ``every`` is smaller than 1
    :raises AssertionError: if ``video_path`` does not exist
    """
    if every < 1:
        raise ValueError("every must be >= 1, got {}".format(every))

    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)

    # kept as an assertion so callers see the same exception type as before
    assert os.path.exists(video_path), "video not found: {}".format(video_path)

    # load the VideoReader
    vr = VideoReader(video_path, ctx=cpu(0))  # can set to cpu or gpu .. ctx=gpu(0)
    total_frames = len(vr)

    if start < 0:  # if start isn't specified lets assume 0
        start = 0
    if end < 0 or end > total_frames:  # unspecified or past the end: stop at the last frame
        end = total_frames

    # cv2.imwrite fails silently (returns False) when the directory is missing
    output_dir = os.path.join(frames_dir, video_filename)
    os.makedirs(output_dir, exist_ok=True)

    def frame_path(index):
        return os.path.join(output_dir, "{:010d}.jpg".format(index))

    # first multiple of 'every' that is >= start
    first = ((start + every - 1) // every) * every

    # only frames that are missing on disk (or all of them when overwriting)
    pending = [index for index in range(first, end, every)
               if overwrite or not os.path.exists(frame_path(index))]
    if not pending:  # nothing to do, avoid decoding the video at all
        return 0

    saved_count = 0

    if every > 25 and len(pending) < 1000:
        # sparse sampling that fits in memory: fetching one batch is faster
        frames = vr.get_batch(pending).asnumpy()
        for index, frame in zip(pending, frames):
            if cv2.imwrite(frame_path(index), cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)):
                saved_count += 1  # only count frames that were actually written
    else:
        # dense sampling: read frames one after another (small memory footprint),
        # every frame in the span is still read as in the original sequential loop
        wanted = set(pending)
        for index in range(pending[0], pending[-1] + 1):
            frame = vr[index]
            if index in wanted:
                bgr = cv2.cvtColor(frame.asnumpy(), cv2.COLOR_RGB2BGR)
                if cv2.imwrite(frame_path(index), bgr):
                    saved_count += 1  # only count frames that were actually written

    return saved_count  # and return the count of the images we saved
def video_to_frames(video_path, frames_dir, overwrite=False, every=1):
    """
    Extracts the frames from a video

    The frames are saved in a sub directory of ``frames_dir`` named after the
    video file. The directory is created if it does not exist yet.

    :param video_path: path to the video
    :param frames_dir: directory to save the frames
    :param overwrite: overwrite frames if they exist?
    :param every: extract every this many frames
    :return: path to the directory where the frames were saved
    """
    video_path = os.path.normpath(video_path)  # make the paths OS (Windows) compatible
    frames_dir = os.path.normpath(frames_dir)  # make the paths OS (Windows) compatible
    video_filename = os.path.basename(video_path)

    # make directory to save frames, its a sub dir in the frames_dir with the video name
    output_dir = os.path.join(frames_dir, video_filename)
    os.makedirs(output_dir, exist_ok=True)

    print("Extracting frames from {}".format(video_filename))

    # forward 'overwrite' too, otherwise the caller's choice is silently ignored
    extract_frames(video_path, frames_dir, overwrite=overwrite, every=every)

    return output_dir  # when done return the directory containing the frames
if __name__ == '__main__':
    # quick manual check: dump every 5th frame of test.mp4 into test_frames/
    video_to_frames('test.mp4', 'test_frames', overwrite=False, every=5)
@ckocmoca
Copy link

I tested "video_to_frames.py" and "video_to_frames_decord.py".
With a large video size (730 MB) and frequent frames (every 5), "video_to_frames.py" wins because multiprocessing works.
If done every 20 frames, then Decord wins.
I tried to make "video_to_frames_decord.py" use multiprocessing, but it gave no gain in time.
I'm a newbie. Please help make "video_to_frames_decord.py" work on all cores.

I inserted this code before the return in def video_to_frames (I also defined chunk_size):
fred = len(video_path) #number of video frames

frame_chunks = [[i, i+chunk_size] for i in range(0, fred, chunk_size)]  # split the frames into chunk lists
frame_chunks[-1][-1] = min(frame_chunks[-1][-1], fred-1)  # make sure last chunk has correct end frame, also handles case chunk_size < total
with ProcessPoolExecutor(max_workers=multiprocessing.cpu_count()) as executor:
   futures = [executor.submit(extract_frames, video_path, frames_dir, overwrite, f[0], f[1], every)
       for f in frame_chunks]  # submit the processes: extract_frames(...)

@glitched-shadeslayer
Copy link

I want to extract frames at a specific fps, how would I be able to achieve that with this piece of code?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment