Skip to content

Instantly share code, notes, and snippets.

@frytoli
Created October 5, 2021 16:47
Show Gist options
  • Save frytoli/3917d7617362219cf5cff4f31429d798 to your computer and use it in GitHub Desktop.
Save frytoli/3917d7617362219cf5cff4f31429d798 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
'''
Download all videos in a playlist from YouTube, record metadata in a CSV file, and split all videos into frame images.
'''
from pytube import Playlist
import random
import time
import csv
import cv2
import sys
import os
def download_playlist(playlist_url='https://www.youtube.com/playlist?list=UUan3Wnqj2kRqKwt2HgLgw1g'):
    '''
    Download every video of a YouTube playlist and record its metadata.

    Videos are saved as "<video_id>.mp4" in a "videos" directory under the
    current working directory. One metadata row per successfully downloaded
    video (id, filename, title, url, publish_date) is written to "videos.csv"
    in the current working directory.

    A failure on a single video is reported and skipped so that the rest of
    the playlist is still processed.

    :param playlist_url: URL of the playlist to download; defaults to the
        playlist this script was originally written for.
    '''
    # Prep videos directory (exist_ok avoids a check-then-create race)
    videos_dir = os.path.join(os.getcwd(), 'videos')
    os.makedirs(videos_dir, exist_ok=True)
    # Instantiate data structure for CSV data
    data = [['id', 'filename', 'title', 'url', 'publish_date']]
    # Retrieve playlist
    playlist = Playlist(playlist_url)
    print(f'[+] Downloading {len(playlist.video_urls)} videos from playlist...')
    # Iterate over videos and download
    for video in playlist.videos:
        # Download the highest-resolution progressive mp4 stream
        filename = f'{video.video_id}.mp4'
        try:
            video.streams.\
                filter(type='video', progressive=True, file_extension='mp4').\
                order_by('resolution').\
                desc().\
                first().\
                download(output_path=videos_dir, filename=filename)
            # Save metadata
            data.append([
                video.video_id,
                filename,
                video.title,
                video.watch_url,
                video.publish_date
            ])
        except Exception as err:
            # Catch Exception rather than using a bare except so that
            # Ctrl-C (KeyboardInterrupt) and SystemExit still stop the script.
            # This also covers the case where no matching stream exists and
            # first() returns None.
            print(f' [!] Error downloading video {video.video_id} ({err})')
        # Pause a random few seconds between videos to throttle requests
        time.sleep(random.randint(3, 8))
    # Write CSV data to file. newline='' is required by the csv module to
    # avoid blank rows on Windows; utf-8 keeps non-ASCII titles writable
    # regardless of the platform's default encoding.
    with open('videos.csv', 'w', newline='', encoding='utf-8') as outcsv:
        csv.writer(outcsv).writerows(data)
def split_frames():
    '''
    Split every downloaded video into individual JPEG frame images.

    Reads each "*.mp4" file from the "videos" directory under the current
    working directory and writes its frames to
    "frames/<video name without extension>/frame<N>.jpg", with N starting
    at 0. Prints a notice and returns if the videos directory is missing.
    '''
    vid_dir = os.path.join(os.getcwd(), 'videos')
    if not os.path.isdir(vid_dir):
        # Nothing to split; say so instead of silently doing nothing
        print(f'[!] Videos directory not found: {vid_dir}')
        return
    # Prep frames directory (exist_ok avoids a check-then-create race)
    frames_dir = os.path.join(os.getcwd(), 'frames')
    os.makedirs(frames_dir, exist_ok=True)
    vids = [vid for vid in os.listdir(vid_dir) if vid.endswith('.mp4')]
    for vid in vids:
        print(f'[+] Parsing frames from {vid}')
        # Prep frames subdir. splitext strips only the final extension, so
        # file names that themselves contain dots keep distinct directories.
        frames_subdir = os.path.join(frames_dir, os.path.splitext(vid)[0])
        os.makedirs(frames_subdir, exist_ok=True)
        vidcap = cv2.VideoCapture(os.path.join(vid_dir, vid))
        count = 0
        try:
            success, img = vidcap.read()
            while success:
                cv2.imwrite(os.path.join(frames_subdir, f'frame{count}.jpg'), img)
                success, img = vidcap.read()
                count += 1
        finally:
            # Always free the capture handle, even if reading/writing fails
            vidcap.release()
        print(f' [-] Read {count} frames')
if __name__ == '__main__':
    # Normalise the first command-line argument once; an absent argument
    # falls through to the usage message.
    action = sys.argv[1].lower() if len(sys.argv) > 1 else ''
    if action in ('download', '--d'):
        download_playlist()
    elif action in ('split', '--s'):
        split_frames()
    else:
        print('''Specify the desired action:\n\tdownload, --d: Download all videos uploaded to ALERTWildfire's YouTube channel\n\tsplit, --s: Split all downloaded videos into frame images''')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment