Created
October 5, 2021 16:47
-
-
Save frytoli/3917d7617362219cf5cff4f31429d798 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
''' | |
Download all videos in a playlist from YouTube, record metadata in a CSV file, and split all videos into frame images. | |
''' | |
from pytube import Playlist | |
import random | |
import time | |
import csv | |
import cv2 | |
import sys | |
import os | |
def download_playlist():
    '''
    Download every video in the hard-coded YouTube playlist and record metadata.

    Each video is saved as ``videos/<video_id>.mp4`` under the current working
    directory, using the highest-resolution progressive MP4 stream. One row per
    successfully downloaded video (id, filename, title, url, publish_date) is
    written to ``videos.csv`` in the current working directory. Videos that
    fail to download are reported on stdout and skipped.
    '''
    # Prep videos directory
    videos_dir = os.path.join(os.getcwd(), 'videos')
    os.makedirs(videos_dir, exist_ok=True)
    # Instantiate data structure for CSV data (first row is the header)
    data = [['id', 'filename', 'title', 'url', 'publish_date']]
    # Retrieve playlist
    playlist = Playlist('https://www.youtube.com/playlist?list=UUan3Wnqj2kRqKwt2HgLgw1g')
    print(f'[+] Downloading {len(playlist.video_urls)} videos from playlist...')
    try:
        # Iterate over videos and download
        for video in playlist.videos:
            filename = f'{video.video_id}.mp4'
            try:
                stream = (
                    video.streams
                    .filter(type='video', progressive=True, file_extension='mp4')
                    .order_by('resolution')
                    .desc()
                    .first()
                )
                if stream is None:
                    # first() yields None when no stream matches the filter
                    raise ValueError('no progressive mp4 stream available')
                stream.download(output_path=videos_dir, filename=filename)
                # Save metadata only once the download has succeeded
                data.append([
                    video.video_id,
                    filename,
                    video.title,
                    video.watch_url,
                    video.publish_date,
                ])
            except Exception as err:
                # Catch Exception (not a bare except) so Ctrl-C still aborts the run
                print(f' [!] Error downloading video {video.video_id}: {err}')
            # Random 3-8 second pause between videos
            # (presumably to avoid hammering YouTube -- confirm intent)
            time.sleep(random.randint(3, 8))
    finally:
        # Write CSV data to file even if the loop was interrupted, so metadata
        # for videos already downloaded is not lost. newline='' is required by
        # the csv module; utf-8 keeps non-ASCII titles from failing to encode.
        with open('videos.csv', 'w', newline='', encoding='utf-8') as outcsv:
            csv.writer(outcsv).writerows(data)
def split_frames():
    '''
    Split every downloaded ``.mp4`` in ``./videos`` into per-frame JPEG images.

    For each video, frames are written to ``frames/<video name>/frame<N>.jpg``
    under the current working directory, with N counting up from 0. Prints a
    notice and returns if the ``videos`` directory does not exist.
    '''
    vid_dir = os.path.join(os.getcwd(), 'videos')
    if not os.path.exists(vid_dir):
        print(f'[!] No videos directory found at {vid_dir}')
        return
    # Prep frames directory
    frames_dir = os.path.join(os.getcwd(), 'frames')
    os.makedirs(frames_dir, exist_ok=True)
    vids = [vid for vid in os.listdir(vid_dir) if vid.endswith('.mp4')]
    for vid in vids:
        print(f'[+] Parsing frames from {vid}')
        # Prep frames subdir; splitext drops only the final extension, so
        # names that contain extra dots keep distinct subdirectories
        frames_subdir = os.path.join(frames_dir, os.path.splitext(vid)[0])
        os.makedirs(frames_subdir, exist_ok=True)
        vidcap = cv2.VideoCapture(os.path.join(vid_dir, vid))
        count = 0
        try:
            success, img = vidcap.read()
            while success:
                cv2.imwrite(os.path.join(frames_subdir, f'frame{count}.jpg'), img)
                success, img = vidcap.read()
                count += 1
        finally:
            # Release the capture handle so it does not leak across videos
            vidcap.release()
        print(f' [-] Read {count} frames')
if __name__ == '__main__':
    # Dispatch on the first command-line argument, compared case-insensitively;
    # a missing argument falls through to the usage message.
    action = sys.argv[1].lower() if len(sys.argv) > 1 else ''
    if action in ('download', '--d'):
        download_playlist()
    elif action in ('split', '--s'):
        split_frames()
    else:
        print('''Specify the desired action:\n\tdownload, --d: Download all videos uploaded to ALERTWildfire's YouTube channel\n\tsplit, --s: Split all downloaded videos into frame images''')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment