frytoli/youtube_to_frames.py

## youtube_to_frames.py
#!/usr/bin/env python3

'''
Download all videos in a playlist from YouTube, record metadata in a CSV file, and split all videos into frame images.
'''

from pytube import Playlist
import random
import time
import csv
import cv2
import sys
import os

def download_playlist():
    '''
    Download every video of the hard-coded YouTube playlist and record metadata.

    Each video is saved as "<video_id>.mp4" inside a "videos" directory under
    the current working directory. After the loop, "videos.csv" is written to
    the current working directory with a header row followed by one row
    (id, filename, title, url, publish_date) per successfully downloaded video.

    A video that fails to download is reported on stdout and skipped; it does
    not abort the run and gets no CSV row.
    '''
    # Prep videos directory (exist_ok avoids the exists()/mkdir() race)
    vid_dir = os.path.join(os.getcwd(), 'videos')
    os.makedirs(vid_dir, exist_ok=True)
    # Instantiate data structure for CSV data
    data = [['id', 'filename', 'title', 'url', 'publish_date']]
    # Retrieve playlist
    playlist = Playlist('https://www.youtube.com/playlist?list=UUan3Wnqj2kRqKwt2HgLgw1g')
    print(f'[+] Downloading {len(playlist.video_urls)} videos from playlist...')
    # Iterate over videos and download
    for video in playlist.videos:
        filename = f'{video.video_id}.mp4'
        try:
            # Highest-resolution progressive (audio+video in one file) mp4 stream
            stream = (
                video.streams
                .filter(type='video', progressive=True, file_extension='mp4')
                .order_by('resolution')
                .desc()
                .first()
            )
            if stream is None:
                # first() yields None when no stream matches the filter
                print(f'  [!] No progressive mp4 stream found for video {video.video_id}')
            else:
                stream.download(output_path=vid_dir, filename=filename)
                # Save metadata only for videos that were actually downloaded
                data.append([
                    video.video_id,
                    filename,
                    video.title,
                    video.watch_url,
                    video.publish_date,
                ])
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C / SystemExit still stop the run
            print(f'  [!] Error downloading video {video.video_id}: {err}')
        # Random pause between requests
        time.sleep(random.randint(3, 8))
    # Write CSV data to file; newline='' is what the csv module expects, and
    # utf-8 keeps non-ASCII titles from failing under a locale default codec
    with open('videos.csv', 'w', newline='', encoding='utf-8') as outcsv:
        csv.writer(outcsv).writerows(data)

def split_frames():
    '''
    Split every ".mp4" file in ./videos into individual JPEG frame images.

    For a video "<name>.mp4" the frames are written to
    "./frames/<name>/frame<N>.jpg", with N counting up from 0 in read order.
    Directories are created as needed. If the "videos" directory does not
    exist, a notice is printed and nothing else happens.
    '''
    vid_dir = os.path.join(os.getcwd(), 'videos')
    if not os.path.isdir(vid_dir):
        print(f'[!] No videos directory found at {vid_dir}')
        return
    # Prep frames directory
    frames_dir = os.path.join(os.getcwd(), 'frames')
    os.makedirs(frames_dir, exist_ok=True)
    vids = [vid for vid in os.listdir(vid_dir) if vid.endswith('.mp4')]
    for vid in vids:
        print(f'[+] Parsing frames from {vid}')
        # Prep frames subdir; splitext strips only the final extension, so
        # "a.x.mp4" and "a.y.mp4" no longer collapse into the same directory
        frames_subdir = os.path.join(frames_dir, os.path.splitext(vid)[0])
        os.makedirs(frames_subdir, exist_ok=True)
        vidcap = cv2.VideoCapture(os.path.join(vid_dir, vid))
        count = 0
        try:
            success, img = vidcap.read()
            while success:
                cv2.imwrite(os.path.join(frames_subdir, f'frame{count}.jpg'), img)
                count += 1
                success, img = vidcap.read()
        finally:
            # Always free the capture handle, even if reading/writing fails
            vidcap.release()
        print(f'  [-] Read {count} frames')

def main(argv):
    '''
    Run the action named by the first command-line argument.

    argv is the full argument list (program name first, as in sys.argv).
    "download" / "--d" calls download_playlist(), "split" / "--s" calls
    split_frames(); matching is case-insensitive. Anything else, including
    no argument at all, prints the usage text.
    '''
    action = argv[1].lower() if len(argv) > 1 else ''
    if action in ('download', '--d'):
        download_playlist()
    elif action in ('split', '--s'):
        split_frames()
    else:
        print('''Specify the desired action:\n\tdownload, --d: Download all videos uploaded to ALERTWildfire's YouTube channel\n\tsplit, --s: Split all downloaded videos into frame images''')

if __name__ == '__main__':
    main(sys.argv)
	#!/usr/bin/env python3

	'''
	Download all videos in a playlist from YouTube, record metadata in a CSV file, and split all videos into frame images.
	'''

	from pytube import Playlist
	import random
	import time
	import csv
	import cv2
	import sys
	import os

	def download_playlist():
		'''
		Download every video of the hard-coded YouTube playlist and record metadata.

		Each video is saved as "<video_id>.mp4" inside a "videos" directory under
		the current working directory. After the loop, "videos.csv" is written to
		the current working directory with a header row followed by one row
		(id, filename, title, url, publish_date) per successfully downloaded video.

		A video that fails to download is reported on stdout and skipped; it does
		not abort the run and gets no CSV row.
		'''
		# Prep videos directory (exist_ok avoids the exists()/mkdir() race)
		vid_dir = os.path.join(os.getcwd(), 'videos')
		os.makedirs(vid_dir, exist_ok=True)
		# Instantiate data structure for CSV data
		data = [['id', 'filename', 'title', 'url', 'publish_date']]
		# Retrieve playlist
		playlist = Playlist('https://www.youtube.com/playlist?list=UUan3Wnqj2kRqKwt2HgLgw1g')
		print(f'[+] Downloading {len(playlist.video_urls)} videos from playlist...')
		# Iterate over videos and download
		for video in playlist.videos:
			filename = f'{video.video_id}.mp4'
			try:
				# Highest-resolution progressive (audio+video in one file) mp4 stream
				candidates = video.streams.filter(type='video', progressive=True, file_extension='mp4')
				stream = candidates.order_by('resolution').desc().first()
				if stream is None:
					# first() yields None when no stream matches the filter
					print(f' [!] No progressive mp4 stream found for video {video.video_id}')
				else:
					stream.download(output_path=vid_dir, filename=filename)
					# Save metadata only for videos that were actually downloaded
					data.append([
						video.video_id,
						filename,
						video.title,
						video.watch_url,
						video.publish_date,
					])
			except Exception as err:
				# Exception (not a bare except) so Ctrl-C / SystemExit still stop the run
				print(f' [!] Error downloading video {video.video_id}: {err}')
			# Random pause between requests
			time.sleep(random.randint(3, 8))
		# Write CSV data to file; newline='' is what the csv module expects, and
		# utf-8 keeps non-ASCII titles from failing under a locale default codec
		with open('videos.csv', 'w', newline='', encoding='utf-8') as outcsv:
			csv.writer(outcsv).writerows(data)

	def split_frames():
		'''
		Split every ".mp4" file in ./videos into individual JPEG frame images.

		For a video "<name>.mp4" the frames are written to
		"./frames/<name>/frame<N>.jpg", with N counting up from 0 in read order.
		Directories are created as needed. If the "videos" directory does not
		exist, a notice is printed and nothing else happens.
		'''
		vid_dir = os.path.join(os.getcwd(), 'videos')
		if not os.path.isdir(vid_dir):
			print(f'[!] No videos directory found at {vid_dir}')
			return
		# Prep frames directory
		frames_dir = os.path.join(os.getcwd(), 'frames')
		os.makedirs(frames_dir, exist_ok=True)
		vids = [vid for vid in os.listdir(vid_dir) if vid.endswith('.mp4')]
		for vid in vids:
			print(f'[+] Parsing frames from {vid}')
			# Prep frames subdir; splitext strips only the final extension, so
			# "a.x.mp4" and "a.y.mp4" no longer collapse into the same directory
			frames_subdir = os.path.join(frames_dir, os.path.splitext(vid)[0])
			os.makedirs(frames_subdir, exist_ok=True)
			vidcap = cv2.VideoCapture(os.path.join(vid_dir, vid))
			count = 0
			try:
				success, img = vidcap.read()
				while success:
					cv2.imwrite(os.path.join(frames_subdir, f'frame{count}.jpg'), img)
					count += 1
					success, img = vidcap.read()
			finally:
				# Always free the capture handle, even if reading/writing fails
				vidcap.release()
			print(f' [-] Read {count} frames')

	def main(argv):
		'''
		Run the action named by the first command-line argument.

		argv is the full argument list (program name first, as in sys.argv).
		"download" / "--d" calls download_playlist(), "split" / "--s" calls
		split_frames(); matching is case-insensitive. Anything else, including
		no argument at all, prints the usage text.
		'''
		action = argv[1].lower() if len(argv) > 1 else ''
		if action in ('download', '--d'):
			download_playlist()
		elif action in ('split', '--s'):
			split_frames()
		else:
			print('''Specify the desired action:\n\tdownload, --d: Download all videos uploaded to ALERTWildfire's YouTube channel\n\tsplit, --s: Split all downloaded videos into frame images''')

	if __name__ == '__main__':
		main(sys.argv)