rubenhorn/list-videos.py

## list-videos.py
#!/usr/bin/python3

import os, sys, requests, re, html

# Banner printed on every run, before the arguments are validated.
banner = '''
#==================================================
# A script to process youtube playlist takeout
#==================================================
'''
print(banner)

# Exactly two positional arguments are required: the folder containing the
# playlist CSV files and the path of the HTML file to write.
if len(sys.argv) != 3:
    print('Usage: {} <takeout playlists path> <output file>'.format(sys.argv[0]))
    # sys.exit() rather than the bare exit(): exit() is injected by the site
    # module for interactive use and is absent when Python runs with -S.
    # A non-zero status lets calling shells/scripts see that nothing was done.
    sys.exit(1)

# Resolve both paths once so the rest of the script does not depend on the
# current working directory.
playlists_folder = os.path.abspath(sys.argv[1])
output_filename = os.path.abspath(sys.argv[2])

def get_playlist_files(playlists_folder):
    """Return the paths of all ``.csv`` files directly inside *playlists_folder*.

    The listing is not recursive. Each returned path is the folder joined
    with the file name via ``os.path.join``. Names are sorted so playlists
    are processed in the same order on every run (``os.listdir`` returns
    entries in arbitrary order).

    Raises:
        OSError: propagated from ``os.listdir`` when the folder cannot be
            read (for example ``FileNotFoundError``).
    """
    return [
        os.path.join(playlists_folder, name)
        for name in sorted(os.listdir(playlists_folder))
        if name.endswith('.csv')
    ]

# URL prefix of a YouTube watch page; a video id is appended to it to form
# the page that is downloaded and the link that is written to the HTML output.
VIDEO_BASE_URL = 'https://www.youtube.com/watch?v='

def get_playlist_title(playlist_file):
    """Return the playlist title stored in a playlist CSV file.

    The first CSV row is skipped and the fifth column of the second row is
    returned. The file is parsed with the ``csv`` module, so a quoted title
    that contains commas is returned whole and without its quotes, and no
    trailing newline is left on the value.

    Args:
        playlist_file: Path of the playlist CSV file.

    Returns:
        The title as a string.

    Raises:
        IndexError: if the file has no second row or that row has fewer
            than five columns.
        OSError: if the file cannot be opened.
    """
    import csv  # local import: the script's top-level imports do not include csv

    # Read as UTF-8 explicitly (the encoding this script also uses for its
    # HTML output) instead of the platform default; newline='' is what the
    # csv module requires for correct handling of quoted line breaks.
    with open(playlist_file, 'r', encoding='utf-8', newline='') as file:
        rows = csv.reader(file)
        next(rows, None)            # first row is skipped
        metadata = next(rows, [])   # second row holds the title in column 5
    return metadata[4]

def get_playlist_video_ids(playlist_file):
    """Return the video ids listed in a playlist CSV file.

    The first five lines of the file are skipped. From every remaining line
    the text before the first comma is taken as the video id. Surrounding
    whitespace (including the line break of a line without a comma) is
    stripped, and lines that yield an empty id, such as blank lines, are
    dropped so they are never looked up as if they were videos.

    Args:
        playlist_file: Path of the playlist CSV file.

    Returns:
        A list of video id strings in file order; empty when the file has
        five lines or fewer.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default.
    with open(playlist_file, 'r', encoding='utf-8') as file:
        data_lines = file.readlines()[5:]
    candidates = (line.split(',', 1)[0].strip() for line in data_lines)
    return [video_id for video_id in candidates if video_id]

def get_video_info(video_id):
    """Download the watch page of *video_id* and extract its title and channel.

    The title is read from the page's ``<meta name="title" content="...">``
    tag and the channel name from the ``"ownerChannelName":"..."`` field in
    the page source; both values are passed through ``html.unescape``.

    Args:
        video_id: Video id string; it is appended to ``VIDEO_BASE_URL``.

    Returns:
        A ``(video_id, title, channel_name)`` tuple, or ``None`` when the
        page cannot be downloaded or either value is not found in it.
    """
    try:
        # The timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(VIDEO_BASE_URL + video_id, timeout=30)
    except requests.RequestException:
        # Only network/HTTP-layer failures are treated as "video unavailable";
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return None

    text = response.text
    title_match = re.search(r'<meta name="title" content="([^"]*)">', text)
    channel_match = re.search(r'"ownerChannelName":"([^"]*)"', text)
    if title_match is None or channel_match is None:
        # The page does not carry the expected markers.
        return None

    title = html.unescape(title_match.group(1))
    channel_name = html.unescape(channel_match.group(1))
    return (video_id, title, channel_name)

def export_html(playlist_infos, filename):
    """Write every playlist in *playlist_infos* to *filename* as one HTML page.

    Each playlist becomes an ``<h3>`` heading followed by a ``<ul>`` with one
    ``<li>`` per video. A video entry holds a "Play" link that loads the
    embed URL into the page's fixed ``player`` iframe, a link that opens the
    watch page in a new tab, and the channel name. The inline script makes a
    click on a heading toggle the ``hidden`` flag of the list after it and
    fires that handler once per heading when the page loads.

    Playlist titles, video titles, channel names and video ids are
    HTML-escaped before being written, and placeholders are filled in a
    single pass, so text that happens to contain ``<``, ``&``, quotes or a
    placeholder name such as ``{channel_name}`` is shown literally.

    Args:
        playlist_infos: Mapping of playlist title to an iterable of
            ``(video_id, title, channel_name)`` string tuples. Playlists are
            written in the mapping's iteration order.
        filename: Path of the HTML file; it is opened in ``'w'`` mode with
            UTF-8 encoding, so an existing file is overwritten.
    """
    # The charset declaration matches the encoding used to write the file, so
    # browsers do not have to guess it when the page is opened from disk.
    document_template = '''
    <!DOCTYPE html>
    <html>
        <head>
            <meta charset="utf-8">
            <title>YouTube playlists</title>
        </head>
        <body>
            <span id="top"></span>
            <a href="#top" style="position: fixed; right: 10px; bottom: 10px">Go to top</a>
            <iframe name="player" style="position: fixed; right: 10px; top: 10px"></iframe>
            {playlists}
            <script>Array.from(document.getElementsByTagName("h3")).forEach(e =>{e.onclick=()=>{e.nextElementSibling.hidden=!e.nextElementSibling.hidden;};e.onclick()})</script>
        </body>
    </html>
    '''.strip()

    playlist_template = '''
            <h3>{title}</h3>
            <ul>{videos}</ul>
    '''.strip()

    video_template = '''
            <li>
                (<a href="https://www.youtube.com/embed/{video_id}?autoplay=1" target="player">
                    Play
                </a>) <a href="{base_url}{video_id}" target="_blank">
                    {title}
                </a> - {channel_name}
            </li>
    '''.strip()

    # The document and playlist templates are cut at their placeholder so the
    # page can be written piece by piece instead of being built in memory.
    # The document template is never passed to str.format because its inline
    # script contains literal braces.
    document_head, _, document_tail = document_template.partition('{playlists}')
    playlist_head, _, playlist_tail = playlist_template.partition('{videos}')

    def append_video(file, video_info):
        """Write the ``<li>`` entry for one ``(id, title, channel)`` tuple."""
        file.write(video_template.format(
            base_url=VIDEO_BASE_URL,
            video_id=html.escape(video_info[0]),
            title=html.escape(video_info[1]),
            channel_name=html.escape(video_info[2]),
        ))

    def append_playlist(file, title, video_infos):
        """Write one playlist: heading, opening ``<ul>``, videos, ``</ul>``."""
        file.write(playlist_head.format(title=html.escape(title)))
        for video_info in video_infos:
            append_video(file, video_info)
        file.write(playlist_tail)

    with open(filename, 'w', encoding='utf-8') as file:
        file.write(document_head)
        for playlist_title, video_infos in playlist_infos.items():
            append_playlist(file, playlist_title, video_infos)
        file.write(document_tail)

print('STARTING\n')

# Maps playlist title -> list of (video_id, title, channel_name) tuples.
# NOTE: two playlists with the same title share one key; the later one wins.
playlist_infos = dict()

playlist_files = get_playlist_files(playlists_folder)
playlist_count = len(playlist_files)
for playlist_number, playlist_file in enumerate(playlist_files, start=1):
    playlist_title = get_playlist_title(playlist_file)
    print('Assembling playlist ({}/{}) :{}'.format(playlist_number, playlist_count, playlist_title))
    video_ids = get_playlist_video_ids(playlist_file)
    video_infos = []
    for video_id in video_ids:
        video_info = get_video_info(video_id)
        # None means the video could not be looked up; it is left out.
        if video_info is not None:
            print('  - {} by {}'.format(video_info[1], video_info[2]))
            video_infos.append(video_info)
    # Register the playlist once, after its videos were collected. Doing this
    # inside the video loop skipped playlists that contain no ids at all.
    playlist_infos[playlist_title] = video_infos

export_html(playlist_infos, output_filename)

print('\nDONE')
	#!/usr/bin/python3

	import os, sys, requests, re, html

	# Banner printed on every run, before the arguments are validated.
	banner = '''
	#==================================================
	# A script to process youtube playlist takeout
	#==================================================
	'''
	print(banner)

	# Exactly two positional arguments are required: the folder containing the
	# playlist CSV files and the path of the HTML file to write.
	if len(sys.argv) != 3:
	    print('Usage: {} <takeout playlists path> <output file>'.format(sys.argv[0]))
	    # sys.exit() rather than the bare exit(): exit() is injected by the site
	    # module for interactive use and is absent when Python runs with -S.
	    # A non-zero status lets calling shells/scripts see that nothing was done.
	    sys.exit(1)

	# Resolve both paths once so the rest of the script does not depend on the
	# current working directory.
	playlists_folder = os.path.abspath(sys.argv[1])
	output_filename = os.path.abspath(sys.argv[2])

	def get_playlist_files(playlists_folder):
	    """Return the paths of all ``.csv`` files directly inside *playlists_folder*.

	    The listing is not recursive. Each returned path is the folder joined
	    with the file name via ``os.path.join``. Names are sorted so playlists
	    are processed in the same order on every run (``os.listdir`` returns
	    entries in arbitrary order).

	    Raises:
	        OSError: propagated from ``os.listdir`` when the folder cannot be
	            read (for example ``FileNotFoundError``).
	    """
	    return [
	        os.path.join(playlists_folder, name)
	        for name in sorted(os.listdir(playlists_folder))
	        if name.endswith('.csv')
	    ]

	# URL prefix of a YouTube watch page; a video id is appended to it to form
	# the page that is downloaded and the link that is written to the HTML output.
	VIDEO_BASE_URL = 'https://www.youtube.com/watch?v='

	def get_playlist_title(playlist_file):
	    """Return the playlist title stored in a playlist CSV file.

	    The first CSV row is skipped and the fifth column of the second row is
	    returned. The file is parsed with the ``csv`` module, so a quoted title
	    that contains commas is returned whole and without its quotes, and no
	    trailing newline is left on the value.

	    Args:
	        playlist_file: Path of the playlist CSV file.

	    Returns:
	        The title as a string.

	    Raises:
	        IndexError: if the file has no second row or that row has fewer
	            than five columns.
	        OSError: if the file cannot be opened.
	    """
	    import csv  # local import: the script's top-level imports do not include csv

	    # Read as UTF-8 explicitly (the encoding this script also uses for its
	    # HTML output) instead of the platform default; newline='' is what the
	    # csv module requires for correct handling of quoted line breaks.
	    with open(playlist_file, 'r', encoding='utf-8', newline='') as file:
	        rows = csv.reader(file)
	        next(rows, None)            # first row is skipped
	        metadata = next(rows, [])   # second row holds the title in column 5
	    return metadata[4]

	def get_playlist_video_ids(playlist_file):
	    """Return the video ids listed in a playlist CSV file.

	    The first five lines of the file are skipped. From every remaining line
	    the text before the first comma is taken as the video id. Surrounding
	    whitespace (including the line break of a line without a comma) is
	    stripped, and lines that yield an empty id, such as blank lines, are
	    dropped so they are never looked up as if they were videos.

	    Args:
	        playlist_file: Path of the playlist CSV file.

	    Returns:
	        A list of video id strings in file order; empty when the file has
	        five lines or fewer.
	    """
	    # Read as UTF-8 explicitly rather than relying on the platform default.
	    with open(playlist_file, 'r', encoding='utf-8') as file:
	        data_lines = file.readlines()[5:]
	    candidates = (line.split(',', 1)[0].strip() for line in data_lines)
	    return [video_id for video_id in candidates if video_id]

	def get_video_info(video_id):
	    """Download the watch page of *video_id* and extract its title and channel.

	    The title is read from the page's ``<meta name="title" content="...">``
	    tag and the channel name from the ``"ownerChannelName":"..."`` field in
	    the page source; both values are passed through ``html.unescape``.

	    Args:
	        video_id: Video id string; it is appended to ``VIDEO_BASE_URL``.

	    Returns:
	        A ``(video_id, title, channel_name)`` tuple, or ``None`` when the
	        page cannot be downloaded or either value is not found in it.
	    """
	    try:
	        # The timeout keeps one stalled connection from hanging the whole run.
	        response = requests.get(VIDEO_BASE_URL + video_id, timeout=30)
	    except requests.RequestException:
	        # Only network/HTTP-layer failures are treated as "video unavailable";
	        # KeyboardInterrupt and programming errors are no longer swallowed.
	        return None

	    text = response.text
	    title_match = re.search(r'<meta name="title" content="([^"]*)">', text)
	    channel_match = re.search(r'"ownerChannelName":"([^"]*)"', text)
	    if title_match is None or channel_match is None:
	        # The page does not carry the expected markers.
	        return None

	    title = html.unescape(title_match.group(1))
	    channel_name = html.unescape(channel_match.group(1))
	    return (video_id, title, channel_name)

	def export_html(playlist_infos, filename):
	    """Write every playlist in *playlist_infos* to *filename* as one HTML page.

	    Each playlist becomes an ``<h3>`` heading followed by a ``<ul>`` with one
	    ``<li>`` per video. A video entry holds a "Play" link that loads the
	    embed URL into the page's fixed ``player`` iframe, a link that opens the
	    watch page in a new tab, and the channel name. The inline script makes a
	    click on a heading toggle the ``hidden`` flag of the list after it and
	    fires that handler once per heading when the page loads.

	    Playlist titles, video titles, channel names and video ids are
	    HTML-escaped before being written, and placeholders are filled in a
	    single pass, so text that happens to contain ``<``, ``&``, quotes or a
	    placeholder name such as ``{channel_name}`` is shown literally.

	    Args:
	        playlist_infos: Mapping of playlist title to an iterable of
	            ``(video_id, title, channel_name)`` string tuples. Playlists are
	            written in the mapping's iteration order.
	        filename: Path of the HTML file; it is opened in ``'w'`` mode with
	            UTF-8 encoding, so an existing file is overwritten.
	    """
	    # The charset declaration matches the encoding used to write the file, so
	    # browsers do not have to guess it when the page is opened from disk.
	    document_template = '''
	    <!DOCTYPE html>
	    <html>
	        <head>
	            <meta charset="utf-8">
	            <title>YouTube playlists</title>
	        </head>
	        <body>
	            <span id="top"></span>
	            <a href="#top" style="position: fixed; right: 10px; bottom: 10px">Go to top</a>
	            <iframe name="player" style="position: fixed; right: 10px; top: 10px"></iframe>
	            {playlists}
	            <script>Array.from(document.getElementsByTagName("h3")).forEach(e =>{e.onclick=()=>{e.nextElementSibling.hidden=!e.nextElementSibling.hidden;};e.onclick()})</script>
	        </body>
	    </html>
	    '''.strip()

	    playlist_template = '''
	            <h3>{title}</h3>
	            <ul>{videos}</ul>
	    '''.strip()

	    video_template = '''
	            <li>
	                (<a href="https://www.youtube.com/embed/{video_id}?autoplay=1" target="player">
	                    Play
	                </a>) <a href="{base_url}{video_id}" target="_blank">
	                    {title}
	                </a> - {channel_name}
	            </li>
	    '''.strip()

	    # The document and playlist templates are cut at their placeholder so the
	    # page can be written piece by piece instead of being built in memory.
	    # The document template is never passed to str.format because its inline
	    # script contains literal braces.
	    document_head, _, document_tail = document_template.partition('{playlists}')
	    playlist_head, _, playlist_tail = playlist_template.partition('{videos}')

	    def append_video(file, video_info):
	        """Write the ``<li>`` entry for one ``(id, title, channel)`` tuple."""
	        file.write(video_template.format(
	            base_url=VIDEO_BASE_URL,
	            video_id=html.escape(video_info[0]),
	            title=html.escape(video_info[1]),
	            channel_name=html.escape(video_info[2]),
	        ))

	    def append_playlist(file, title, video_infos):
	        """Write one playlist: heading, opening ``<ul>``, videos, ``</ul>``."""
	        file.write(playlist_head.format(title=html.escape(title)))
	        for video_info in video_infos:
	            append_video(file, video_info)
	        file.write(playlist_tail)

	    with open(filename, 'w', encoding='utf-8') as file:
	        file.write(document_head)
	        for playlist_title, video_infos in playlist_infos.items():
	            append_playlist(file, playlist_title, video_infos)
	        file.write(document_tail)

	print('STARTING\n')

	# Maps playlist title -> list of (video_id, title, channel_name) tuples.
	# NOTE: two playlists with the same title share one key; the later one wins.
	playlist_infos = dict()

	playlist_files = get_playlist_files(playlists_folder)
	playlist_count = len(playlist_files)
	for playlist_number, playlist_file in enumerate(playlist_files, start=1):
	    playlist_title = get_playlist_title(playlist_file)
	    print('Assembling playlist ({}/{}) :{}'.format(playlist_number, playlist_count, playlist_title))
	    video_ids = get_playlist_video_ids(playlist_file)
	    video_infos = []
	    for video_id in video_ids:
	        video_info = get_video_info(video_id)
	        # None means the video could not be looked up; it is left out.
	        if video_info is not None:
	            print(' - {} by {}'.format(video_info[1], video_info[2]))
	            video_infos.append(video_info)
	    # Register the playlist once, after its videos were collected, so a
	    # playlist that contains no ids at all is still part of the output.
	    playlist_infos[playlist_title] = video_infos

	export_html(playlist_infos, output_filename)

	print('\nDONE')