Skip to content

Instantly share code, notes, and snippets.

@rubenhorn
Last active March 18, 2021 15:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rubenhorn/c71d82adba694c6211f7a1f14dc79bb3 to your computer and use it in GitHub Desktop.
Save rubenhorn/c71d82adba694c6211f7a1f14dc79bb3 to your computer and use it in GitHub Desktop.
A script to process a YouTube playlist takeout
#!/usr/bin/python3
import csv
import html
import os
import re
import sys

import requests
banner = '''
#==================================================
# A script to process youtube playlist takeout
#==================================================
'''
print(banner)

# Exactly two positional arguments are required: the folder holding the
# takeout playlist CSV files and the path of the HTML file to write.
if len(sys.argv) != 3:
    print('Usage: {} <takeout playlists path> <output file>'.format(sys.argv[0]))
    # Use sys.exit rather than the interactive-only exit() helper, and a
    # non-zero status so a calling shell can detect the usage error.
    sys.exit(1)

# Resolve both command-line paths to absolute form up front.
playlists_folder = os.path.abspath(sys.argv[1])
output_filename = os.path.abspath(sys.argv[2])
def get_playlist_files(playlists_folder):
    """Return the paths of all ``.csv`` files directly inside a folder.

    Args:
        playlists_folder: Directory containing the playlist CSV files.

    Returns:
        A list of paths (the folder joined with each file name), sorted by
        file name so the processing order is the same on every run.

    Raises:
        OSError: If the folder cannot be listed.
    """
    # os.listdir returns entries in arbitrary order, hence the explicit sort.
    csv_names = sorted(
        name for name in os.listdir(playlists_folder) if name.endswith('.csv')
    )
    return [os.path.join(playlists_folder, name) for name in csv_names]
# Base URL of a YouTube watch page; a video ID is appended to form the full link.
VIDEO_BASE_URL = 'https://www.youtube.com/watch?v='
def get_playlist_title(playlist_file):
    """Read the playlist title from a takeout playlist CSV file.

    The first line of the file is skipped and the title is taken from the
    fifth column of the second line.

    Args:
        playlist_file: Path of the playlist CSV file.

    Returns:
        The playlist title as a string.

    Raises:
        IndexError: If the file has no second row, or that row has fewer
            than five columns.
    """
    # Parse with the csv module so quoted titles that contain commas stay
    # intact and no trailing newline leaks into a last-column title. UTF-8
    # is forced so the result does not depend on the platform locale.
    with open(playlist_file, 'r', encoding='utf-8', newline='') as file:
        rows = csv.reader(file)
        next(rows, None)  # skip the first line (presumably the column header)
        playlist_row = next(rows, [])
    return playlist_row[4]
def get_playlist_video_ids(playlist_file):
    """Extract the video IDs listed in a takeout playlist CSV file.

    The first five lines of the file are skipped; for every remaining
    non-blank line the text before the first comma is taken as the video ID.

    Args:
        playlist_file: Path of the playlist CSV file.

    Returns:
        A list of video ID strings in file order, stripped of surrounding
        whitespace. Empty if the file has five lines or fewer.
    """
    with open(playlist_file, 'r', encoding='utf-8') as file:
        video_lines = file.readlines()[5:]
    # Strip each ID and drop empty ones: blank or comma-less lines would
    # otherwise yield IDs that are empty or end in a newline, each of which
    # costs a pointless HTTP request later on.
    candidate_ids = (line.split(',')[0].strip() for line in video_lines)
    return [video_id for video_id in candidate_ids if video_id]
def get_video_info(video_id, timeout=10):
    """Fetch a video's watch page and extract its title and channel name.

    Args:
        video_id: The video ID appended to ``VIDEO_BASE_URL``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A tuple ``(video_id, title, channel_name)``, or ``None`` when the
        request fails or either value cannot be found in the page.
    """
    try:
        response = requests.get(VIDEO_BASE_URL + video_id, timeout=timeout)
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no info"; a bare except
        # here would also swallow Ctrl-C (KeyboardInterrupt) and SystemExit.
        return None

    page = response.text
    title_match = re.search(r'<meta name="title" content="([^"]*)">', page)
    channel_match = re.search(r'"ownerChannelName":"([^"]*)"', page)
    # A page lacking either marker yields no usable info for this video.
    if title_match is None or channel_match is None:
        return None

    title = html.unescape(title_match.group(1))
    channel_name = html.unescape(channel_match.group(1))
    return (video_id, title, channel_name)
def export_html(playlist_infos, filename):
    """Write all playlists to a single HTML file.

    Each playlist becomes an ``<h3>`` heading followed by a ``<ul>`` of its
    videos; every video entry has a "Play" link targeting the embedded
    player iframe and a link that opens the watch page in a new tab.

    Args:
        playlist_infos: Mapping of playlist title to a list of
            ``(video_id, title, channel_name)`` tuples.
        filename: Path of the HTML file to create or overwrite.

    Raises:
        OSError: If the output file cannot be written.
    """
    # The charset declaration matches the UTF-8 encoding used when writing
    # the file, so non-ASCII titles render correctly when opened locally.
    document_template = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>YouTube playlists</title>
</head>
<body>
<span id="top"></span>
<a href="#top" style="position: fixed; right: 10px; bottom: 10px">Go to top</a>
<iframe name="player" style="position: fixed; right: 10px; top: 10px"></iframe>
{playlists}
<script>Array.from(document.getElementsByTagName("h3")).forEach(e =>{e.onclick=()=>{e.nextElementSibling.hidden=!e.nextElementSibling.hidden;};e.onclick()})</script>
</body>
</html>
'''.strip()
    video_template = '''
<li>
(<a href="https://www.youtube.com/embed/{video_id}?autoplay=1" target="player">
Play
</a>) <a href="{base_url}{video_id}" target="_blank">
{title}
</a> - {channel_name}
</li>
'''.strip()

    def render_video(video_info):
        """Return the ``<li>`` markup for one (id, title, channel) tuple."""
        video_id, title, channel_name = video_info
        # Titles and channel names are scraped from the web, so they are
        # escaped before being embedded. str.format substitutes in a single
        # pass, so placeholder-like text inside a value is left untouched.
        return video_template.format(
            base_url=VIDEO_BASE_URL,
            video_id=html.escape(video_id),
            title=html.escape(title),
            channel_name=html.escape(channel_name),
        )

    # The document template contains literal JavaScript braces, so it is
    # split around its placeholder instead of being passed to str.format.
    document_head, _, document_tail = document_template.partition('{playlists}')
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(document_head)
        for playlist_title, video_infos in playlist_infos.items():
            file.write('<h3>{}</h3>\n<ul>'.format(html.escape(playlist_title)))
            file.writelines(render_video(video_info) for video_info in video_infos)
            file.write('</ul>')
        file.write(document_tail)
# Main driver: collect video info for every playlist CSV in the input
# folder, then write all playlists to the output HTML file in one go.
print('STARTING\n')
playlist_infos = dict()
playlist_files = get_playlist_files(playlists_folder)
playlist_count = len(playlist_files)
for playlist_number, playlist_file in enumerate(playlist_files, start=1):
    playlist_title = get_playlist_title(playlist_file)
    print('Assembling playlist ({}/{}) :{}'.format(playlist_number, playlist_count, playlist_title))
    video_ids = get_playlist_video_ids(playlist_file)
    video_infos = []
    for video_id in video_ids:
        video_info = get_video_info(video_id)
        # Videos whose info could not be retrieved are left out of the export.
        if video_info is not None:
            print(' - {} by {}'.format(video_info[1], video_info[2]))
            video_infos.append(video_info)
    # Playlists are keyed by title; add a numeric suffix when a title is
    # already taken so a same-named playlist is not silently overwritten.
    unique_title = playlist_title
    duplicate_number = 2
    while unique_title in playlist_infos:
        unique_title = '{} ({})'.format(playlist_title, duplicate_number)
        duplicate_number += 1
    playlist_infos[unique_title] = video_infos
export_html(playlist_infos, output_filename)
print('\nDONE')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment