Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Finds YouTube videos you're interested in. PyCon US talks finder example.
import re
import requests
import html
API_KEY = ''
'''Google API (YouTube Data API v3) key from https://console.developers.google.com/apis/.'''

# Put the titles you're interested in into the RELEVANT string,
# one title per line. Each line is treated as a regular expression.
# NOTE: escape (), [], etc.
RELEVANT = '''
'''

CHANNEL = 'UCxs2IIVXaEHHA4BtTiWZ2mQ'
'''YouTube channel ID here.'''

TITLE_POSTFIX = 'PyCon 2019'
'''Postfix to strip from titles.'''

#############################################

# Raw string: '\s' in a plain literal is an invalid escape sequence and
# triggers a warning on modern Python versions.
_SPACES = re.compile(r'\s+')

# Search endpoint template. %(page)s is either empty or a ready-made
# '&pageToken=...' query fragment.
_BASE_URL = 'https://www.googleapis.com/youtube/v3/search?order=date&part=snippet&channelId=%(channel)s&maxResults=50&key=%(key)s%(page)s'
def _clean_title(raw_title):
    """Return a video title with HTML entities, postfix and speaker removed.

    The title is HTML-unescaped, TITLE_POSTFIX is removed, and when the part
    before the first ' - ' looks like a speaker name (two or three words, or
    a comma/slash separated list) that part is dropped. Whitespace runs are
    collapsed to single spaces.
    """
    title = html.unescape(raw_title)
    title = title.replace(TITLE_POSTFIX, '').strip(' -')

    parts = title.split(' - ', 1)
    # Several speakers may be joined with '/', treat it like a comma.
    prefix = parts[0].replace('/', ',')
    looks_like_name = len(prefix.split(' ')) in {2, 3} or ',' in prefix

    if looks_like_name and len(parts) > 1:
        # Strip the person name, keep only the talk title.
        title = parts[1]

    return _SPACES.sub(' ', title).strip(' -')


def traverse(page=0):
    """Yield ``(video_id, title)`` pairs for every video of CHANNEL.

    Result pages are fetched one after another by following the
    ``nextPageToken`` value of each response, in the order the API
    returns them.

    :param page: page token to start from; a falsy value means the first page.
    :raises RuntimeError: if the API response contains an ``error`` entry.
    """
    params = {
        'channel': CHANNEL,
        'key': API_KEY,
        'page': '',
    }

    # Iterate instead of recursing: one stack frame regardless of page count.
    while True:
        params['page'] = '&pageToken=%s' % page if page else ''

        # A timeout prevents the script from hanging forever on a dead connection.
        response = requests.get(_BASE_URL % params, timeout=30)
        payload = response.json()

        error = payload.get('error')
        if error:
            raise RuntimeError(error['message'])

        for item in payload.get('items', []):
            # Search results may also include non-video resources.
            if item['id']['kind'] != 'youtube#video':
                continue
            yield item['id']['videoId'], _clean_title(item['snippet']['title'])

        page = payload.get('nextPageToken')
        if not page:
            return
def find_relevant():
    """Print all channel videos, flagging those matching RELEVANT patterns.

    Every non-empty line of RELEVANT is a regular expression matched (with
    ``re.match``) against the cleaned video titles, eldest video first.
    A pattern is consumed by the first title it matches. Videos are printed
    sorted by title, matched ones marked with ``[!]``, followed by a summary
    and the list of patterns that matched nothing.
    """
    print('Channel: https://www.youtube.com/channel/%s/videos?view=0&sort=dd&flow=list\n' % CHANNEL)

    relevant_lines = [
        _SPACES.sub(' ', line.strip())
        for line in RELEVANT.splitlines()
        if line.strip()
    ]
    total_relevant = len(relevant_lines)

    traversed = list(traverse())[::-1]  # eldest first
    total_traversed = len(traversed)
    # Width of the zero-padded index column.
    len_traversed = len(str(total_traversed))

    seen = []
    for video_id, title in traversed:
        matched = False
        for line in relevant_lines:
            if re.match(line, title):
                # Removing is safe here because iteration stops right away;
                # stopping also keeps the match flag from being overwritten
                # by the patterns that follow.
                relevant_lines.remove(line)
                matched = True
                break
        seen.append((title, 'https://youtu.be/%s' % video_id, matched))

    for idx, (title, url, matched) in enumerate(sorted(seen), 1):
        print('%s. %s%s' % (str(idx).zfill(len_traversed), '[!] ' if matched else '', title))
        print('%s%s' % (' ' * (len_traversed + 2), url))

    total_missing = len(relevant_lines)
    print(
        '\nSummary: among %s found %s of %s, missing %s\n' % (
            total_traversed,
            total_relevant - total_missing,
            total_relevant,
            total_missing
        ))
    print('====' * 20)
    print('Missing:\n')
    for idx, line in enumerate(sorted(relevant_lines), 1):
        print('%s. %s' % (idx, line))
# Script entry point: build and print the report as soon as this file runs.
find_relevant()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment