Skip to content

Instantly share code, notes, and snippets.

@randompast
Last active August 24, 2021 00:09
Show Gist options
  • Save randompast/7db4f3824615a24809b1e8522b75e55e to your computer and use it in GitHub Desktop.
Save randompast/7db4f3824615a24809b1e8522b75e55e to your computer and use it in GitHub Desktop.
Paper Title to TMP Video Link
# YouTube Data API key; main() passes it as `developerKey` when building the
# client. Replace the placeholder with a real key before running main().
API_KEY = 'your_key_here'
# https://developers.google.com/youtube/v3/docs/playlistItems/list?hl=en&apix_params=%7B%22part%22%3A%5B%22snippet.publishedAt%22%2C%22snippet.title%22%2C%22snippet.description%22%5D%2C%22playlistId%22%3A%22UUbfYPyITQ-7l4upoX8nvctg%22%7D
# -*- coding: utf-8 -*-
# Sample Python code for youtube.playlistItems.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python
import os
import pickle
import re
import googleapiclient.discovery
def main():
    """Download every page of the hard-coded playlist and pickle the responses.

    Builds a YouTube Data API v3 client authenticated with the module-level
    ``API_KEY``, pages through ``playlistItems.list`` until a response carries
    no ``nextPageToken``, and writes the list of raw response dicts (one per
    page) to ``output_all.p`` in the current working directory.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=API_KEY)

    pageToken = ''
    data = []
    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId="UUbfYPyITQ-7l4upoX8nvctg",
            # playlistItems.list documents maxResults as 0-50 inclusive, so
            # request the largest page the API allows and rely on paging.
            maxResults=50,
            pageToken=pageToken
        )
        print('obtaining', pageToken)
        response = request.execute()
        data.append(response)
        if 'nextPageToken' not in response:
            break
        pageToken = response['nextPageToken']

    # Write once, after pagination finishes; the context manager guarantees the
    # file is flushed and closed even if pickling raises.
    with open('output_all.p', 'wb') as out_file:
        pickle.dump(data, out_file)
def get_paper_info(d):
    """Extract quoted paper title(s) from a video description string.

    Line breaks in ``d`` are first replaced by spaces so a title that spans
    several lines can still match. Two patterns are tried in order: text
    between double quotes following ``paper ``, then (as a fallback for a
    missing opening quote) text between ``paper `` and the next double quote.

    Returns the matches of the first pattern that finds anything, joined with
    ``","``, or the string ``"missing"`` when neither pattern matches.
    """
    flattened = re.sub('[\r\n]', ' ', d)
    patterns = (
        r'paper \"(.+?)\"',  # normal case: paper "Title"
        r'paper (.+?)\"',    # fallback: opening quote absent
    )
    for pattern in patterns:
        joined = ",".join(re.findall(pattern, flattened))
        if joined:
            return joined
    return "missing"
def print_page(p, acc):
    """Append one row per item of response page ``p`` to ``acc['videos']``.

    Despite the name, nothing is printed here. Each row is the list
    ``[publishedAt, title, short_url, paper_titles, description]``, where
    ``paper_titles`` comes from ``get_paper_info(description)`` and the URL is
    the short-link prefix followed by the item's video id.
    """
    # default_url = 'https://www.youtube.com/watch?v='
    default_url = 'http://y2u.be/'
    rows = acc['videos']
    for item in p['items']:
        snippet = item['snippet']
        published = snippet['publishedAt']
        title = snippet['title']
        description = snippet['description']
        papers = get_paper_info(description)
        link = default_url + snippet['resourceId']['videoId']
        rows.append([published, title, link, papers, description])
def print_output():
    """Print a Markdown-style listing of the videos stored in ``output_all.p``.

    Loads the pickled list of response pages, flattens it into rows with
    ``print_page``, prints how many rows have no detected paper title
    (``'missing'``), then prints for each video: the first 10 characters of
    its publish timestamp, a ``[title](url)`` link, the paper title(s) on the
    next line, and a blank separator line.
    """
    # NOTE: unpickling executes arbitrary code from the file; only load an
    # 'output_all.p' that this script's main() produced.
    with open('output_all.p', 'rb') as in_file:
        data = pickle.load(in_file)

    acc = {'videos': []}
    for page in data:
        print_page(page, acc)

    # Row layout is [published, title, url, papers, description].
    print(sum(1 for row in acc['videos'] if row[3] == 'missing'))
    for published, title, url, papers, _description in acc['videos']:
        # if papers == 'missing' :
        print(published[:10], '[{0}]({1})'.format(title, url))
        print(' - {0}'.format(papers))
        # print( _description )
        print()
if __name__ == "__main__":
    # main() is commented out, so running the script only formats an existing
    # 'output_all.p'; uncomment it to re-download the playlist data first.
    # main()
    print_output()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment