Skip to content

Instantly share code, notes, and snippets.

@eggman
Created April 9, 2020 01:20
Show Gist options
  • Save eggman/894cd0d2b4162abd07a48ad8540fb742 to your computer and use it in GitHub Desktop.
Save eggman/894cd0d2b4162abd07a48ad8540fb742 to your computer and use it in GitHub Desktop.

Get all public playlist items of a YouTube channel

./get_ch_playlists.py

The results are stored as JSON and CSV files in the following layout:

/csv/
    /channel_id/
               /date/
                    /channel_playlists.csv
                    /playlist_id.csv
/json/
     /channel_id/
                /date/
                     /channel_playlists.json
                     /playlist_id.json

Fields written to channel_playlists.csv (selected so that snapshots can be diffed):

  • publishedAt — playlist creation date (リスト作成日)
  • id
  • itemCount
  • title

Fields written to playlist_id.csv (selected so that snapshots can be diffed):

  • publishedAt — date the item was added to the playlist (アイテム追加日)
  • id
  • videoId
  • title

prepare

install googleapiclient

$ pip3 install google-api-python-client pandas

config: api_key and channel ID

$ cat > config.json
{
  "api_key" : "your_api_key",
  "channel" : "channel_id"
}
# -*- coding: utf-8 -*-
import os
import googleapiclient.discovery
import googleapiclient.errors
import json
import pandas as pd
import pprint
import datetime
# Read-only YouTube auth scope URL.
# NOTE(review): nothing in the code visible here references this name; main()
# builds the client with an API key (developerKey) — confirm it is still needed.
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]
def get_playlists(youtube, ch, max_results=25):
    """Fetch every playlist resource of a channel, following pagination.

    Args:
        youtube: API client object; only
            ``youtube.playlists().list(...).execute()`` is called on it.
        ch: Channel ID, sent as the ``channelId`` request parameter.
        max_results: Page size sent as ``maxResults``. Defaults to 25, the
            value this function previously hard-coded.

    Returns:
        A single list containing the ``items`` of every response page, in
        the order the pages were returned.
    """
    playlists = []
    token = None
    while True:
        params = dict(
            part="snippet,contentDetails",
            channelId=ch,
            maxResults=max_results,
        )
        # Send pageToken only once the API has issued one, instead of an
        # empty string on the first request.
        if token:
            params["pageToken"] = token
        response = youtube.playlists().list(**params).execute()
        # Progress output: paging information of the page just fetched.
        pprint.pprint(response.get('pageInfo'))
        playlists.extend(response.get('items', []))
        # A missing/empty nextPageToken means this was the last page.
        token = response.get('nextPageToken')
        if not token:
            break
    return playlists
def get_playlist_items(youtube, plid, max_results=50):
    """Fetch every item of one playlist, following pagination.

    Args:
        youtube: API client object; only
            ``youtube.playlistItems().list(...).execute()`` is called on it.
        plid: Playlist ID, sent as the ``playlistId`` request parameter.
        max_results: Page size sent as ``maxResults``. Defaults to 50, the
            value this function previously hard-coded.

    Returns:
        A single list containing the ``items`` of every response page, in
        the order the pages were returned.
    """
    items = []
    token = None
    while True:
        params = dict(
            part="snippet",
            playlistId=plid,
            maxResults=max_results,
        )
        # Send pageToken only once the API has issued one, instead of an
        # empty string on the first request.
        if token:
            params["pageToken"] = token
        response = youtube.playlistItems().list(**params).execute()
        # Progress output: paging information of the page just fetched.
        pprint.pprint(response.get('pageInfo'))
        items.extend(response.get('items', []))
        # A missing/empty nextPageToken means this was the last page.
        token = response.get('nextPageToken')
        if not token:
            break
    return items
def save_channel_playlists(csvp, jsonp, playlists):
    """Save a channel's playlists as a raw JSON dump and a summary CSV.

    Writes ``<jsonp>/channel_playlists.json`` (the full ``playlists`` list)
    and ``<csvp>/channel_playlists.csv`` with the columns ``publishedAt``,
    ``id``, ``itemCount`` and ``title``.

    Args:
        csvp: Existing directory that receives the CSV file.
        jsonp: Existing directory that receives the JSON file.
        playlists: List of playlist dicts; each must provide
            ``snippet.publishedAt``, ``snippet.title``, ``id`` and
            ``contentDetails.itemCount``.

    Raises:
        KeyError: If a playlist dict lacks one of the fields above.
    """
    json_path = jsonp + '/channel_playlists.json'
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly; the context manager closes the handle.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(playlists, f, indent=4, ensure_ascii=False)

    csv_path = csvp + '/channel_playlists.csv'
    columns = ['publishedAt', 'id', 'itemCount', 'title']
    rows = [
        dict(publishedAt=p['snippet']['publishedAt'],
             id=p['id'],
             itemCount=p['contentDetails']['itemCount'],
             title=p['snippet']['title'])
        for p in playlists
    ]
    # Explicit columns keep the header row even when there are no playlists.
    pd.DataFrame(rows, columns=columns).to_csv(csv_path, index=False)
def save_playlist(csvp, jsonp, plid, items):
    """Save one playlist's items as a raw JSON dump and a summary CSV.

    Writes ``<jsonp>/<plid>.json`` (the full ``items`` list) and
    ``<csvp>/<plid>.csv`` with the columns ``publishedAt``, ``id``,
    ``videoId`` and ``title``.

    Args:
        csvp: Existing directory that receives the CSV file.
        jsonp: Existing directory that receives the JSON file.
        plid: Playlist ID, used as the base name of both files.
        items: List of playlist-item dicts; each must provide
            ``snippet.publishedAt``, ``snippet.title``, ``id`` and
            ``snippet.resourceId.videoId``.

    Raises:
        KeyError: If an item dict lacks one of the fields above.
    """
    json_path = jsonp + '/' + plid + ".json"
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly; the context manager closes the handle.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(items, f, indent=4, ensure_ascii=False)

    csv_path = csvp + '/' + plid + ".csv"
    columns = ['publishedAt', 'id', 'videoId', 'title']
    rows = [
        dict(publishedAt=item['snippet']['publishedAt'],
             id=item['id'],
             videoId=item['snippet']['resourceId']['videoId'],
             title=item['snippet']['title'])
        for item in items
    ]
    # Explicit columns keep the header row even when the playlist is empty.
    pd.DataFrame(rows, columns=columns).to_csv(csv_path, index=False)
def main():
    """Dump all playlists of the configured channel to JSON and CSV.

    Reads ``api_key`` and ``channel`` from ``config.json`` in the current
    directory, builds a YouTube v3 client with that key, creates
    ``csv/<channel>/<YYYYMMDD>`` and ``json/<channel>/<YYYYMMDD>`` (date taken
    from the local clock), then saves the channel's playlist list and the
    items of every playlist into those directories.
    """
    api_service_name = "youtube"
    api_version = "v3"

    # Context manager so the config file handle is closed after parsing.
    with open('config.json', 'r', encoding='utf-8') as config_file:
        config = json.load(config_file)
    api_key = config['api_key']
    ch = config['channel']
    print(ch)

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=api_key)

    # One output directory per channel and per run date.
    date = datetime.datetime.now().strftime("%Y%m%d")
    csv_path = 'csv/' + ch + '/' + date
    os.makedirs(csv_path, exist_ok=True)
    json_path = 'json/' + ch + '/' + date
    os.makedirs(json_path, exist_ok=True)

    playlists = get_playlists(youtube, ch)
    save_channel_playlists(csv_path, json_path, playlists)

    for p in playlists:
        plid = p['id']
        print(plid)
        items = get_playlist_items(youtube, plid)
        save_playlist(csv_path, json_path, plid, items)
# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    # main() returns None, so this exits with status 0 after it completes.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment