|
# -*- coding: utf-8 -*- |
|
|
|
import os |
|
|
|
import googleapiclient.discovery |
|
import googleapiclient.errors |
|
import json |
|
import pandas as pd |
|
import pprint |
|
import datetime |
|
|
|
# OAuth read-only scope for the YouTube Data API.
# NOTE(review): not referenced anywhere in this file — main() authenticates
# with developerKey (API key) instead; confirm before removing.
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]
|
|
|
def get_playlists(youtube, ch):
    """Return every playlist resource owned by channel *ch*.

    Follows ``nextPageToken`` pagination and concatenates the ``items``
    arrays of all result pages. Each page's ``pageInfo`` is pretty-printed
    as progress output.
    """
    collected = []
    page_token = ''

    while True:
        response = youtube.playlists().list(
            part="snippet,contentDetails",
            channelId=ch,
            maxResults=25,
            pageToken=page_token
        ).execute()

        pprint.pprint(response['pageInfo'])
        collected += response['items']

        # Stop once the API reports no further page.
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    return collected
|
|
|
def get_playlist_items(youtube, plid):
    """Return every item (video entry) of playlist *plid*.

    Follows ``nextPageToken`` pagination and concatenates the ``items``
    arrays of all result pages. Each page's ``pageInfo`` is pretty-printed
    as progress output.
    """
    videos = []
    page_token = ''

    while True:
        response = youtube.playlistItems().list(
            part="snippet",
            playlistId=plid,
            maxResults=50,
            pageToken=page_token
        ).execute()

        pprint.pprint(response['pageInfo'])
        videos += response['items']

        # Stop once the API reports no further page.
        page_token = response.get('nextPageToken')
        if not page_token:
            break

    return videos
|
|
|
def save_channel_playlists(csvp, jsonp, playlists):
    """Persist the channel's playlist index to JSON and CSV.

    Writes the raw API resources to ``<jsonp>/channel_playlists.json`` and
    a flattened summary (publishedAt, id, itemCount, title) to
    ``<csvp>/channel_playlists.csv``.

    Args:
        csvp: existing directory for the CSV output.
        jsonp: existing directory for the JSON output.
        playlists: list of playlist resources as returned by the API.
    """
    json_path = jsonp + '/channel_playlists.json'
    # 'with' + explicit UTF-8: the original leaked the file handle and,
    # combined with ensure_ascii=False, relied on the platform default
    # encoding (which fails for non-ASCII titles on e.g. cp1252 locales).
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(playlists, f, indent=4, ensure_ascii=False)

    csv_path = csvp + '/channel_playlists.csv'
    rows = [
        dict(publishedAt=p['snippet']['publishedAt'],
             id=p['id'],
             itemCount=p['contentDetails']['itemCount'],
             title=p['snippet']['title'])
        for p in playlists
    ]
    pd.DataFrame(rows).to_csv(csv_path, index=False)
|
|
|
def save_playlist(csvp, jsonp, plid, items):
    """Persist one playlist's items to JSON and CSV, named after *plid*.

    Writes the raw API resources to ``<jsonp>/<plid>.json`` and a
    flattened summary (publishedAt, id, videoId, title) to
    ``<csvp>/<plid>.csv``.

    Args:
        csvp: existing directory for the CSV output.
        jsonp: existing directory for the JSON output.
        plid: playlist id, used as the file stem.
        items: list of playlistItem resources as returned by the API.
    """
    json_path = jsonp + '/' + plid + ".json"
    # 'with' + explicit UTF-8: the original leaked the file handle and,
    # combined with ensure_ascii=False, relied on the platform default
    # encoding (which fails for non-ASCII titles on e.g. cp1252 locales).
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(items, f, indent=4, ensure_ascii=False)

    csv_path = csvp + '/' + plid + ".csv"
    rows = [
        dict(publishedAt=it['snippet']['publishedAt'],
             id=it['id'],
             videoId=it['snippet']['resourceId']['videoId'],
             title=it['snippet']['title'])
        for it in items
    ]
    pd.DataFrame(rows).to_csv(csv_path, index=False)
|
|
|
def main():
    """Download and archive every playlist of the configured channel.

    Reads ``api_key`` and ``channel`` from ``config.json``, then writes a
    playlist index plus one JSON/CSV pair per playlist under
    ``csv/<channel>/<YYYYMMDD>`` and ``json/<channel>/<YYYYMMDD>``.
    """
    api_service_name = "youtube"
    api_version = "v3"

    # 'with' closes the config promptly; the original leaked the handle
    # and read with the platform default encoding.
    with open('config.json', 'r', encoding='utf-8') as config_file:
        config = json.load(config_file)
    api_key = config['api_key']
    ch = config['channel']
    print(ch)

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=api_key)

    # Date-stamped output directories, one pair per run.
    date = datetime.datetime.now().strftime("%Y%m%d")

    csv_path = 'csv/' + ch + '/' + date
    os.makedirs(csv_path, exist_ok=True)
    json_path = 'json/' + ch + '/' + date
    os.makedirs(json_path, exist_ok=True)

    playlists = get_playlists(youtube, ch)
    save_channel_playlists(csv_path, json_path, playlists)

    for p in playlists:
        plid = p['id']
        print(plid)
        items = get_playlist_items(youtube, plid)
        save_playlist(csv_path, json_path, plid, items)
|
|
|
# Script entry point: fetch and archive the configured channel's playlists.
if __name__ == "__main__":

    main()