Skip to content

Instantly share code, notes, and snippets.

@fblampe
Last active October 22, 2021 23:24
Show Gist options
  • Save fblampe/2e734943a2b8e19386eadd783ee16486 to your computer and use it in GitHub Desktop.
Save fblampe/2e734943a2b8e19386eadd783ee16486 to your computer and use it in GitHub Desktop.
#!/bin/python3
import json
import requests
import isodate
def extract_data(json_data):
    """Pick the fields of interest out of one video resource.

    Args:
        json_data: One element of the ``items`` list of a YouTube Data API
            ``videos`` response; it must contain ``id``, ``snippet`` (with
            ``title``, ``description`` and ``publishedAt``) and
            ``contentDetails`` (with ``duration``).

    Returns:
        A dict with the keys ``videoId``, ``title``, ``description``
        (truncated to the first 120 characters), ``publishedAt`` and
        ``duration`` (the ``str()`` of the parsed ISO 8601 duration).

    Raises:
        KeyError: If one of the expected keys is missing.
    """
    snippet = json_data["snippet"]
    duration_iso = json_data["contentDetails"]["duration"]
    # The API reports durations in ISO 8601 form such as "PT3M14S"; isodate
    # does the parsing, so no hand-rolled minute/second splitting is needed.
    duration = isodate.parse_duration(duration_iso)
    return {
        "videoId": json_data["id"],
        "title": snippet["title"],
        # Only a short preview of the description is kept.
        "description": snippet["description"][0:120],
        "publishedAt": snippet["publishedAt"],
        "duration": str(duration),
    }
def get_data(video_ids):
    """Fetch snippet and content details for a batch of videos.

    Reads the module-level ``api_key`` and passes every returned item
    through ``extract_data``.

    Args:
        video_ids: List of video id strings to request in a single call.

    Returns:
        A list with one ``extract_data`` result per item in the response.
        An empty ``video_ids`` yields an empty list without any request.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer within the timeout.
    """
    if not video_ids:
        # Nothing to look up; avoid sending a request with an empty id list.
        return []
    url = f"https://www.googleapis.com/youtube/v3/videos?part=snippet,contentDetails&id={','.join(video_ids)}&key={api_key}"
    # NOTE(review): this prints the full URL, which includes the API key.
    print(str(url))
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=30)
    # Fail here with the HTTP error instead of a KeyError on "items" below.
    response.raise_for_status()
    data = response.json()
    return [extract_data(item) for item in data["items"]]
# Key sent with every request made by get_data(); replace with a real key.
api_key = "your-api-key"

# video_ids.txt holds one video id per line. Blank lines are skipped so they
# do not turn into empty ids in the request.
with open('video_ids.txt', 'r') as ids_file:
    video_ids = [line.strip() for line in ids_file if line.strip()]

# Request the ids in batches of 50 (presumably the API's per-request limit;
# confirm against the API documentation) and collect everything in ONE list.
# Concatenating the JSON text of each batch would produce "[...][...]",
# which is not a valid JSON document.
all_videos = []
for i in range(0, len(video_ids), 50):
    next_fifty_ids = video_ids[i:i + 50]
    all_videos.extend(get_data(next_fifty_ids))

video_data = json.dumps(all_videos, indent=2)

# The context manager closes the file even if the write fails.
with open("video_data.json", "w") as output_file:
    output_file.write(video_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment