Created
March 31, 2022 19:34
-
-
Save fblampe/a7e1a70af87526e50715536e5b5c85c6 to your computer and use it in GitHub Desktop.
Fetches video data from YouTube API for a given channel and query, including title, description, publish date, thumbnail link and ID.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python3 | |
import json | |
import requests | |
import sys | |
def extract_data(v):
    """Extract the fields of interest from one YouTube search result item.

    Args:
        v: A search result item as a dict. It must contain ``id.videoId``
            and a ``snippet`` holding ``title``, ``description``,
            ``publishedAt`` and ``thumbnails.high.url``.

    Returns:
        A dict with the keys ``videoId`` (the full watch URL, not the bare
        ID), ``title``, ``description`` (truncated to the first 300
        characters), ``publishedAt`` and ``thumbnail``.

    Raises:
        KeyError: If any of the expected keys is missing from ``v``.
    """
    # Named video_id rather than id so the builtin id() is not shadowed.
    video_id = v["id"]["videoId"]
    snippet = v["snippet"]
    return {
        "videoId": "https://www.youtube.com/watch?v=" + video_id,
        "title": snippet["title"],
        "description": snippet["description"][0:300],
        "publishedAt": snippet["publishedAt"],
        "thumbnail": snippet["thumbnails"]["high"]["url"],
    }
def get_page(channel_id, query, from_date, to_date, page_token):
    """Fetch one page of search results from the YouTube Data API.

    Args:
        channel_id: ID of the channel to search in.
        query: Search term(s); URL-encoded automatically.
        from_date: Lower bound for the publish date, passed as
            ``publishedAfter`` (e.g. ``2020-11-01T00:00:00Z``).
        to_date: Upper bound for the publish date, passed as
            ``publishedBefore``.
        page_token: Token of the page to fetch; an empty value requests the
            first page.

    Returns:
        The decoded JSON response body.

    Raises:
        RuntimeError: If the API answers with a non-success HTTP status.
    """
    # Local import keeps this block self-contained; it is only needed to
    # render the request URL for the log line below.
    from urllib.parse import urlencode

    endpoint = "https://youtube.googleapis.com/youtube/v3/search"
    # Passing the values as params lets requests URL-encode them, so queries
    # containing spaces, "&" or "#" no longer corrupt the request.
    params = {
        "part": "snippet",
        "q": query,
        "channelId": channel_id,
        "maxResults": 50,
        "publishedAfter": from_date,
        "publishedBefore": to_date,
        "key": api_key,
    }
    if page_token:
        params["pageToken"] = page_token

    # Never print the real API key: log the URL with the key redacted.
    logged_params = dict(params, key="REDACTED")
    print("Fetching video details from " + endpoint + "?" + urlencode(logged_params))

    # A timeout prevents the script from hanging forever on a stalled connection.
    response = requests.get(endpoint, params=params, timeout=30)
    if not response.ok:
        # Raised by hand instead of raise_for_status(), whose message embeds
        # the full request URL and would therefore expose the API key.
        raise RuntimeError(
            f"YouTube API request failed with HTTP {response.status_code}: {response.text}"
        )
    return response.json()
def get_data(channel_id, query, from_date, to_date):
    """Collect the video data of all result pages for a channel search.

    Calls ``get_page`` repeatedly, following ``nextPageToken`` until the API
    stops returning one, and converts every video result with
    ``extract_data``.

    Args:
        channel_id: ID of the channel to search in.
        query: Search term(s).
        from_date: Lower bound for the publish date.
        to_date: Upper bound for the publish date.

    Returns:
        A list of dicts as produced by ``extract_data``, in the order the
        API returned them.

    Raises:
        RuntimeError: If a page contains an ``error`` object instead of
            results (e.g. invalid key or exceeded quota).
    """
    video_data = []
    next_page_token = ""
    while True:
        page = get_page(channel_id, query, from_date, to_date, next_page_token)
        if "error" in page:
            # Error payloads carry no "items"; fail with the API's own
            # message instead of an opaque KeyError.
            raise RuntimeError(f"YouTube API returned an error: {page['error']}")
        # Search results may also be channels or playlists, which have no
        # "videoId"; skip them rather than crash in extract_data.
        video_data.extend(
            extract_data(item)
            for item in page["items"]
            if "videoId" in item.get("id", {})
        )
        next_page_token = page.get("nextPageToken")
        if not next_page_token:
            return video_data
# Placeholder configuration: replace these values before running the script.
# api_key is read as a module-level global by get_page.
api_key = "<replace with your youtube API key>"
channel_id = "<ID of channel to fetch (found in channel URL)>"
# Only videos whose title contains this term are written to the output file.
title_filter = "<title filter term>"


def main(argv=None):
    """Fetch, filter and sort the channel's videos and write them to a file.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name, query,
            from date, to date). Defaults to ``sys.argv``.

    Side effects:
        Prints progress to stdout and writes the result as indented JSON to
        ``video_data.json`` in the current working directory. Prints the
        usage text and exits with status 1 when arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        # Show the usage text only when it is needed, and exit cleanly
        # instead of failing with an IndexError traceback.
        print("Usage: python get_youtube_data.py <query word (or words in \"\")> <from date (like 2020-11-01T00:00:00Z)> <to date>")
        sys.exit(1)
    query, from_date, to_date = argv[1:4]

    items = get_data(channel_id, query, from_date, to_date)
    items = [item for item in items if title_filter in item["title"]]
    items.sort(key=lambda item: item["publishedAt"])

    print("")
    print("Found " + str(len(items)) + " videos.")

    # The context manager guarantees the file is closed even if writing fails.
    with open("video_data.json", "w", encoding="utf-8") as output_file:
        output_file.write(json.dumps(items, indent=2))


# Guard the entry point so importing this module triggers no network or file I/O.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment