Skip to content

Instantly share code, notes, and snippets.

@fblampe
Created March 31, 2022 19:34
Show Gist options
  • Save fblampe/a7e1a70af87526e50715536e5b5c85c6 to your computer and use it in GitHub Desktop.
Save fblampe/a7e1a70af87526e50715536e5b5c85c6 to your computer and use it in GitHub Desktop.
Fetches video data from YouTube API for a given channel and query, including title, description, publish date, thumbnail link and ID.
#!/bin/python3
import json
import requests
import sys
def extract_data(v, max_description_length=300):
    """Extract the fields of interest from one YouTube search result.

    Args:
        v: A single search-result item as a dict. It must contain
            ``v["id"]["videoId"]`` and, under ``v["snippet"]``, the keys
            ``title``, ``description``, ``publishedAt`` and
            ``thumbnails["high"]["url"]``.
        max_description_length: Maximum number of characters of the
            description to keep (defaults to 300).

    Returns:
        A dict with the keys ``videoId`` (the full watch URL built from the
        video ID, not the bare ID), ``title``, ``description`` (truncated),
        ``publishedAt`` and ``thumbnail`` (URL of the "high" thumbnail).

    Raises:
        KeyError: If any of the expected keys is missing from ``v``.
    """
    snippet = v["snippet"]
    video_id = v["id"]["videoId"]
    return {
        # Despite the key name, this holds a ready-to-open watch URL.
        "videoId": "https://www.youtube.com/watch?v=" + video_id,
        "title": snippet["title"],
        "description": snippet["description"][:max_description_length],
        "publishedAt": snippet["publishedAt"],
        "thumbnail": snippet["thumbnails"]["high"]["url"],
    }
def get_page(channel_id, query, from_date, to_date, page_token):
    """Fetch one page of video search results from the YouTube Data API.

    Args:
        channel_id: ID of the channel to search in.
        query: Search term(s).
        from_date: Lower bound for the publish date, e.g.
            ``2020-11-01T00:00:00Z``.
        to_date: Upper bound for the publish date, same format.
        page_token: Token of the page to fetch; an empty value requests the
            first page.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    endpoint = "https://youtube.googleapis.com/youtube/v3/search"
    params = {
        "part": "snippet",
        "q": query,
        "channelId": channel_id,
        # Without this the API also returns channels and playlists, which
        # carry no "videoId".
        "type": "video",
        "maxResults": 50,
        "publishedAfter": from_date,
        "publishedBefore": to_date,
    }
    if page_token:
        params["pageToken"] = page_token

    # Log the request without the API key so the secret never reaches stdout.
    print("Fetching video details from " + endpoint + "?" + urlencode(params))

    # Passing params lets requests URL-encode every value (spaces, "&", "+").
    response = requests.get(
        endpoint,
        params={**params, "key": api_key},
        timeout=30,
    )
    # Fail here with the HTTP status instead of later on a missing key.
    response.raise_for_status()
    return response.json()
def get_data(channel_id, query, from_date, to_date):
    """Collect the extracted data of all matching videos across all pages.

    Calls ``get_page`` repeatedly, following ``nextPageToken`` until the
    response no longer provides one, and runs ``extract_data`` on every
    result that carries a video ID.

    Args:
        channel_id: ID of the channel to search in.
        query: Search term(s).
        from_date: Lower bound for the publish date.
        to_date: Upper bound for the publish date.

    Returns:
        A list of dicts as produced by ``extract_data``, in the order the
        pages returned them.

    Raises:
        KeyError: If a page has no ``items`` entry.
    """
    video_data = []
    page_token = ""
    while True:
        page = get_page(channel_id, query, from_date, to_date, page_token)
        # Skip results without a video ID (e.g. channels or playlists);
        # extract_data would raise KeyError on them.
        video_data.extend(
            extract_data(item)
            for item in page["items"]
            if "videoId" in item.get("id", {})
        )
        # A missing or empty token means this was the last page.
        page_token = page.get("nextPageToken")
        if not page_token:
            return video_data
# Configuration: replace the placeholders before running the script.
api_key = "<replace with your youtube API key>"
channel_id = "<ID of channel to fetch (found in channel URL)>"
# Only videos whose title contains this term are written to the output file.
title_filter = "<title filter term>"


def main():
    """Fetch, filter and sort the channel's videos, then write them to disk.

    Reads the query and the from/to dates from the command line, keeps only
    videos whose title contains ``title_filter``, sorts them by publish date
    and writes the result as indented JSON to ``video_data.json`` in the
    current directory. Exits with the usage message when fewer than three
    arguments are given.
    """
    usage = "Usage: python get_youtube_data.py <query word (or words in \"\")> <from date (like 2020-11-01T00:00:00Z)> <to date>"
    if len(sys.argv) < 4:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(usage)
    query, from_date, to_date = sys.argv[1:4]

    items = get_data(channel_id, query, from_date, to_date)
    items = [item for item in items if title_filter in item["title"]]
    items.sort(key=lambda item: item["publishedAt"])

    print("")
    print("Found " + str(len(items)) + " videos.")

    # The context manager closes the file even if the write fails.
    with open("video_data.json", "w", encoding="utf-8") as output_file:
        output_file.write(json.dumps(items, indent=2))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment