fblampe/get_youtube_data.py

## get_youtube_data.py
#!/bin/python3

import json
import requests
import sys

def extract_data(v):
    """Reduce one search result to the fields written to the output file.

    Args:
        v: A single entry of a search response's "items" list. It must
            provide ``id.videoId`` and, under ``snippet``, the keys
            ``title``, ``description``, ``publishedAt`` and
            ``thumbnails.high.url``.

    Returns:
        A dict with the keys "videoId", "title", "description",
        "publishedAt" and "thumbnail". Note that "videoId" holds the full
        watch URL rather than the bare id, and "description" is cut to its
        first 300 characters.

    Raises:
        KeyError: If any of the keys listed above is absent from ``v``.
    """
    # Read the id before the snippet so a malformed entry fails on "id" first.
    video_id = v["id"]["videoId"]
    snippet = v["snippet"]

    return {
        "videoId": "https://www.youtube.com/watch?v=" + video_id,
        "title": snippet["title"],
        "description": snippet["description"][:300],
        "publishedAt": snippet["publishedAt"],
        "thumbnail": snippet["thumbnails"]["high"]["url"],
    }

def get_page(channel_id, query, from_date, to_date, page_token):
    """Fetch one page (up to 50 results) of a channel search from the YouTube Data API.

    Args:
        channel_id: Id of the channel to search in.
        query: Search term(s), passed as the ``q`` parameter.
        from_date: Lower bound for the publish date (``publishedAfter``),
            e.g. "2020-11-01T00:00:00Z".
        to_date: Upper bound for the publish date (``publishedBefore``).
        page_token: Token of the page to fetch; an empty string (or None)
            requests the first page.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    endpoint = "https://youtube.googleapis.com/youtube/v3/search"
    # Hand the parameters to requests instead of pasting them into the URL, so
    # characters such as "&", "#" or "+" in the query are percent-encoded
    # rather than corrupting the query string.
    params = {
        "part": "snippet",
        "q": query,
        "channelId": channel_id,
        "maxResults": 50,
        "publishedAfter": from_date,
        "publishedBefore": to_date,
        "key": api_key,  # module-level setting
    }
    if page_token:
        params["pageToken"] = page_token

    # Log what is being requested, but keep the API key out of the output.
    shown = {name: value for name, value in params.items() if name != "key"}
    print("Fetching video details from " + endpoint + " with " + str(shown))

    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(endpoint, params=params, timeout=30)
    # Fail here with the HTTP status instead of returning an error body that
    # has no "items" for the caller to read.
    response.raise_for_status()

    return response.json()

def get_data(channel_id, query, from_date, to_date):
    """Collect the extracted data of all videos matching a channel search.

    Requests result pages via ``get_page`` one after another, following
    "nextPageToken" until the response no longer carries one, and converts
    every video result with ``extract_data``.

    Args:
        channel_id: Id of the channel to search in.
        query: Search term(s).
        from_date: Lower bound for the publish date.
        to_date: Upper bound for the publish date.

    Returns:
        A list of the dicts produced by ``extract_data``, in the order the
        API returned the results.

    Raises:
        RuntimeError: If a response body contains an "error" object.
    """
    video_data = []
    next_page_token = ""
    while True:
        page = get_page(channel_id, query, from_date, to_date, next_page_token)

        if "error" in page:
            # Failures (invalid key, exhausted quota, ...) come back as a body
            # with an "error" object and no "items"; report the API's own
            # message instead of failing later with KeyError: 'items'.
            raise RuntimeError("YouTube API request failed: " + str(page["error"]))

        video_data.extend(
            extract_data(item)
            for item in page.get("items", [])
            # A search may also return channels and playlists; their "id"
            # object has no "videoId", so they are skipped.
            if "videoId" in item.get("id", {})
        )

        next_page_token = page.get("nextPageToken")
        if not next_page_token:
            break

    return video_data

# --- Settings: fill these in before running the script. ---
# Key used to authenticate against the YouTube Data API (read by get_page).
api_key = "<replace with your youtube API key>"
channel_id = "<ID of channel to fetch (found in channel URL)>"
# Only videos whose title contains this exact substring are kept.
title_filter = "<title filter term>"

usage = "Usage: python get_youtube_data.py <query word (or words in \"\")> <from date (like 2020-11-01T00:00:00Z)> <to date>"


def main():
    """Search the configured channel and write the matching videos to video_data.json.

    Reads the query and the from/to dates from the command line, fetches all
    result pages, keeps the videos whose title contains ``title_filter``,
    sorts them by publish date (ascending) and dumps them as indented JSON.
    """
    if len(sys.argv) < 4:
        # Passing a string to sys.exit prints it to stderr and exits with
        # status 1; this replaces the bare IndexError on missing arguments.
        sys.exit(usage)
    query, from_date, to_date = sys.argv[1:4]

    items = get_data(channel_id, query, from_date, to_date)
    items = [item for item in items if title_filter in item["title"]]
    items.sort(key=lambda item: item["publishedAt"])
    print("")
    print("Found " + str(len(items)) + " videos.")

    # The context manager closes the file even if writing fails.
    with open("video_data.json", "w") as output_file:
        output_file.write(json.dumps(items, indent=2))


# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
	#!/bin/python3

	import json
	import requests
	import sys

	# NOTE(review): this span repeats the extract_data definition that appears earlier in
	# the file, but every line sits behind one tab and the body's nesting is gone, so it
	# does not parse as Python in this form. Presumably a paste/extraction artifact; confirm
	# before relying on it.
	# The statements read id.videoId, snippet.title, the first 300 characters of
	# snippet.description, snippet.publishedAt and snippet.thumbnails.high.url from `v`
	# and return them in a dict whose "videoId" entry is the full watch URL.
	# No duration is read anywhere, despite what the docstring below says.
	def extract_data(v):
	"""Extract id, title and duration"""
	id = v["id"]["videoId"]
	title = v["snippet"]["title"]
	description = v["snippet"]["description"][0:300]
	publishedAt = v["snippet"]["publishedAt"]
	thumbnail = v["snippet"]["thumbnails"]["high"]["url"]

	return {
	"videoId": "https://www.youtube.com/watch?v=" + id,
	"title": title,
	"description": description,
	"publishedAt": publishedAt,
	"thumbnail": thumbnail
	}

	# NOTE(review): flattened repeat of the get_page definition that appears earlier in the
	# file; the body is not nested under the def, so this span is not valid Python as written.
	# The statements build the search URL by interpolating the arguments and the
	# module-level api_key, print that URL (API key included), issue a GET request and
	# return the response text decoded as JSON.
	def get_page(channel_id, query, from_date, to_date, page_token):
	url = f"https://youtube.googleapis.com/youtube/v3/search?part=snippet&q={query}&channelId={channel_id}&maxResults=50&publishedAfter={from_date}&publishedBefore={to_date}&key={api_key}&pageToken={page_token}"
	print("Fetching video details from " + str(url))
	response = requests.get(url)

	data = json.loads(response.text)

	# format and show entire response including all the irrelevant stuff
	# print(str(json.dumps(data, indent=2)))

	return data

	# NOTE(review): flattened repeat of the get_data definition that appears earlier in the
	# file. With the nesting lost, the extent of the `while True:` body and of the
	# if/else branches cannot be read from this span alone; it is not valid Python as written.
	# The statements fetch pages via get_page, append extract_data(...) of each entry in
	# page["items"] to video_data, and carry page["nextPageToken"] into the next request
	# until a page has no such key.
	def get_data(channel_id, query, from_date, to_date):
	video_data = []
	next_page_token = ""
	while True:
	page = get_page(channel_id, query, from_date, to_date, next_page_token)
	video_data.extend(list(map(extract_data, page["items"])))
	if not "nextPageToken" in page:
	break
	else:
	next_page_token = page["nextPageToken"]

	return video_data

	# NOTE(review): tab-prefixed repeat of the script's top-level statements that appear
	# earlier in the file; as indented top-level code it does not parse as Python.
	# The statements define the api_key and channel_id placeholders, print the usage text
	# unconditionally, read query / from date / to date from sys.argv[1..3], fetch the
	# results, keep items whose title contains the placeholder filter term, sort them by
	# "publishedAt" and write them as indented JSON to video_data.json.
	api_key = "<replace with your youtube API key>"

	print("Usage: python get_youtube_data.py <query word (or words in \"\")> <from date (like 2020-11-01T00:00:00Z)> <to date>")

	channel_id = "<ID of channel to fetch (found in channel URL)>"
	query = sys.argv[1]
	from_date = sys.argv[2]
	to_date = sys.argv[3]

	items = get_data(channel_id, query, from_date, to_date)
	items = list(filter (lambda item: "<title filter term>" in item["title"], items))
	items.sort(key = lambda item: item["publishedAt"])
	print("")
	print("Found " + str(len(items)) + " videos.")

	video_data = (json.dumps(items, indent=2))

	output_file = open("video_data.json", "w")
	output_file.write(video_data)
	output_file.close()