Skip to content

Instantly share code, notes, and snippets.

@Rainyan
Last active February 11, 2022 16:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Rainyan/55b5aa869aa34e1d4f3398c8556bd51e to your computer and use it in GitHub Desktop.
Save Rainyan/55b5aa869aa34e1d4f3398c8556bd51e to your computer and use it in GitHub Desktop.
This Python script fetches simple YT API metadata about channels' latest uploads, and outputs it to screen as text. Note that Youtube also exposes RSS feeds for this, so you're probably better off using that. This was just made for fun.
#!/usr/bin/env python
"""Get results for multiple pre-defined Youtube channels at once.
Can either use channel names or the "UC..." channel id scheme.
"""
from sys import platform
import yt_activity
# Channels to query, mapped to the label printed above each channel's results.
YOUTUBE_CHANNELS = {
    # Channel name, as in youtube.com/c/<...> or youtube.com/user/<...>
    'Channel name': 'vanity name 1',
    # Channel "UC..." id, as in youtube.com/channel/<...>
    'UC123456789ABCDEFGHIJKLM': 'vanity name 2',
}

# How many videos to get per channel.
NUM_VIDS_TO_GET = 2

COLORS = yt_activity.Colors()

# We don't have terminal color support on Windows
# TODO: replace with some color library like Colorama
if not platform.startswith("linux"):
    COLORS.cyan = COLORS.greybg = COLORS.reset = ""

# Print a highlighted header per channel, followed by its latest videos.
for channel, vanityname in YOUTUBE_CHANNELS.items():
    print(f"{COLORS.greybg}-- {vanityname} --{COLORS.reset}")
    yt_activity.get_latest_channel_vids_info(channel, NUM_VIDS_TO_GET)
Insert your YouTube OAuth API key here. This file must contain exactly one line. To generate an API key, see: https://console.developers.google.com/apis/api/youtube.googleapis.com
#!/usr/bin/env python3
from datetime import datetime
from sys import platform
import json
import os
import re
import sys
import urllib.request
from urllib.parse import unquote
# Introductory text printed by script_entrypoint() when the script is run
# without any command-line arguments.
DESCRIPTION = """This Python script fetches simple metadata about
a Youtube channel's latest uploads, and outputs it
to screen as text."""
# Note that Youtube also exposes RSS feeds for this, so you're
# probably better off using that. This script was just made for fun.
# Copyright 2020 https://github.com/Rainyan
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# To generate a Youtube OAuth token, see:
# https://console.developers.google.com/apis/api/youtube.googleapis.com
# The token file is looked up next to this script. It should contain only
# the token itself, with no empty lines.
OAUTH_FILE_PATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "youtube_oauth_token.txt")

# Don't set manually; this gets set in set_oauth_token() by reading from
# OAUTH_FILE_PATH.
YOUTUBE_OAUTH_TOKEN = ""

# How many videos to fetch from Youtube API.
MAX_RESULTS = 5

# The max item fetch count allowed by the Youtube API.
# Should be set as the max value of maxResults in:
# https://developers.google.com/youtube/v3/docs/activities/list
MAX_RESULTS_SUPPORTED = 50

# Where to cut off lengthy video descriptions.
TRUNCATE_DESCRIPTION_LENGTH = 125

# Encoding used to decode bytes objects into strings.
ENCODING = "utf-8"
# TODO: Use colorama or other library for handling this
class Colors:
    """ANSI escape sequences used to colorize terminal output.

    Reference: https://en.wikipedia.org/wiki/ANSI_escape_code
    """

    # Bright cyan foreground.
    cyan = '\033[96m'
    # White foreground on a bright black (grey) background.
    greybg = '\033[37;100m'
    # Reset all attributes back to the terminal defaults.
    reset = '\033[0m'
def set_oauth_token():
    """Load the Youtube OAuth token from OAUTH_FILE_PATH.

    The file's contents, with surrounding whitespace stripped, are stored
    in the module-level YOUTUBE_OAUTH_TOKEN.
    """
    global YOUTUBE_OAUTH_TOKEN
    with open(OAUTH_FILE_PATH, mode="r", encoding=ENCODING) as token_file:
        raw_token = token_file.read()
    YOUTUBE_OAUTH_TOKEN = raw_token.strip()
def script_entrypoint():
    """Run the tool from the command line.

    With no arguments, the description is printed and the channel name is
    prompted for. A single argument is taken as the channel name; an
    optional second argument is the number of results to fetch. Any other
    argument count prints a usage message instead.
    """
    argc = len(sys.argv)

    # Unsupported argument count. Explain syntax to user.
    if argc not in (1, 2, 3):
        print("Usage: yt_activity.py (optional) \"<Youtube username or "
              "channel name>\" (optional) \"<Number of recent videos to "
              "fetch>\"")
        return

    # No args passed. Print description and prompt for user input.
    if argc == 1:
        print(DESCRIPTION + "\n")
        # For Python 2, use raw_input
        get_latest_channel_vids_info(
            input("Please enter a Youtube channel name: "))
        return

    requested_channel = sys.argv[1]
    if argc == 2:
        # One argument passed. Treat as channel name.
        get_latest_channel_vids_info(requested_channel)
    else:
        # Two arguments passed. Second is num results to fetch.
        get_latest_channel_vids_info(requested_channel, int(sys.argv[2]))
# TODO: Switch to YT's video search API: https://stackoverflow.com/a/30183877
def get_latest_channel_vids_info(channel_name, num_results = MAX_RESULTS):
    """Print information about a channel's most recent uploads.

    Expects a YT username, ie. the bracketed part of
    youtube.com/user/<Username>. The bracketed part of
    youtube.com/channel/<Channelname> will also parse successfully.

    Args:
        channel_name: Channel name or "UC..." channel id. One pair of
            surrounding double quotes is stripped if present.
        num_results: How many videos to print; must be between 1 and
            MAX_RESULTS_SUPPORTED, inclusive.

    Returns:
        None. Results are printed. The function also returns early (after
        printing a message where applicable) if the channel can't be
        resolved, if an API request returned None, or if the channel has
        no uploads among the fetched activities.

    Raises:
        ValueError: If num_results is out of range, or if the API returned
            an empty response.
    """
    if num_results > MAX_RESULTS_SUPPORTED or num_results <= 0:
        raise ValueError(
            f"num_results has to be between 1 and {MAX_RESULTS_SUPPORTED} "
            f"(MAX_RESULTS_SUPPORTED), got {num_results}")

    # Strip any surrounding quotes.
    if (len(channel_name) >= 2 and channel_name.startswith('"')
            and channel_name.endswith('"')):
        channel_name = channel_name[1:-1]

    # Keep the readable name for messages; the quoted form goes into URLs.
    display_name = channel_name
    channel_name = urllib.parse.quote(channel_name)

    print(f"Fetching {num_results} recent videos by {display_name}..."
          "\n")

    channel_id = get_channel_id(channel_name)
    if channel_id is None:
        print("No channel found with name " + display_name)
        return

    # Channel activity also contains non-upload items (playlist actions
    # etc.), so keep asking for more activities until enough distinct
    # uploads have been seen. The set is only used for counting.
    seen_uploads = set()
    bail_out_iterations = 10
    channel_overview_result = ""
    # Safeguard against endless looping if the API doesn't have enough
    # results: at most bail_out_iterations - 1 requests are made.
    for _ in range(bail_out_iterations - 1):
        if len(seen_uploads) >= num_results:
            break

        # Ask for the wanted amount plus what we already have, but never
        # more than the API allows.
        results_to_get = min(
            max(MAX_RESULTS, num_results) + len(seen_uploads),
            MAX_RESULTS_SUPPORTED)
        channel_overview_url = parse_yt_url_channel_overview(
            channel_id, results_to_get)
        channel_overview_result = youtube_api_request(channel_overview_url)
        if channel_overview_result is None:
            return

        uploads_before = len(seen_uploads)
        for item in filter_json_upload_items(channel_overview_result):
            seen_uploads.add(str(item))

        # The request size only depends on how many uploads we have seen.
        # If that count didn't grow, the next request would be identical,
        # so stop here instead of burning API quota on repeats.
        if len(seen_uploads) == uploads_before:
            break

    if not channel_overview_result:
        raise ValueError('Failed to get any results from the Youtube API. '
                         'Make sure var "bail_out_iterations" has a '
                         f'reasonable value (is {bail_out_iterations}), '
                         'and increase if necessary.')

    videos_list = parse_video_list(channel_overview_result)
    if not videos_list:
        # Nothing to look up; don't send an empty id list to the API.
        print("No uploads found for channel " + display_name)
        return

    videos_info_url = parse_yt_url_video_info(videos_list, num_results)
    videos_result = youtube_api_request(videos_info_url)
    if videos_result is None:
        return

    print_video_snippets_info(videos_result, num_results)
def filter_json_upload_items(json_string):
    """Return only the video upload items of a Youtube API response.

    Takes a json string returned from the Youtube API and filters out any
    items that are not video uploads, ie. playlist actions etc. Items
    without a "contentDetails" entry are treated as non-uploads rather
    than raising.

    Args:
        json_string: JSON text with a top-level "items" list.

    Returns:
        A list of the item dicts whose "contentDetails" contain "upload",
        in their original order.

    Raises:
        KeyError: If the response has no "items" entry.
    """
    activities = json.loads(json_string)["items"]
    return [item for item in activities
            if "upload" in (item.get("contentDetails") or {})]
def youtube_api_request(full_url):
    """Fetch a Youtube API url and return the response body as a string.

    Expects a full API url, including the OAuth token.

    Args:
        full_url: The complete request url.

    Returns:
        The response decoded with ENCODING, or None if the host name could
        not be resolved or the connection timed out (a message is printed
        in that case).

    Raises:
        urllib.error.HTTPError: If the server answered with an error status.
        urllib.error.URLError: For any other connection failure.
    """
    # Local import: only needed to classify connection failures below.
    import socket

    request = urllib.request.Request(full_url)
    try:
        with urllib.request.urlopen(request) as response:
            return response.read().decode(ENCODING)
    except urllib.error.HTTPError:
        # The server was reached and answered with an error status; that is
        # not a connectivity problem, so let the caller see it.
        raise
    except urllib.error.URLError as err:
        # URLError itself carries no errno; the underlying error is in
        # err.reason. Checking its type instead of a numeric errno keeps
        # this portable across platforms.
        reason = err.reason
        if isinstance(reason, (socket.gaierror, socket.timeout)):
            details = getattr(reason, "strerror", None) or str(reason)
            print("Youtube API request failed: " + details)
            print("This may be due to a connection timeout.")
            return None
        raise
def print_video_snippets_info(json_string, num_results = MAX_RESULTS):
    """Print snippet info for the videos in a Youtube API JSON response.

    For each video this prints the publish date and title (colored), the
    watch url, and the truncated description.

    Args:
        json_string: JSON text with a top-level "items" list, where each
            item has an "id" and a "snippet" holding "publishedAt",
            "title" and "description".
        num_results: Maximum number of videos to print. Zero or a
            negative value prints nothing.
    """
    videos = json.loads(json_string)["items"]

    # Stop early if we somehow ended up with more videos than we wanted.
    for item in videos[:max(num_results, 0)]:
        snippet = item["snippet"]
        url = "https://www.youtube.com/watch?v=" + item["id"]

        # Youtube format: 2022-02-07T01:15:00Z
        # Only the date is displayed, so parse just the leading
        # "YYYY-MM-DD" part. That way a timestamp carrying fractional
        # seconds does not make the parse fail.
        date = datetime.strptime(snippet["publishedAt"][:10], "%Y-%m-%d")
        date_string_formatted = date.strftime("%Y-%m-%d")

        title = snippet["title"]
        description = truncate_string(snippet["description"])

        print(Colors.cyan + date_string_formatted + " -- " + title
              + Colors.reset)
        print(url)
        print("Description: " + description + "\n")
def get_channel_id(channel_name):
    """Convert a Youtube channel name into the "UC..." channel id format
    needed for Youtube API calls.

    The channel page is requested at youtube.com/c/<name> and then at
    youtube.com/user/<name>, and the id is read from the page's
    "channelId" meta tag.

    Args:
        channel_name: Url-safe channel name, or a "UC..." channel id.

    Returns:
        The channel id string. If no page yields an id and channel_name
        already starts with "UC", channel_name itself is returned.
        Otherwise None.

    Raises:
        urllib.error.HTTPError: For any HTTP error status other than 404.
    """
    # TODO/HACK We're just grabbing the id directly from the page
    # HTML with regex. Kind of hacky, but I couldn't find any
    # interface to neatly retrieve this based on channel name.
    # This might break if/when Youtube page design changes.
    # There's probably an API somewhere for doing this in a better
    # way.
    channel_id_pattern = re.compile(
        r'meta itemprop="channelId" content="(.*?)">')

    for segment in ("c", "user"):
        request = urllib.request.Request(
            f"https://www.youtube.com/{segment}/{channel_name}")
        try:
            with urllib.request.urlopen(request) as response:
                page_html = response.read().decode(ENCODING)
        except urllib.error.HTTPError as err:
            if err.code != 404:
                raise
            # Not found under this path segment; try the next one.
            continue

        # A page without the meta tag must not crash the lookup; fall
        # through to the next segment / the "UC" fallback instead.
        match = channel_id_pattern.search(page_html)
        if match is not None:
            return match.group(1)

    # If the channel name isn't found, and it already starts with "UC...",
    # instead of failing, assume it is already a channelid.
    if channel_name.startswith("UC"):
        return channel_name
    return None
def truncate_string(target_string, max_length=TRUNCATE_DESCRIPTION_LENGTH,
                    truncate_symbol="..."):
    """Shorten a string that is too long, marking the cut with a symbol.

    Strings no longer than max_length plus the symbol's length are
    returned unchanged, so truncating never produces a longer result than
    the input. Anything longer is cut to max_length characters and has
    truncate_symbol appended.
    """
    longest_kept_as_is = max_length + len(truncate_symbol)
    if len(target_string) <= longest_kept_as_is:
        return target_string
    return target_string[:max_length] + truncate_symbol
def parse_video_list(json_string):
    """Return a comma-separated string of YT video ids.

    Expects a Youtube API json response string. Items that are not video
    uploads, or that lack the expected structure, are skipped.

    Args:
        json_string: JSON text with a top-level "items" list.

    Returns:
        The upload video ids joined with ",", in response order; an empty
        string if there are none.

    Raises:
        KeyError: If the response has no "items" entry.
    """
    yt_api_delimiter = ","
    video_ids = []
    for item in json.loads(json_string)["items"]:
        # Some YT channel activity items do not follow this structure, so
        # skip those. Only lookup failures are caught, so that unrelated
        # errors and interrupts are not silently swallowed.
        try:
            video_ids.append(item["contentDetails"]["upload"]["videoId"])
        except (KeyError, TypeError):
            continue
    return yt_api_delimiter.join(video_ids)
def parse_url_parameters(url, parameters):
    """Build a full YT url by appending a query string to it.

    Each entry of the parameters mapping becomes a "name=value" pair; the
    pairs are joined with "&" and attached to url after a "?". Names and
    values are inserted as given.
    """
    pairs = [name + "=" + parameters[name] for name in parameters]
    return url + "?" + "&".join(pairs)
def parse_yt_url_video_info(video_id_list, max_res = MAX_RESULTS):
    """Return a full API url for Youtube video info.

    Args:
        video_id_list: Comma-separated string of video ids.
        max_res: Requested number of results; values above
            MAX_RESULTS_SUPPORTED are lowered to that limit.

    Returns:
        The videos endpoint url with the key, part, id and maxResults
        query parameters appended.
    """
    # API details: https://developers.google.com/youtube/v3/docs/videos/list
    url = "https://www.googleapis.com/youtube/v3/videos"
    parameters = {
        "key": YOUTUBE_OAUTH_TOKEN,
        "part": "snippet",
        "id": video_id_list,
        # Cap at the API maximum, not at the default fetch count, so that
        # callers can ask for more than MAX_RESULTS videos.
        # NOTE(review): the videos.list docs describe maxResults as not
        # supported together with "id" -- confirm it has any effect here.
        "maxResults": str(min(MAX_RESULTS_SUPPORTED, max_res)),
    }
    return parse_url_parameters(url, parameters)
def parse_yt_url_channel_overview(channel_id, max_res = MAX_RESULTS):
    """Return a full API url for Youtube channel info.

    Args:
        channel_id: The channel's "UC..." id.
        max_res: Requested number of activity items; values above
            MAX_RESULTS_SUPPORTED are lowered to that limit.

    Returns:
        The activities endpoint url with the key, part, channelId and
        maxResults query parameters appended.
    """
    # API details: https://developers.google.com/youtube/v3/docs/activities/list
    url = "https://www.googleapis.com/youtube/v3/activities"
    parameters = {
        "key": YOUTUBE_OAUTH_TOKEN,
        "part": "contentDetails",
        "channelId": channel_id,
        # Cap at the API maximum, not at the default fetch count.
        # Clamping to MAX_RESULTS made it impossible to ever receive more
        # than MAX_RESULTS activity items, whatever the caller asked for.
        "maxResults": str(min(MAX_RESULTS_SUPPORTED, max_res)),
    }
    return parse_url_parameters(url, parameters)
# We don't have terminal color support on Windows
# TODO: Handle OSX (do the colors work there?)
if not platform.startswith("linux"):
    # Blank out every escape sequence so output stays plain text.
    Colors.cyan = Colors.greybg = Colors.reset = ""

# The token is loaded as soon as the module is imported or run, so it is
# available to every function that builds an API url.
set_oauth_token()

if __name__ == '__main__':
    script_entrypoint()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment