Skip to content

Instantly share code, notes, and snippets.

@Yiannis128
Last active May 16, 2021 12:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Yiannis128/4a9c016236edf41493176a59bb0a1be0 to your computer and use it in GitHub Desktop.
Save Yiannis128/4a9c016236edf41493176a59bb0a1be0 to your computer and use it in GitHub Desktop.
This is a Python script that extracts your YouTube subscriptions into a list of RSS feed URLs. Pass the script one argument: the path to a saved HTML copy of https://www.youtube.com/feed/channels. Make sure the whole page was saved (scroll to the bottom first), because the script can only see the channels present in that file.
#!/usr/bin/env python3
# Licence: GPLV3
import requests
from bs4 import BeautifulSoup as soup
from sys import argv as argv
from time import sleep
def get_channel_id(url):
    """Fetch a YouTube channel page and extract its name and channel id.

    Parameters:
        url: URL of the channel's YouTube page.

    Returns:
        A ``(channel_name, channel_id)`` tuple of strings.

    Raises:
        ValueError: if the page has no title or no channelId meta tag.
        requests.HTTPError: if the HTTP request failed.
    """

    def get_channel_name():
        # Title is of the form "NAME - YouTube", so the suffix needs
        # to be stripped off.
        title = parser.title.text
        if title is None:
            raise ValueError("channel page has no <title> text: " + url)
        return title.replace(" - YouTube", "")

    channel_html = requests.get(url)
    # Fail loudly on HTTP errors instead of parsing an error page.
    channel_html.raise_for_status()
    parser = soup(channel_html.text, "html.parser")
    # Get the channel name
    channel_name = get_channel_name()
    # Loop through all the meta elements, one of them contains
    # the channel id as an attribute.
    # The meta tags with channel id are of the form:
    # <meta itemprop="channelId" content="....">
    for meta_element in parser.find_all("meta"):
        # The meta tag whose itemprop attribute is "channelId"
        # carries the channel id in its content attribute.
        if meta_element.attrs.get("itemprop") == "channelId":
            return (channel_name, meta_element.attrs["content"])
    # The original implementation fell off the end here and returned
    # None, which made the caller's tuple unpacking crash with an
    # opaque TypeError; raise a clear error instead.
    raise ValueError("no channelId meta tag found at " + url)
def get_ch_ids_list(parser, max_retries=5):
    """Print an RSS feed URL for every subscribed channel found in the page.

    Parameters:
        parser: BeautifulSoup parse tree of a saved
            https://www.youtube.com/feed/channels page.
        max_retries: how many times to retry a channel whose page could
            not be fetched/parsed before skipping it. The original code
            retried forever, which turned one persistently broken
            channel into an infinite loop.
    """

    def get_channel_url():
        # The element with id "main-link" holds an href attribute
        # pointing at the channel's page.
        main_link_tag = channel_html.find(id="main-link")
        return main_link_tag.attrs["href"]

    # YouTube does not load all channels into one grid container.
    # It loads multiple grid containers, each holding a chunk of the
    # channels, because channels are loaded as you scroll down the
    # page — so every container has to be parsed.
    grid_containers = parser.find_all(id="grid-container")
    for grid_container in grid_containers:
        # Iterate through the channels in this container.
        for channel_html in grid_container.children:
            # Retry transient failures, but give up after max_retries
            # attempts instead of looping forever.
            for _attempt in range(max_retries):
                try:
                    channel_url = get_channel_url()
                    # Resolve the channel's display name and id.
                    channel_name, channel_id = get_channel_id(channel_url)
                    print("#", channel_name)
                    print("https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id, "\"!YouTube\"")
                    # Sleep 1 second as to not piss off YT.
                    sleep(1)
                    break
                except Exception as error:
                    # If we get an error, then retry.
                    print("!Error: " + str(error), "Retrying...")
if __name__ == "__main__":
    # Require exactly one argument: the path of the saved
    # https://www.youtube.com/feed/channels page. The original code
    # crashed with a raw IndexError when run without arguments.
    if len(argv) < 2:
        raise SystemExit("Usage: " + argv[0] + " <saved channels html file>")
    channels_html_path = argv[1]
    print("Using html file: " + channels_html_path)
    # Explicit encoding so the saved page parses the same regardless
    # of the platform's default locale encoding.
    with open(channels_html_path, "r", encoding="utf-8") as file:
        print("# Youtube Subscriptions")
        parser = soup(file.read(), 'html.parser')
        channel_ids = get_ch_ids_list(parser)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment