Created
August 12, 2023 23:23
-
-
Save EZonTheEyes/4aedb08e8bf334f8686fe1563e2850c3 to your computer and use it in GitHub Desktop.
PyRSS WIP July 5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RSS Specification: https://www.rssboard.org/rss-draft-1 | |
# Sample RSS XML file: https://www.rssboard.org/files/rss-2.0-sample.xml | |
# Python XML documentation: https://docs.python.org/3/library/xml.etree.elementtree.html | |
# Juicy xpath thingy for looking up elements: https://docs.python.org/3/library/xml.etree.elementtree.html#xpath-support | |
# The plan: | |
# Plan (real (real v reall)) (extremely real) | |
# - call on headlines & print (specify amount) <item, title>
# - print text preview (body of article) (specify characters) <item, description> | |
# - Print URL Link <link> | |
# - ... | |
# - Do not die when encountering errors, but continue showing other feeds | |
# - Image Preview (convert to ASCII with shady libs)
# - Random article Function...? | |
# - Remove Non XML RSS feed Links, maybe print a warning - error? | |
# - Gui????? maybe | |
# - ??? | |
# - Profit. | |
# - Nerd Plan | |
# - Download XML from RSS feed (download manually first for convenience) === DONE
# - Parse the properties we need from the downloaded XML file === DONE | |
# - Print them bad boys out === DONE | |
# - Profit. === CLOSE ENOUGH | |
# - DAY 1 === Almost Totally Done. | |
import xml.etree.ElementTree as ET | |
from urllib.request import urlopen | |
# Startup banner: emitted once, before any feed is downloaded or parsed.
print("💜 testie line for my bestie line 💜")
def _child_text(parent, path):
    """Return the text of the first element under *parent* matching *path*.

    Returns None when no element matches, which is also what an existing
    but empty element yields via its ``.text`` attribute.
    """
    child = parent.find(path)
    return None if child is None else child.text


def print_rss_feed(rss_xml):
    """Parse an RSS document and print the title, description and link of each item.

    Args:
        rss_xml: The XML document as ``str`` or ``bytes``. Items are looked
            up at ``./channel/item`` relative to the root element.

    Returns:
        None. Output goes to stdout. If the document cannot be parsed, a
        single error line is printed and nothing else.
    """
    try:
        # Read the XML from the `rss_xml` string
        root = ET.fromstring(rss_xml)
    except (ET.ParseError, TypeError, ValueError):
        # Narrow on purpose: a bare `except` would also swallow
        # KeyboardInterrupt / SystemExit and make the script hard to stop.
        print("Oops! XML Library Error.")
        return

    # A feed without a channel title is reported as "None" instead of
    # raising IndexError and killing the whole run.
    news_source = _child_text(root, "./channel/title")

    # Loop over every item in the list, and print out the title, description and link
    for item in root.findall("./channel/item"):
        print("-----")
        print(f"news source: {news_source}")
        # Each field is optional here: a missing element prints "None",
        # the same text an empty element already produced.
        for label in ("title", "description", "link"):
            print(f"{label}: {_child_text(item, './' + label)}")
# Feeds to download and print, in this order.
urls = [
    "http://feeds.feedburner.com/ign/all",
    "https://www.cnn.com/services/rss/",
    "https://englishcode.wordpress.com/feed",
    "https://media.rss.com/welovethestate/feed.xml",
    "https://moxie.foxnews.com/google-publisher/politics.xml",
]

# Loop over all of the urls from the array, and print the feed for each one
for url in urls:
    try:
        # Download the RSS feed from the URL. The timeout keeps one stalled
        # server from hanging the whole run.
        with urlopen(url, timeout=10) as response:
            response_rss = response.read()
    except (OSError, ValueError) as error:
        # URLError, HTTPError and socket timeouts are all OSError subclasses;
        # ValueError covers a malformed URL. Report it and move on so the
        # remaining feeds are still shown.
        print(f"Could not download {url}: {error}")
    else:
        # Parse and print the feed :)
        print_rss_feed(response_rss)
    # Separator between feeds.
    print("======")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment