Created
December 21, 2023 10:56
-
-
Save sspaeti/5b8e235172e3cefe650ca818ec159584 to your computer and use it in GitHub Desktop.
Generating RSS feed based on existing `sitemap.xml`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Generate an RSS feed (``book/feed.xml``) from an existing ``book/sitemap.xml``.

Every ``<url>`` entry of the sitemap becomes one RSS item whose link is the
page URL rewritten to its ``.html`` form and whose publication date is the
entry's ``<lastmod>`` value.
"""
import datetime
import xml.etree.ElementTree as ET

import PyRSS2Gen as rss

# Namespace of the sitemap protocol in ElementTree's "{uri}tag" notation.
SITEMAP_NS = "{http://www.sitemaps.org/schemas/sitemap/0.9}"


def _parse_lastmod(value):
    """Return the ``<lastmod>`` string *value* as a datetime expressed in UTC.

    The format the script has always handled (``2023-12-21T10:56:00.000Z``)
    is tried first. Other ISO 8601 forms (date only, no fractional seconds,
    numeric UTC offset) are then parsed with ``datetime.fromisoformat``.

    Raises:
        ValueError: if *value* is not a recognizable ISO 8601 date/time.
    """
    try:
        return datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError:
        pass
    # ``fromisoformat`` only understands a trailing "Z" from Python 3.11 on.
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    parsed = datetime.datetime.fromisoformat(value)
    if parsed.tzinfo is not None:
        # PyRSS2Gen prints the datetime fields verbatim and labels them GMT,
        # so values carrying another offset are normalized to UTC first.
        parsed = parsed.astimezone(datetime.timezone.utc)
    return parsed


# Parse the sitemap.xml
tree = ET.parse("book/sitemap.xml")
root = tree.getroot()

# Define the RSS feed basic info
rss_feed = rss.RSS2(
    title="Data Engineering Design Patterns (DEDP)",
    link="https://www.dedp.online",
    description="Data Engineering Design Patterns Book: Mastering Convergent Evolution",
    # PyRSS2Gen expects dates in GMT; a naive local ``now()`` would be mislabeled.
    lastBuildDate=datetime.datetime.now(datetime.timezone.utc),
    items=[],
)

# Extract URLs and their metadata
for url in root.findall(SITEMAP_NS + "url"):
    loc = (url.findtext(SITEMAP_NS + "loc") or "").strip()
    if not loc:
        # An entry without a location cannot be linked to; skip it.
        continue

    # Rewrite directory-style URLs ("…/page/") to the rendered "…/page.html".
    # NOTE(review): the original indentation of the ".html" append was lost;
    # here it is applied to every URL that does not already end in ".html",
    # which avoids producing "….html.html" — confirm against the sitemap.
    if loc.endswith("/"):
        loc = loc[:-1]  # Remove the trailing slash
    if not loc.endswith(".html"):
        loc += ".html"  # Append .html

    # <lastmod> is optional in the sitemap protocol; omit the date if absent.
    lastmod = (url.findtext(SITEMAP_NS + "lastmod") or "").strip()

    # Add item to the RSS feed
    rss_feed.items.append(
        rss.RSSItem(
            title=loc.split("/")[-1],
            link=loc,
            description=f"Updated on {lastmod}" if lastmod else None,
            pubDate=_parse_lastmod(lastmod) if lastmod else None,
        )
    )

# Generate the RSS feed. The context manager guarantees the file is flushed
# and closed, and the file encoding is kept in sync with the encoding that
# PyRSS2Gen writes into the XML declaration.
with open("book/feed.xml", "w", encoding="utf-8") as feed_file:
    rss_feed.write_xml(feed_file, encoding="utf-8")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment