Skip to content

Instantly share code, notes, and snippets.

@sspaeti
Created December 21, 2023 10:56
Show Gist options
  • Save sspaeti/5b8e235172e3cefe650ca818ec159584 to your computer and use it in GitHub Desktop.
Save sspaeti/5b8e235172e3cefe650ca818ec159584 to your computer and use it in GitHub Desktop.
Generating RSS feed based on existing `sitemap.xml`
import xml.etree.ElementTree as ET
import PyRSS2Gen as rss
import datetime
# Build an RSS 2.0 feed (book/feed.xml) from the pages listed in
# book/sitemap.xml: one feed item per <url> entry, dated by its <lastmod>.

# Namespace prefix that ElementTree puts on every tag of a sitemap document.
SITEMAP_NS = "{http://www.sitemaps.org/schemas/sitemap/0.9}"

# Parse the sitemap.xml
tree = ET.parse("book/sitemap.xml")
root = tree.getroot()

# Define the RSS feed basic info
rss_feed = rss.RSS2(
    title="Data Engineering Design Patterns (DEDP)",
    link="https://www.dedp.online",
    description="Data Engineering Design Patterns Book: Mastering Convergent Evolution",
    # PyRSS2Gen labels dates as GMT without converting them, so pass the
    # current time in UTC instead of local wall-clock time.
    lastBuildDate=datetime.datetime.now(datetime.timezone.utc),
    items=[],
)

# Extract URLs and their metadata
for url in root.findall(f"{SITEMAP_NS}url"):
    loc = url.findtext(f"{SITEMAP_NS}loc")
    lastmod = url.findtext(f"{SITEMAP_NS}lastmod")

    # findtext() yields None for a missing child (and "" for an empty one).
    # Skip such entries instead of crashing: without a URL or a modification
    # date there is nothing meaningful to publish for the page.
    if not loc or not lastmod:
        continue

    # Turn ".../page/" into ".../page.html".
    # NOTE(review): the indentation of the original was lost; this assumes
    # ".html" is appended to every URL, not only to those that ended in "/".
    # NOTE(review): a bare site-root URL ("https://host/") would become
    # "https://host.html" -- confirm the sitemap has no such entry.
    if loc.endswith("/"):
        loc = loc[:-1]  # Remove the trailing slash
    loc += ".html"  # Append .html

    # Only the "YYYY-MM-DDTHH:MM:SS.ffffffZ" form is accepted; any other
    # timestamp layout raises ValueError here.
    lastmod_date = datetime.datetime.strptime(lastmod, "%Y-%m-%dT%H:%M:%S.%fZ")

    # Add item to the RSS feed; the title is the last path segment of the URL.
    rss_feed.items.append(
        rss.RSSItem(
            title=loc.split("/")[-1],
            link=loc,
            description=f"Updated on {lastmod}",
            pubDate=lastmod_date,
        )
    )

# Generate the RSS feed
# write_xml() puts its `encoding` argument into the XML declaration (default
# iso-8859-1), while a text-mode file is written in the file's own encoding.
# Set both to UTF-8 so the declaration matches the bytes on disk, and use a
# context manager so the file is flushed and closed.
with open("book/feed.xml", "w", encoding="utf-8") as feed_file:
    rss_feed.write_xml(feed_file, encoding="utf-8")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment