Skip to content

Instantly share code, notes, and snippets.

@capjamesg
Created June 5, 2024 13:25
Show Gist options
  • Save capjamesg/d8ab9f3e00e8d2709217e481f94ee8fb to your computer and use it in GitHub Desktop.
Save capjamesg/d8ab9f3e00e8d2709217e481f94ee8fb to your computer and use it in GitHub Desktop.
import requests
from granary import atom, jsonfeed, microformats2, rss
# Browser-like User-Agent header value sent with every feed request.
USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/51.0.2704.103 Safari/537.36"
)

# URLs to fetch: the site's home page, its posts.xml feed, and two granary.io
# conversions of the home page (Atom output and JSON Feed output).
feeds = [
    "https://jamesg.blog",
    "https://jamesg.blog/feeds/posts.xml",
    "https://granary.io/url?input=html&output=atom&url=https://jamesg.blog",
    "https://granary.io/url?input=html&output=jsonfeed&url=https://jamesg.blog",
]
# Dispatch table: the subtype of a response's Content-Type header (the part
# after the "/") mapped to the granary function that parses that format.
FEED_IDENTIFICATION = {
    # XML feed formats
    "rss+xml": rss.to_activities,
    "atom+xml": atom.atom_to_activities,
    # HTML pages
    "html": microformats2.html_to_activities,
    # JSON Feed; a plain "json" subtype is treated as JSON Feed as well
    "feed+json": jsonfeed.jsonfeed_to_activities,
    "json": jsonfeed.jsonfeed_to_activities,
    # microformats2 JSON
    "mf2+json": microformats2.json_to_activities,
}

# Function applied to the parsed activities to produce the output format.
CONVERSION_FUNCTION = jsonfeed.activities_to_jsonfeed
# Fetch every URL in `feeds`, parse the response with the parser registered in
# FEED_IDENTIFICATION for its Content-Type subtype, run the result through
# CONVERSION_FUNCTION, and print a one-line report per URL. Any failure for a
# URL is reported and the loop moves on to the next one.
for feed in feeds:
    try:
        # requests waits indefinitely by default; a timeout keeps a single
        # unresponsive host from hanging the whole run.
        resp = requests.get(
            feed,
            headers={"User-Agent": USER_AGENT},
            allow_redirects=True,
            timeout=30,
        )
    except requests.RequestException:
        print("Failed to fetch", feed)
        continue

    if resp.status_code != 200:
        print("Failed to fetch", feed, "with status code", resp.status_code)
        continue

    # Reduce e.g. "application/atom+xml; charset=utf-8" to "atom+xml".
    # partition() never raises, so a missing or malformed header yields ""
    # and is reported as unsupported instead of crashing with IndexError.
    raw_content_type = resp.headers.get("Content-Type", "")
    content_type = (
        raw_content_type.split(";")[0].partition("/")[2].strip().lower()
    )

    if content_type not in FEED_IDENTIFICATION:
        print("Unsupported feed type", content_type or repr(raw_content_type))
        continue

    # Every JSON-based format ("json", "feed+json", "mf2+json") is handed to
    # its parser as decoded JSON; the XML and HTML parsers get the raw text.
    if content_type.endswith("json"):
        try:
            payload = resp.json()
        except ValueError:
            # requests raises a ValueError subclass when the body is not JSON.
            print("Failed to parse", feed, "as JSON")
            continue
    else:
        payload = resp.text

    activities = FEED_IDENTIFICATION[content_type](payload)
    # The JSON Feed parser returns an (activities, actor) tuple, which is why
    # the first element is taken; the other parsers are used as returned.
    if isinstance(activities, tuple):
        activities = activities[0]

    # The conversion is still executed so that conversion errors surface, but
    # the count below is taken from the parsed activities, not from the
    # converted feed object (whose len() is not an activity count).
    json_feed = CONVERSION_FUNCTION(activities)

    print(
        "Fetched", feed, "with", len(activities),
        "activities using feed type", content_type,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment