Skip to content

Instantly share code, notes, and snippets.

@alexmill
Last active December 12, 2023 19:56
Show Gist options
  • Save alexmill/9bc634240531d81c3abe to your computer and use it in GitHub Desktop.
Save alexmill/9bc634240531d81c3abe to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python3.3
from bs4 import BeautifulSoup as bs4
import requests
import feedparser
import urllib.parse
def findfeed(site):
    """Return the feed URLs discoverable from the page at ``site``.

    Candidates are collected from two places in the fetched HTML:

    * ``<link rel="alternate">`` tags whose ``type`` attribute mentions
      ``rss`` or ``xml``;
    * ``<a>`` tags whose ``href`` contains ``xml``, ``rss`` or ``feed``.

    Every candidate is resolved against ``site`` and then handed to
    ``feedparser.parse``; only URLs that yield at least one entry are kept.

    Args:
        site: URL of the page to inspect.

    Returns:
        A list of absolute feed URLs, without duplicates, in the order they
        were first seen in the document.

    Exceptions raised by ``requests.get`` (connection errors, timeouts)
    are not caught here and propagate to the caller.
    """
    # A timeout keeps an unresponsive server from blocking the caller forever.
    raw = requests.get(site, timeout=30).text
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    html = bs4(raw, "html.parser")

    candidates = []
    seen = set()

    def _add_candidate(href):
        # urljoin leaves absolute hrefs untouched and resolves relative ones
        # (both "/feed" and "feed.xml") against the page URL, keeping any port.
        url = urllib.parse.urljoin(site, href)
        if url not in seen:
            seen.add(url)
            candidates.append(url)

    # Inspect every alternate link; a page advertising a single feed counts.
    for link in html.find_all("link", rel="alternate"):
        mime = link.get("type")
        href = link.get("href")
        if mime and href and ("rss" in mime or "xml" in mime):
            _add_candidate(href)

    # Fall back to anchors whose target merely looks like a feed.
    for anchor in html.find_all("a"):
        href = anchor.get("href")
        if href and any(word in href for word in ("xml", "rss", "feed")):
            _add_candidate(href)

    # Keep only the candidates that actually parse into a feed with entries.
    return [url for url in candidates if feedparser.parse(url).entries]
@alexmill
Copy link
Author

alexmill commented Sep 3, 2020

@dvershinin Thanks! Fixed the article so it just shows this embedded gist.

@ranok
Copy link

ranok commented Feb 28, 2022

@alexmill Why is the comparison in l.13 strictly > 1? What if there is only one RSS feed linked? Should it not be >= 1?

@alexmill
Copy link
Author

@ranok I think no reason in particular

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment