Skip to content

Instantly share code, notes, and snippets.

@f9n
Forked from alexmill/python3feedfinder
Last active May 29, 2017 21:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save f9n/46953ff26e7da165c9f20dfbe1cd8256 to your computer and use it in GitHub Desktop.
Save f9n/46953ff26e7da165c9f20dfbe1cd8256 to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python3.6
import requests
import feedparser
from urllib.parse import urljoin
from lxml import html
def findfeedWithLxml(site, timeout=10):
    """Return the feed URLs advertised by the page at *site*.

    Candidates are collected from ``<link rel="alternate">`` elements whose
    ``type`` mentions "rss" or "xml", and from ``<a>`` elements whose ``href``
    contains "xml", "rss" or "feed". Every candidate is resolved against
    *site* and kept only if ``feedparser`` finds at least one entry in it.

    Args:
        site: URL of the page to inspect.
        timeout: Seconds to wait for the page request; passed straight to
            ``requests.get`` so an unresponsive server cannot block forever.

    Returns:
        A list of absolute feed URLs without duplicates, ``<link>``
        candidates first and ``<a>`` candidates after, each group in
        document order.

    Raises:
        requests.RequestException: if fetching *site* fails or times out.
    """
    raw = requests.get(site, timeout=timeout).content
    tree = html.fromstring(raw)

    candidates = []
    seen = set()

    def remember(href):
        # Resolve relative links against the page URL and skip repeats so
        # each candidate is downloaded and parsed only once.
        absolute = urljoin(site, href)
        if absolute not in seen:
            seen.add(absolute)
            candidates.append(absolute)

    for link in tree.xpath("//link[@rel='alternate']"):
        # A <link> may carry no type or href attribute; treat that as
        # "not a feed" instead of failing on the whole page.
        mime = link.get('type') or ''
        href = link.get('href')
        if href and ('rss' in mime or 'xml' in mime):
            remember(href)

    for anchor in tree.xpath("//a"):
        href = anchor.get('href')
        if href and any(marker in href for marker in ('xml', 'rss', 'feed')):
            remember(href)

    # Only URLs that actually parse to a feed with entries are reported.
    return [url for url in candidates if feedparser.parse(url).entries]
if __name__ == '__main__':
    # Script entry point: look up the feeds of one sample site and print
    # the resulting list.
    sample_site = 'http://www.bahcesehir.edu.tr/'
    discovered = findfeedWithLxml(sample_site)
    print(discovered)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment