Skip to content

Instantly share code, notes, and snippets.

@yat1ma30
Created August 26, 2013 11:56
Show Gist options
  • Save yat1ma30/6340722 to your computer and use it in GitHub Desktop.
Save yat1ma30/6340722 to your computer and use it in GitHub Desktop.
与えられたURLからRSSフィードのURLをリトリーブ。
# -*- coding: utf-8 -*-
import requests
from BeautifulSoup import BeautifulSoup
import urlparse
def get(url, timeout=None):
    """Retrieve the feed URL advertised by the page at *url*.

    The page is fetched and its ``<link>`` elements are searched for an
    Atom feed (``type="application/atom+xml"``) first, then for an RSS
    feed (``type="application/rss+xml"``).

    Args:
        url: Page address. ``http://`` is prepended when the value does
            not already start with ``http://`` or ``https://``.
        timeout: Optional timeout passed through to ``requests.get``.
            The default ``None`` keeps the library's default behaviour.

    Returns:
        The feed URL as an absolute URL, or ``None`` when the page could
        not be fetched or no feed link with an ``href`` was found.
    """
    schemes = ("http://", "https://")

    # Prepend a scheme when it is missing. A prefix test is required here:
    # a substring test would skip hosts/paths that merely contain "http".
    if not url.lower().startswith(schemes):
        url = "http://{0}".format(url)

    p = urlparse.urlparse(url)
    # http://xxx.com/aaa/bbb/ => http://xxx.com
    # Built from netloc (minus any "user:pass@" part) so that an explicit
    # port survives; p.hostname would silently drop it.
    root = "{0}://{1}".format(p.scheme, p.netloc.rpartition("@")[2])

    try:
        r = requests.get(url, timeout=timeout)
    except (requests.RequestException, ValueError):
        # Best effort: an unreachable or malformed URL yields None.
        # ValueError also covers UnicodeError raised for invalid host names.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit propagate.
        return None

    # The URL was reachable: look for a feed link, preferring Atom over RSS.
    soup = BeautifulSoup(r.text)
    for mime in ('application/atom+xml', 'application/rss+xml'):
        link = soup.find('link', attrs={'type': mime})
        href = link.get('href') if link is not None else None
        if not href:
            # No such link, or a link without a usable href: try the next.
            continue
        if href.lower().startswith(schemes):
            return href
        # Relative (or scheme-relative) href: resolve against the site root.
        return urlparse.urljoin(root, href)
    return None
if __name__ == "__main__":
    # Demo run: look up the feed URL of a sample blog and print the result.
    print(get("ottati.hatenablog.com"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment