Last active
March 2, 2016 10:37
-
-
Save kurozumi/4aed0f2a74815289b659 to your computer and use it in GitHub Desktop.
【Python】Seleniumを使って標準入力したURLからRSSフィードのURLを探す方法
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""Interactively look up the RSS feed URLs advertised by web pages.

Reads URLs from standard input, one per line, loads each one in Firefox
through Selenium, and prints the ``href`` of every ``<link>`` element whose
``type`` attribute contains ``rss``.  Entering anything that is not an
http(s) URL, or reaching end of input, ends the session and shuts the
browser down.

Written for Python 2 (``raw_input``, ``urllib.splittype``).
"""
import re
import urllib

from selenium import webdriver

# Matches <link type="..."> values that mention "rss".
RSS_TYPE = re.compile(r'rss')


def find_rss_links(driver, url):
    """Load *url* in *driver* and return the hrefs of its RSS <link> tags.

    The hrefs are returned in document order.  An entry is None when a
    matching <link> tag has no href attribute.
    """
    # Display the page.
    driver.get(url)

    hrefs = []
    # Look through every <link> tag on the page.
    for link in driver.find_elements_by_tag_name("link"):
        # get_attribute() returns None when the tag has no type attribute,
        # and re.search() raises TypeError on None, so fall back to "".
        link_type = link.get_attribute("type") or ""
        if RSS_TYPE.search(link_type) is not None:
            hrefs.append(link.get_attribute('href'))
    return hrefs


def main():
    """Run the read-URL / print-feeds loop until a non-http(s) line or EOF."""
    # Launch Firefox.
    driver = webdriver.Firefox()
    try:
        while True:
            # Read the next URL from standard input.
            try:
                url = raw_input()
            except EOFError:
                # Input was closed (e.g. Ctrl-D or end of a pipe): stop.
                break

            # Split off the URL scheme.
            scheme, _ = urllib.splittype(url)

            # Anything other than http/https ends the session.  Leave the
            # loop here so we never call driver.get() on a closed browser.
            if scheme not in ("http", "https"):
                break

            for href in find_rss_links(driver, url):
                print(href)
    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window.  Runs on every exit path, including errors.
        driver.quit()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment