mursts/scrape.py

## scrape.py
#!/usr/bin/env python
# coding: utf-8

import requests # pip install requests
import re

# Pattern that captures the title text (the link text before the closing
# </a>) on a line containing an <li> tag whose class is "toctree-l1".
rgx = re.compile(r'<li class="toctree-l1">.*>(.*)</a>')

def main():
    """Print the titles of the "toctree-l1" entries on the PyCamp textbook index.

    Downloads the textbook index page, scans it one line at a time with the
    module-level ``rgx`` pattern and prints the captured title of every line
    that matches.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    url = 'http://pycamp.pycon.jp/textbook/index.html'
    # Fetch the HTML of the target page. requests applies no timeout unless
    # one is given, so set one to avoid hanging on an unresponsive server.
    r = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    r.raise_for_status()
    # Let requests detect the character encoding from the response body.
    r.encoding = r.apparent_encoding

    # Split on newlines and process the page one line at a time.
    for x in r.text.split('\n'):
        # Does this line match the pattern?
        match = rgx.search(x)
        if match:
            # It does: print the captured title.
            print(match.group(1))

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	# coding: utf-8

	import requests # pip install requests
	import re

	# Pattern that captures the title text (the link text before the closing
	# </a>) on a line containing an <li> tag whose class is "toctree-l1".
	rgx = re.compile(r'<li class="toctree-l1">.*>(.*)</a>')

	def main():
	    """Print the titles of the "toctree-l1" entries on the PyCamp textbook index.

	    Downloads the textbook index page, scans it one line at a time with
	    ``rgx`` and prints the captured title of every line that matches.

	    Raises:
	        requests.exceptions.RequestException: if the request fails, times
	            out, or the server answers with an HTTP error status.
	    """
	    url = 'http://pycamp.pycon.jp/textbook/index.html'
	    # Fetch the HTML of the target page. requests applies no timeout unless
	    # one is given, so set one to avoid hanging on an unresponsive server.
	    r = requests.get(url, timeout=10)
	    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
	    r.raise_for_status()
	    # Let requests detect the character encoding from the response body.
	    r.encoding = r.apparent_encoding

	    # Split on newlines and process the page one line at a time.
	    for x in r.text.split('\n'):
	        # Does this line match the pattern?
	        match = rgx.search(x)
	        if match:
	            # It does: print the captured title.
	            print(match.group(1))

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()