Last active
December 7, 2018 03:07
-
-
Save baobao/42bab5a64746ede47200cc3436d23463 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demonstrates several ways of locating elements with Beautiful Soup:
# attribute-style navigation, find()/find_all(), and CSS selectors via select().
from bs4 import BeautifulSoup

html = """
<html><body>
<h1><a href="http://www.shibuya24.info/">渋谷ほととぎす通信</a></h1>
<p>ブログ更新が</p>
<p>とてもとても</p>
<div id="piyo">滞っています</div>
<ul id="test">
<li><a href="http://www.shibuya24.info/archive/category/Unity">Unity</a></li>
<li><a href="http://www.shibuya24.info/archive/category/DOTween">DOTween</a></li>
</ul>
<p class="hoge">でも頑張ります</p>
</body></html>
"""

# The second argument selects which parser to use.
soup = BeautifulSoup(html, 'html.parser')

h1 = soup.html.body.h1

# First <p> element.
p1 = soup.html.p

# Second <p> element. find_next_sibling("p") skips the whitespace text node
# between the tags instead of relying on a fixed number of next_sibling hops.
p2 = p1.find_next_sibling("p")

# Element with id="piyo": find() returns a single Tag, select() returns a list.
idPiyo = soup.find(id="piyo")
idPiyo2 = soup.select("#piyo")

# Element with class="hoge": again a single Tag vs. a list of matches.
classHoge = soup.find(class_="hoge")
classHoge2 = soup.select(".hoge")

# Every <a> element in the document.
linkGroup = soup.find_all("a")

# Select by DOM structure: <li> elements that are direct children of ul#test.
listGroup = soup.select("ul#test > li")

print("h1:", h1.string)
print("p1:", p1.string)
print("p2:", p2.string)

print("idPiyo", idPiyo.string)
# Iterate the select() result list, mirroring the classHoge2 loop below
# (iterating idPiyo itself would walk the Tag's children instead).
for piyo in idPiyo2:
    print("piyo", piyo.string)

print("classHoge", classHoge.string)
for hoge in classHoge2:
    print("classHoge", hoge.string)

for link in linkGroup:
    href = link.attrs['href']
    print("URL", href)

# Loop variable is named `item` so the builtin `list` is not shadowed.
for item in listGroup:
    print(item.string)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment