Skip to content

Instantly share code, notes, and snippets.

@baobao
Last active December 7, 2018 03:07
Show Gist options
  • Save baobao/42bab5a64746ede47200cc3436d23463 to your computer and use it in GitHub Desktop.
Save baobao/42bab5a64746ede47200cc3436d23463 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
# Sample document used to demonstrate the basic BeautifulSoup lookup APIs.
html = """
<html><body>
<h1><a href="http://www.shibuya24.info/">渋谷ほととぎす通信</a></h1>
<p>ブログ更新が</p>
<p>とてもとても</p>
<div id="piyo">滞っています</div>
<ul id="test">
<li><a href="http://www.shibuya24.info/archive/category/Unity">Unity</a></li>
<li><a href="http://www.shibuya24.info/archive/category/DOTween">DOTween</a></li>
</ul>
<p class="hoge">でも頑張ります</p>
</body></html>
"""

# The second argument selects which parser to use.
soup = BeautifulSoup(html, 'html.parser')

# Attribute-style navigation returns the first matching descendant tag.
h1 = soup.html.body.h1

# First <p> element.
p1 = soup.html.p

# Second <p> element. find_next_sibling("p") skips the whitespace text node
# between the tags, so it does not depend on the exact source formatting
# the way chaining .next_sibling twice does.
p2 = p1.find_next_sibling("p")

# Look up id="piyo": find() returns a single Tag, select() returns a list.
idPiyo = soup.find(id="piyo")
idPiyo2 = soup.select("#piyo")

# Look up by class: again one Tag via find(), a list via select().
classHoge = soup.find(class_="hoge")
classHoge2 = soup.select(".hoge")

# Collect every <a> element in the document.
linkGroup = soup.find_all("a")

# Select by DOM structure: <li> elements that are direct children of ul#test.
listGroup = soup.select("ul#test > li")

print("h1:", h1.string)
print("p1:", p1.string)
print("p2:", p2.string)

print("idPiyo", idPiyo.string)
# Iterate the select() result list (mirrors the classHoge2 loop below);
# iterating the single Tag from find() would walk its child nodes instead.
for piyo in idPiyo2:
    print("piyo", piyo.string)

print("classHoge", classHoge.string)
for hoge in classHoge2:
    print("classHoge", hoge.string)

for link in linkGroup:
    href = link["href"]
    print("URL", href)

# Loop variable is named `item` so the builtin `list` is not shadowed.
for item in listGroup:
    print(item.string)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment