Created
October 15, 2017 18:00
-
-
Save anonymous/f0fd45ee679e72efadaa979ab168c182 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""Extract link, title and price from a sample goods-list HTML snippet.

The embedded ``html_doc`` holds one ``section-content-row`` div. For every
such div the script prints three lists: the ``href`` of each item link, the
text of each item-name span, and the text of each price element.
"""
# Make print a function on Python 2 as well, so the script runs unchanged on
# both Python 2 and Python 3.
from __future__ import print_function

from bs4 import BeautifulSoup

html_doc = """
<html>
<body>
<div class="section-content-row">
<h2 class="rt-goods-list-item-name">
<a class="rt-goods-list-item-name-link" href="http://goods.ruten.com.tw/item/show?21716971337096" title="前往Lowe Alpine Strike 24 運動背包/都會日用後背包 FDP55 黑商品頁面" target="_blank">
<span class="rt-goods-list-item-name-text" itemprop="name">Lowe Alpine Strike 24 運動背包/都會日用後背包 FDP55 黑</span>
</a>
</h2>
<div class="rt-goods-list-item-price-sell">
<div class="rt-goods-list-item-price">
<strong class="item-price-symbol rt-text-larger rt-text-price" itemprop="offers" itemscope="" itemtype="http://schema.org/Offer">2,363<meta itemprop="price" content="2363"><meta itemprop="priceCurrency" content="TWD">
</strong>
</div>
</div>
</div>
</body>
</html>
"""

soup = BeautifulSoup(html_doc, 'html.parser')

# Walk every row container and collect the matching elements inside it.
for each_div in soup.find_all('div', {'class': 'section-content-row'}):
    # Item links: keep only the target URL of each anchor.
    urls = each_div.find_all('a', {'class': 'rt-goods-list-item-name-link'})
    url = [link['href'] for link in urls]
    print(url)

    # Item names: the visible text of each name span.
    titles = each_div.find_all('span', {'class': 'rt-goods-list-item-name-text'})
    title = [name_tag.get_text() for name_tag in titles]
    print(title)

    # Prices: matching on the single class 'rt-text-price' also selects
    # elements that carry additional classes, as the <strong> above does.
    prices = each_div.find_all('strong', {'class': 'rt-text-price'})
    price = [price_tag.get_text() for price_tag in prices]
    print(price)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment