Skip to content

Instantly share code, notes, and snippets.

Created October 15, 2017 18:00
Show Gist options
  • Save anonymous/f0fd45ee679e72efadaa979ab168c182 to your computer and use it in GitHub Desktop.
Save anonymous/f0fd45ee679e72efadaa979ab168c182 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
# Sample HTML used as the parser input for this script: a single
# "section-content-row" div holding one product entry, made up of
#   * an <a class="rt-goods-list-item-name-link"> whose href is the item URL,
#   * a <span class="rt-goods-list-item-name-text"> holding the item name,
#   * a <strong class="... rt-text-price"> holding the displayed price text,
#     followed by <meta> tags for the price and its currency.
html_doc = """
<html>
<body>
<div class="section-content-row">
<h2 class="rt-goods-list-item-name">
<a class="rt-goods-list-item-name-link" href="http://goods.ruten.com.tw/item/show?21716971337096" title="前往Lowe Alpine Strike 24 運動背包/都會日用後背包 FDP55 黑商品頁面" target="_blank">
<span class="rt-goods-list-item-name-text" itemprop="name">Lowe Alpine Strike 24 運動背包/都會日用後背包 FDP55 黑</span>
</a>
</h2>
<div class="rt-goods-list-item-price-sell">
<div class="rt-goods-list-item-price">
<strong class="item-price-symbol rt-text-larger rt-text-price" itemprop="offers" itemscope="" itemtype="http://schema.org/Offer">2,363<meta itemprop="price" content="2363"><meta itemprop="priceCurrency" content="TWD">
</strong>
</div>
</div>
</div>
</body>
</html>
"""
# Parse the sample markup with Python's built-in HTML parser backend.
soup = BeautifulSoup(html_doc, 'html.parser')

# Walk every product row and print, per row, three lists: the item URLs,
# the item names and the displayed price strings.
#
# Each print uses the call form with a single argument, which behaves the
# same under Python 2 (parenthesised expression) and Python 3 (function),
# so no __future__ import is needed.
for each_div in soup.find_all('div', class_='section-content-row'):
    # Item links: collect the href attribute of every product anchor.
    urls = each_div.find_all('a', class_='rt-goods-list-item-name-link')
    url = [link['href'] for link in urls]
    print(url)

    # Item names: the text content of each name span.
    titles = each_div.find_all('span', class_='rt-goods-list-item-name-text')
    title = [name_tag.get_text() for name_tag in titles]
    print(title)

    # Prices: the text content of each <strong> carrying the
    # "rt-text-price" class (class_ matches one class among several).
    prices = each_div.find_all('strong', class_='rt-text-price')
    price = [price_tag.get_text() for price_tag in prices]
    print(price)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment