puhitaku/brooks.py

## brooks.py
import re   #regex
import requests as req
from bs4 import BeautifulSoup as bs

def get_text(url, timeout=10):
    """Download *url* and return the response body as bytes.

    The body is decoded by ``requests`` and then re-encoded with the same
    charset, so callers receive bytes rather than ``str``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout the call can block
            indefinitely; pass ``None`` to get that behaviour explicitly.

    Returns:
        The page body as ``bytes``.
    """
    src = req.get(url, timeout=timeout)
    # ``Response.encoding`` is None when requests cannot derive a charset
    # from the headers; ``str.encode(None)`` raises TypeError. In that case
    # requests decodes ``.text`` with ``apparent_encoding``, so re-encode
    # with the same one (UTF-8 as a last resort).
    encoding = src.encoding or src.apparent_encoding or 'utf-8'
    return src.text.encode(encoding)

def get_price(product_url):
    """Print and return the price shown on a product detail page.

    The price is read from the first child of the ``<b>`` element inside
    the first ``<td class="sp">`` cell. Commas are removed and only the
    leading run of ASCII digits is kept.

    Args:
        product_url: Address of the product page to fetch.

    Returns:
        The leading digits of the price cell as a string. This is an empty
        string when the cell text does not start with a digit. Earlier
        versions only printed the value and returned ``None``.

    Raises:
        AttributeError: If the page has no ``td.sp`` cell or that cell has
            no ``<b>`` child (``find`` returns ``None`` in that case).
    """
    soup = bs(get_text(product_url))
    cell = soup.find('td', {'class': 'sp'})
    raw = cell.b.contents[0]

    # Drop thousands separators so the digit run is contiguous.
    digits = raw.replace(',', '')
    # re.match anchors at the start of the string, so no '^' is needed.
    price = re.match('[0-9]*', digits).group()

    print(price)
    return price

def get_products(category_url):
    """Scrape a category listing page into a list of product records.

    Product descriptions are taken from ``<a>`` tags and prices from
    ``<b>`` tags. The two lists are paired purely by document order, and
    ``zip`` stops at the shorter one.

    Args:
        category_url: Address of the category listing page to fetch.

    Returns:
        A list of dicts with the keys:
            ``'kind'``  - the matched character, ``豆`` or ``挽``
            ``'name'``  - the text between the kind and the weight
            ``'gram'``  - the weight digits, kept as a string
            ``'price'`` - the price before ``円(税抜)`` as an ``int``

    Raises:
        AttributeError: If a matched tag's text does not fit the detail
            pattern (``match`` returns ``None``).
        ValueError: If a captured price is not a valid integer.
    """
    # Raw strings: '\s' and '\(' are not valid string escapes, so the
    # non-raw spelling triggers an invalid-escape warning on newer Pythons.
    # The regular expressions themselves are unchanged.
    product_filter = re.compile(r'(豆|挽)\s(.*)\s(.*[0-9]*)g')
    product_detail = re.compile(r'\n.*(豆|挽)\s(.*)\s([0-9]*)g')
    price_pattern = re.compile(r'(.*)円\(税抜\)')

    soup = bs(get_text(category_url))

    # <a> tags whose text looks like "<kind> <name> <weight>g".
    links = soup.find_all('a', text=product_filter)
    # next_element of such a tag is its text node; pull the three fields.
    prods = [product_detail.match(link.next_element).group(1, 2, 3)
             for link in links]

    # <b> tags holding a price, e.g. "1,234円(税抜)".
    price_tags = soup.find_all('b', text=price_pattern)
    prices = [
        int(price_pattern.match(tag.next_element).group(1).replace(',', ''))
        for tag in price_tags
    ]

    return [
        {'kind': kind, 'name': name, 'gram': gram, 'price': price}
        for (kind, name, gram), price in zip(prods, prices)
    ]


#get_price('http://www.brooks.co.jp/refer/syosai.php?SHNCOD=29205')

# Scrape the CATEGORY=241 listing and print one blank-line-separated record
# per product.
info = get_products('http://www.brooks.co.jp/refer/ichiran.php?CATEGORY=241')

# (label, record key, trailing unit words) for each printed line, in order.
report_lines = (
    ('Kind:', 'kind', ()),
    ('Name:', 'name', ()),
    ('Grams:', 'gram', ('[g]',)),
    ('Price:', 'price', ('[Yen]',)),
)

for x in info:
    for label, key, suffix in report_lines:
        print(label, x[key], *suffix)
    print()
	import re #regex
	import requests as req
	from bs4 import BeautifulSoup as bs

	# NOTE(review): this repeats get_text from earlier in the file, but the
	# body lines sit at the same indent as the def, so it does not parse as
	# written. It looks like a whitespace-mangled paste - confirm, then
	# re-indent or remove.
	def get_text(url):
	# Fetch the page with requests.
	src = req.get(url)
	# Re-encode the decoded text with the response's own encoding; returns bytes.
	return src.text.encode(src.encoding)

	# NOTE(review): this repeats get_price from earlier in the file, but the
	# body lines sit at the same indent as the def, so it does not parse as
	# written. It looks like a whitespace-mangled paste - confirm, then
	# re-indent or remove.
	def get_price(product_url):
	# Parse the downloaded page and locate the first <td class="sp"> cell.
	soup = bs( get_text(product_url) )
	line = soup.find('td', {'class': 'sp'})
	# First child of the cell's <b> element.
	raw = line.b.contents[0]

	# Remove commas, then keep only the leading run of digits.
	raw = raw.replace(',','')
	price = re.compile('^[0-9]*').match(raw).group()

	# The value is printed, not returned.
	print(price)

	# NOTE(review): this repeats get_products from earlier in the file, but the
	# body lines sit at the same indent as the def, so it does not parse as
	# written. The regex literals also differ from the earlier copy:
	# '(豆\|挽)' matches the literal text "豆|挽" instead of either character,
	# and '(.)' / '(.[0-9]*)' / '\n.' have no '*' where the earlier copy has
	# '(.*)' / '(.*[0-9]*)' / '\n.*'. This looks like markup-escaping damage
	# from a paste - confirm against the earlier copy before using.
	def get_products(category_url):
	soup = bs( get_text(category_url) )

	# Select every <a> tag whose text matches the product-description pattern
	prods = soup.find_all('a', text=re.compile('(豆\|挽)\s(.)\s(.[0-9]*)g'))
	# Replace each tag by its next_element
	prods = [x.next_element for x in prods]
	# Capture three groups (kind, name, grams) from each description
	prods = [re.compile('\n.(豆\|挽)\s(.)\s([0-9]*)g').match(x).group(1,2,3) for x in prods]

	# Select <b> tags whose text contains a price and take their next_element
	prices = [x.next_element for x in soup.find_all('b', text=re.compile('(.*)円\(税抜\)'))]
	# Capture the part before 円(税抜) and drop commas
	prices = [re.compile('(.*)円\(税抜\)').match(x).group(1).replace(',','') for x in prices]
	# Convert the captured digit strings to int
	prices = [int(x) for x in prices]

	# Pair descriptions with prices by position; zip stops at the shorter list
	return [{'kind': x[0], 'name': x[1], 'gram': x[2], 'price': y} for x, y in zip(prods, prices)]


	#get_price('http://www.brooks.co.jp/refer/syosai.php?SHNCOD=29205')

	# NOTE(review): this repeats the top-level script from earlier in the file,
	# but the loop body sits at the same indent as the `for` line, so it does
	# not parse as written. It looks like a whitespace-mangled paste - confirm,
	# then re-indent or remove.
	# Scrape the CATEGORY=241 listing.
	info = get_products('http://www.brooks.co.jp/refer/ichiran.php?CATEGORY=241')

	# Print each record's fields, followed by a blank line.
	for x in info:
	print('Kind:', x['kind'])
	print('Name:', x['name'])
	print('Grams:', x['gram'], '[g]')
	print('Price:', x['price'], '[Yen]')
	print()