Skip to content

Instantly share code, notes, and snippets.

Last active Mar 7, 2019
What would you like to do?
Nakau stores data scraping - 1
import requests
import csv
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError
def get_data_nakau(storeid):
    """Scrape the details of a single Nakau store page.

    Args:
        storeid (int): numeric id of the store; used to build the page URL.

    Returns:
        dict: store details with keys 'storeid', 'brand', 'name',
            'postalCode', 'address', 'lat', 'lon', 'business_hour1',
            'business_hour2', 'business_hour3'.
        None: if the page does not exist (HTTP status >= 300).
    """
    store_details = {'storeid': storeid}
    # NOTE(review): the base URL appears to have been lost in extraction —
    # as written this is a relative path; restore the full store-page URL.
    url = f'{storeid}.html'
    try:
        # allow_redirects=False so a redirect (e.g. to an index page for a
        # missing store) surfaces as a 3xx status instead of a 200.
        r = requests.get(url, allow_redirects=False)
        # Treat any redirect or error status as "store not found".
        if r.status_code >= 300:
            raise HTTPError
    except HTTPError:
        print("Page not found. Status code is not 200.")
        return None
    soup = BeautifulSoup(r.text, 'html.parser')
    # The shop block holds "<brand>\u3000<branch name>" separated by an
    # ideographic space (U+3000).
    shop = soup.find('div', {'class': 'shop'})
    shopdetails = shop.text.strip('\n').split('\u3000')
    store_details['brand'] = shopdetails[0]
    store_details['name'] = shopdetails[1]
    # Address data list for the shop location.
    location = soup.find('dl', {"class": "address"})
    # Remove the Google-map link <div> from the tree so that the remaining
    # <dd> text below is the clean street address. (The coordinates in the
    # link are not used here — lat/lon come from the API response instead.)
    location.div.extract()
    # The postal code sits in a <span> inside the address list; remove it
    # from the tree too, then strip the leading 〒 postal mark.
    postal = location.span.extract()
    store_details['postalCode'] = postal.text.strip('〒')
    # What remains of <dd> is the street address.
    store_details['address'] = location.dd.text.strip('\n')
    # Get business hours, coordinates, and other details from the
    # store-search API, queried by the shop name.
    # NOTE(review): the API base URL also appears lost in extraction —
    # restore it around the format placeholder.
    api_url = '{}'.format(store_details['name'])
    r_api = requests.get(api_url).json()
    # The store record (a dict) is wrapped in a list, hence [0].
    datalist = r_api['mapdata'][0]
    store_details['lat'] = datalist['lat']
    store_details['lon'] = datalist['lng']
    store_details['business_hour1'] = datalist['business_hour1']
    store_details['business_hour2'] = datalist['business_hour2']
    store_details['business_hour3'] = datalist['business_hour3']
    return store_details
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment