Skip to content

Instantly share code, notes, and snippets.

@karno
Created January 7, 2019 07:07
Show Gist options
  • Save karno/6d1b60df2c0c945d5b49713c4e2b0a0f to your computer and use it in GitHub Desktop.
Save karno/6d1b60df2c0c945d5b49713c4e2b0a0f to your computer and use it in GitHub Desktop.
fetch menus from sushiro
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# SSRL: SuShiRo menu List acquisition script.
import datetime
import json
import re
import sys
import requests
# Base URL of the mobile menu; relative category links are appended to it.
SSR_MENU_TOP = "https://www.akindo-sushiro.co.jp/m/menu/"
# Captures the href value of every anchor tag whose first attribute is href.
RE_KEY = '<a href="([^"]*)"'
# Captures the text of the first <div> after the "<!--Contents-->" marker;
# the fetch functions treat this text as the category name.
CAT_KEY = '<!--Contents-->[ \t\r\n]*<div[^>]*>([^<]*)</div'
# Captures the text of a <div> that directly follows an opening <td>;
# the fetch functions treat each match as one menu item name.
ITEM_KEY = '<td[^>]*>[ \t\r\n]*<div[^>]*>([^<]*)</div'
# Matches an anchor wrapping a <span> labelled "次へ" ("next"), i.e. the
# link to a following page of the same category.
HAS_NEXT = '<a[^>]*>[ \t\r\n]*<span[^>]*>次へ</span>'
def main():
    """Scrape the Sushiro mobile menu and dump it to ``sushiro_menu.json``.

    Fetches the menu top page, follows every ``category.php?`` link found
    on it, and writes a ``{category name: [item names]}`` mapping as
    UTF-8 JSON in the current directory.

    Failures are handled on a best-effort basis: any ``Exception`` is
    printed and written (with its traceback) to the log opened by
    ``prepare_log()`` instead of propagating. ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not intercepted.
    """
    import traceback  # local import: only used to format failures

    with prepare_log() as log:
        try:
            # fetch top of menu; the timeout keeps a stalled server from
            # hanging the script forever
            resp = requests.get(SSR_MENU_TOP, timeout=30)
            resp.raise_for_status()
            links = re.findall(RE_KEY, resp.text)
            categories = [t for t in links if t.startswith('category.php?')]
            if not categories:
                raise ValueError('link of categories are not found.')

            # fetch every category; links are relative to the menu top
            menus = {}
            for c in categories:
                key, items = fetch_category(SSR_MENU_TOP + c)
                menus[key] = items

            # dump into json; ensure_ascii=False emits raw Japanese text,
            # so the file encoding must be explicit rather than
            # locale-dependent
            with open('sushiro_menu.json', 'w', encoding='utf-8') as f:
                json.dump(menus, f, ensure_ascii=False, indent=4,
                          sort_keys=True, separators=(',', ': '))
        except Exception:
            details = traceback.format_exc()
            print(details)
            log.write(details)
def fetch_category(category):
    """Fetch one menu category, following its "next" pages.

    Args:
        category: Absolute URL of the category's first page.

    Returns:
        A ``(name, items)`` tuple: the category name captured by
        ``CAT_KEY`` and the list of item names captured by ``ITEM_KEY``
        across the first page and all following pages.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the first page does not contain exactly one
            category name.
    """
    print('req: ' + category)
    resp = requests.get(category, timeout=30)
    resp.raise_for_status()

    cats = re.findall(CAT_KEY, resp.text)
    # Explicit check instead of ``assert``: asserts vanish under ``-O``.
    if len(cats) != 1:
        raise ValueError(
            'expected exactly one category name but found {}: {}'.format(
                len(cats), category))
    cat = cats[0]

    items = re.findall(ITEM_KEY, resp.text)
    has_next = re.search(HAS_NEXT, resp.text) is not None
    page = 2  # the first page was fetched above
    while has_next:
        sub_items, has_next = fetch_subpage(category, page)
        if sub_items is None:
            # the sub page carried no category marker; stop paging
            break
        items.extend(sub_items)
        page += 1
    return cat, items
def fetch_subpage(category, page):
    """Fetch one follow-up page of a category.

    Args:
        category: Absolute URL of the category's first page.
        page: Page number, sent as the ``page`` query parameter.

    Returns:
        ``(items, has_next)``: the item names found on the page and
        whether the page links to a further one. Returns
        ``(None, False)`` when the page has no category marker.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = '{}&page={}'.format(category, page)
    print('req: ' + url)
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    # A page without the category marker is treated as "no such page".
    if re.search(CAT_KEY, resp.text) is None:
        return None, False

    items = re.findall(ITEM_KEY, resp.text)
    has_next = re.search(HAS_NEXT, resp.text) is not None
    return items, has_next
def prepare_log():
    """Open ``ssrl.log`` for writing and stamp it with the current time.

    The file is truncated, opened as UTF-8 (logged error text may contain
    non-ASCII characters) and receives a ``SSRL: <timestamp>`` header line.

    Returns:
        The open text file object; the caller is responsible for closing
        it, e.g. by using it as a context manager.
    """
    f = open('ssrl.log', 'w', encoding='utf-8')
    try:
        f.write('SSRL: {}\n'.format(datetime.datetime.now()))
    except BaseException:
        # Do not leak the handle when the header cannot be written.
        f.close()
        raise
    return f
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment