Skip to content

Instantly share code, notes, and snippets.

@mrorii
Last active December 21, 2015 11:59
Show Gist options
  • Save mrorii/6302734 to your computer and use it in GitHub Desktop.
Save mrorii/6302734 to your computer and use it in GitHub Desktop.
Check number of recipes on https://en.cookpad.com/
#!/usr/bin/env python
import os
import time
import urlparse
import requests
from scrapy.selector import HtmlXPathSelector
def get_count(url, timeout=10):
    """Return the number of recipes listed on a Cookpad category page.

    Fetches ``url`` and reads the first run of digits found inside the
    ``<span>`` of the page's ``<h1 class="title page_title">`` heading.

    Args:
        url: Address of the category page to fetch.
        timeout: Seconds to wait for the server before giving up, so that
            a stalled connection cannot hang the script forever.

    Returns:
        The recipe count as an ``int``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the heading contains no number.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    hxs = HtmlXPathSelector(text=response.text)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    digits = hxs.select("//h1[@class='title page_title']/span").re(r'\d+')
    if not digits:
        # Same exception type as indexing an empty list, but with context.
        raise IndexError('no recipe count found at {0}'.format(url))
    return int(digits[0])
def main():
    """Print the recipe count of every listed Cookpad category.

    One line is written per category: the category slug (the last path
    component of its URL) padded to at least 25 characters, a tab, and
    the count returned by ``get_count``. The script sleeps for one second
    after each category.
    """
    category_urls = [
        'https://en.cookpad.com/categories/vegetables',
        'https://en.cookpad.com/categories/meat',
        'https://en.cookpad.com/categories/fish',
        'https://en.cookpad.com/categories/salad',
        'https://en.cookpad.com/categories/rice',
        'https://en.cookpad.com/categories/noodles',
        'https://en.cookpad.com/categories/pasta-gratin',
        'https://en.cookpad.com/categories/soup-stew',
        'https://en.cookpad.com/categories/bento',
        'https://en.cookpad.com/categories/tofu-soybeans',
        'https://en.cookpad.com/categories/egg',
        'https://en.cookpad.com/categories/flour',
        'https://en.cookpad.com/categories/sauce-dressing',
        'https://en.cookpad.com/categories/healthy-meal',
        'https://en.cookpad.com/categories/party',
        'https://en.cookpad.com/categories/spice-herb',
        'https://en.cookpad.com/categories/yogurt',
        'https://en.cookpad.com/categories/miso-vinegar-fermentation',
        'https://en.cookpad.com/categories/sea-vegetables',
        'https://en.cookpad.com/categories/hot-pot-and-nabe',
        'https://en.cookpad.com/categories/japanese-new-year',
        'https://en.cookpad.com/categories/regional',
        'https://en.cookpad.com/categories/christmas',
        'https://en.cookpad.com/categories/finger%20food',
        'https://en.cookpad.com/categories/bread',
        'https://en.cookpad.com/categories/korokke-croquette',
        'https://en.cookpad.com/categories/chinese',
    ]
    row_template = '{0:25s}\t{1}'

    for category_url in category_urls:
        recipe_count = get_count(category_url)
        # The slug is whatever follows the last '/' in the URL path.
        url_path = urlparse.urlsplit(category_url).path
        slug = os.path.basename(url_path)
        print(row_template.format(slug, recipe_count))
        # Wait a second before moving on to the next category.
        time.sleep(1)


# Only run the report when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment