Scraping restaurants in JavaScript
This example shows how to collect restaurant information and menus on the fly.
// Instantiate the Bobik client from the Bobik SDK.
// You're welcome to link directly to the JS file; however, we make no guarantees that the link will stay unchanged.
// So link to it directly only when you have quick and immediate access to wherever this URL is used (e.g. during development).
var bobik = new Bobik("YOUR_AUTH_TOKEN");
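// A minimal sketch of loading the SDK before this script runs, assuming a browser page
// and hypothetical local file names (neither is specified in this gist):
//
//   <script src="bobik_sdk.js"></script>
//   <script src="scrape_restaurants.js"></script>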
// Finds restaurant directory information (name, website, address, menu_url).
// Upon success, triggers find_menus_async() or find_menus_sync().
function find_restaurants(neighborhood, cuisine) {
  console.log("Looking for " + cuisine + " restaurants in " + neighborhood + "...");
  // NOTE: the restaurant directory's base URL is missing here; fill it in before the neighborhood/cuisine path.
  var src_url = "" + neighborhood + "/" + cuisine;
  bobik.scrape({
    urls: [src_url],
    query_set: "menupages"
  }, function (scraped_data) {
    if (!scraped_data) {
      console.log("Data is unavailable");
      return;
    }
    var restaurants = scraped_data[src_url];
    if (!restaurants || restaurants.length == 0) {
      console.log("Did not find any restaurants");
      return;
    }
    restaurants = group_restaurants(restaurants);
    console.log("Found " + restaurants.length + " restaurants");
    var print_as_they_become_available = true;
    if (print_as_they_become_available)
      find_menus_async(restaurants);
    else
      find_menus_sync(restaurants);
  });
}
// A helper function that takes a hash of restaurant names, addresses and websites
// and turns it into an array of grouped restaurant attributes.
// Each restaurant is also augmented with its menu url.
function group_restaurants(restaurants) {
  var names = restaurants['Name'];         // an array of names
  var addresses = restaurants['Address'];  // an array of addresses
  var urls = restaurants['Url'];           // an array of urls
  var grouped = [];
  for (var i = 0; i < names.length; i++) {
    // NOTE: the site's base URL is missing here; prepend it so the website links resolve.
    var website = "" + urls[i];
    // Push this restaurant onto the array of results
    grouped.push({
      'name'     : names[i],
      'address'  : addresses[i],
      'website'  : website,
      'menu_url' : website + "menu"
    });
  }
  return grouped;
}
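// For illustration only, with hypothetical data (real values come from the "menupages" query set):
//   group_restaurants({ 'Name': ['Trattoria Uno'], 'Address': ['123 Main St'], 'Url': ['trattoria-uno/'] })
//   would return:
//   [{ 'name': 'Trattoria Uno', 'address': '123 Main St',
//      'website': 'trattoria-uno/', 'menu_url': 'trattoria-uno/menu' }]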
// Finds menus for all restaurants and adds those menus to the corresponding restaurant hashes.
// Upon completion, prints full restaurant information.
// This variant processes restaurants in parallel and prints each one as its information becomes available.
function find_menus_async(restaurants) {
  console.log("Looking for menus...");
  // forEach gives each callback its own restaurant and menu_url
  restaurants.forEach(function (restaurant) {
    var menu_url = restaurant['menu_url'];
    bobik.scrape({
      urls: [menu_url],  // send only one url at a time (and don't wait for it to complete before sending the next)
      query_set: "menu"
    }, function (scraped_data) {
      restaurant['menu'] = scraped_data[menu_url];
      console.log("Found restaurant: " + JSON.stringify(restaurant));
    });
  });
}
// This variant of find_menus displays results only when all of them are ready.
function find_menus_sync(restaurants) {
  console.log("Looking for menus...");
  // Assemble a list of menu urls and a {url -> restaurant} map.
  // We need this map to match results back to restaurants (results are bucketed by url).
  var menu_urls = [];
  var url_to_restaurant = {};
  restaurants.forEach(function (restaurant) {
    var menu_url = restaurant['menu_url'];
    menu_urls.push(menu_url);
    url_to_restaurant[menu_url] = restaurant;
  });
  bobik.scrape({
    urls: menu_urls,
    query_set: "menu"
  }, function (scraped_data) {
    for (var url in scraped_data)
      url_to_restaurant[url]['menu'] = scraped_data[url];
    console.log("Found restaurants: " + JSON.stringify(restaurants));
  });
}
// Go!
//find_restaurants('soma', 'italian')
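// Hypothetical console output for the call above (actual counts, names and menus
// depend on the directory site and the configured query sets):
//   Looking for italian restaurants in soma...
//   Found 12 restaurants
//   Looking for menus...
//   Found restaurant: {"name":"...","address":"...","website":"...","menu_url":"...","menu":[...]}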