public
Created

Scraping restaurants in Javascript

  • Download Gist
gistfile1.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
/*
* This example shows how to collect restaurant information and menus on the fly.
*/
 
// Instantiate the Bobik client. The Bobik SDK is available at http://usebobik.com/sdk.
// You're welcome to link directly to the JS file; however, we make no guarantees about keeping the link unchanged.
// Thus, link to it directly only where you have quick and immediate access to the place this url is used (e.g. during development).
// "YOUR_AUTH_TOKEN" is a placeholder for your own auth token.
var bobik = new Bobik("YOUR_AUTH_TOKEN");
 
 
// Finds restaurant directory information (name, website, address, menu_url).
// Upon success, hands the grouped restaurants to find_menus_async(), or to
// find_menus_sync() when print_as_they_become_available is switched off.
//
// neighborhood - neighborhood segment of the menupages url (e.g. 'soma')
// cuisine      - cuisine segment of the menupages url (e.g. 'italian')
//
// Returns nothing; progress and results are reported through console.log.
function find_restaurants(neighborhood, cuisine) {
  console.log("Looking for " + cuisine + " restaurants in " + neighborhood + "...");
  var src_url = "http://sanfrancisco.menupages.com/restaurants/all-areas/" + neighborhood + "/" + cuisine;
  bobik.scrape({
    urls: [src_url],
    query_set: "menupages"
  }, function (scraped_data) {
    if (!scraped_data) {
      console.log("Data is unavailable");
      return;
    }
    // Results are bucketed by the url they were scraped from.
    var listing = scraped_data[src_url];
    // The listing is a hash of parallel arrays ('Name', 'Address', 'Url'), so it has
    // no length of its own: an empty result shows up as a missing or empty 'Name' array.
    var names = listing && listing['Name'];
    if (!listing || listing.length === 0 || !names || names.length === 0) {
      console.log("Did not find any restaurants");
      return;
    }
    var restaurants = group_restaurants(listing);
    console.log("Found " + restaurants.length + " restaurants");
    // Set to false to print everything at once, after all menus have arrived.
    var print_as_they_become_available = true;
    if (print_as_they_become_available) {
      find_menus_async(restaurants);
    } else {
      find_menus_sync(restaurants);
    }
  });
}
 
 
// A helper function that takes a hash of parallel arrays of restaurant names,
// addresses and site-relative urls ('Name', 'Address', 'Url') and turns them into
// an array with one hash of grouped attributes per restaurant.
// Also, each restaurant is augmented with the menu url.
//
// restaurants - hash with 'Name', 'Address' and 'Url' arrays, indexed in parallel
//
// Returns an array of { name, address, website, menu_url } hashes. The array is
// empty when the input, or any of the three expected arrays, is missing.
function group_restaurants(restaurants) {
  var names = restaurants && restaurants['Name']; // an array of names
  var addresses = restaurants && restaurants['Address']; // an array of addresses
  var urls = restaurants && restaurants['Url']; // an array of urls
  if (!names || !addresses || !urls) {
    // Nothing to group; avoid dereferencing a missing array below.
    return [];
  }
  var grouped = [];
  for (var i = 0; i < names.length; i++) {
    var website = "http://sanfrancisco.menupages.com" + urls[i];
    // Insert a path separator only when the scraped url does not already end with one,
    // so the menu url never collapses into ".../restaurant-namemenu".
    var separator = website.charAt(website.length - 1) === "/" ? "" : "/";
    // push this restaurant to the array of results
    grouped.push({
      'name' : names[i],
      'address' : addresses[i],
      'website' : website,
      'menu_url' : website + separator + "menu"
    });
  }
  return grouped;
}
 
 
// Finds menus for all restaurants and adds those menus to the corresponding restaurant hashes.
// This variant sends one scrape request per restaurant without waiting for the previous
// one to complete, and prints each restaurant as soon as its menu becomes available.
//
// restaurants - array of restaurant hashes, each carrying a 'menu_url'
//
// Returns nothing; each restaurant hash gets a 'menu' entry when its request completes.
function find_menus_async(restaurants) {
  console.log("Looking for menus...");
  // forEach gives every restaurant its own function scope. With a plain loop and
  // function-scoped vars, all callbacks would share one `restaurant`/`menu_url`
  // binding and only the last restaurant would ever receive a menu.
  restaurants.forEach(function (restaurant) {
    var menu_url = restaurant['menu_url'];
    bobik.scrape({
      urls: [menu_url], // send only one at a time (and don't wait for it to complete before sending the next)
      query_set: "menu"
    }, function (scraped_data) {
      if (!scraped_data) {
        console.log("Data is unavailable");
        return;
      }
      // Results are bucketed by url, so pick out this restaurant's own menu.
      restaurant['menu'] = scraped_data[menu_url];
      // Pass the hash as a separate argument so its contents are printed
      // (string concatenation would print "[object Object]").
      console.log("Found restaurant:", restaurant);
    });
  });
}
 
 
// Batch variant of the menu lookup: all menu urls go out in a single scrape request,
// and the restaurants are printed once, after the results have been attached.
function find_menus_sync(restaurants) {
  console.log("Looking for menus...");
  // Results come back bucketed by url, so remember which restaurant owns each
  // menu url while collecting the list of urls to request.
  var by_menu_url = {};
  var all_menu_urls = [];
  restaurants.forEach(function (entry) {
    var entry_url = entry['menu_url'];
    all_menu_urls.push(entry_url);
    by_menu_url[entry_url] = entry;
  });

  // Attaches every scraped menu to its restaurant, then prints the full list.
  var attach_menus = function (scraped_data) {
    for (var scraped_url in scraped_data) {
      by_menu_url[scraped_url]['menu'] = scraped_data[scraped_url];
    }
    console.log(restaurants);
  };

  bobik.scrape({
    urls: all_menu_urls,
    query_set: "menu"
  }, attach_menus);
}
 
// Go!
//find_restaurants('soma', 'italian')

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.