Skip to content

Instantly share code, notes, and snippets.

@icebeat
Created January 11, 2016 09:59
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save icebeat/61d0e9c1c97d4c0d5333 to your computer and use it in GitHub Desktop.
Save icebeat/61d0e9c1c97d4c0d5333 to your computer and use it in GitHub Desktop.
Tripadvisor parser with x-ray
"use strict";
const Xray = require('x-ray');
const x = Xray();
const fs = require('fs');
// Restaurant page that is scraped.
const tripadvisorUrl = 'http://www.tripadvisor.com/Restaurant_Review-g187514-d4760195-Reviews-La_Huerta_de_Tudela-Madrid.html';

/**
 * Writes the scraped record to `tripadvisor.json` as pretty-printed JSON.
 *
 * @param {Object} data - The scraped restaurant record.
 */
const saveResult = (data) => {
  // JSON.stringify's replacer must be a function, an array or null.
  fs.writeFile('tripadvisor.json', JSON.stringify(data, null, 2), (err) => {
    if (err) throw err;
    console.log('It\'s saved!');
  });
};

// First pass: scrape the public restaurant page.
x(tripadvisorUrl, {
  id: '.mapContainer@data-locid',
  name: '.mapContainer@data-name',
  phone: '.phoneNumber',
  url: '[hreflang="en"]@href',
  ranking: '.slim_ranking',
  rating: '[property="ratingValue"]@content',
  reviews: '[property="reviewCount"]@content',
  lat: '.mapContainer@data-lat',
  lng: '.mapContainer@data-lng',
  neighborhood: '.detail.neighborhood .name',
  address: x('.format_address', {
    streetAddress: '[property="streetAddress"]',
    locality: '[property="addressLocality"]',
    region: '[property="addressRegion"]@content',
    postcode: '[property="postalCode"]',
    country: '[property="addressCountry"]'
  }),
  hours: x('.hoursOverlay div:not(:first-child)', [{
    days: '.days',
    hours: '.hours'
  }]),
  scoring: x('.visitorRating .barChart li', [{
    value: '.valueCount',
    label: '.label'
  }]),
  price: '.threeColumnList .contiguousSection:nth-child(2) span',
  // Follows the photo album link and collects photos from each page.
  // NOTE(review): paginate() is called without a limit here; confirm that
  // crawling every "next" page is intended.
  photos: x('.flexible_photo_album_link + .flexible_photo_cell a@href',
    x('.photoBox', [{
      name: '.photoCap strong',
      description: '.capDescription',
      src: '.big_photo@src'
    }])
  ).paginate('.next.pageLink a@href')
})((scrapeErr, json) => {
  // Stop here rather than dereferencing an undefined result below.
  if (scrapeErr || !json) {
    console.error('Failed to scrape the restaurant page:', scrapeErr);
    process.exitCode = 1;
    return;
  }

  // Without a location id the second request cannot be built; keep what we have.
  if (json.id === undefined || json.id === null || json.id === '') {
    console.error('No location id found; saving without website/cuisine data.');
    saveResult(json);
    return;
  }

  // Second pass: the "update eatery" form exposes the website and cuisines.
  const updateEateryUrl = `http://www.tripadvisor.com/GetListedUpdateEatery?id=${encodeURIComponent(json.id)}`;
  x(updateEateryUrl, {
    website: '[name="url"]@value',
    cuisine: x('.cuisine', [{
      name: '.cuisine_name',
      id: 'input@value'
    }])
  })((extendedErr, extendedJson) => {
    if (extendedErr) {
      // The base record is still useful, so report the failure and save it.
      console.error('Failed to scrape website/cuisine data:', extendedErr);
    } else {
      Object.assign(json, extendedJson);
    }
    saveResult(json);
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment