Skip to content

Instantly share code, notes, and snippets.

@ideiudicibus
Created September 3, 2012 16:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ideiudicibus/3610373 to your computer and use it in GitHub Desktop.
Save ideiudicibus/3610373 to your computer and use it in GitHub Desktop.
paginegialle.it cinema web scraping
// HTTP client used below to download the listing page.
var request = require('request');
// HTML parser used by parsePage to query the downloaded markup.
var cheerio = require('cheerio');
// Cinema programme listing for Roma (RM).
var url = 'http://www.paginegialle.it/cinema-programmazione/Roma%20(RM)';
// Desktop Safari user-agent string sent with the request.
var ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2';
/**
 * Callback for `request`: parses the downloaded listing page and collects
 * every cinema together with the films found in its schedule column.
 *
 * @param {?Error} error - Transport error reported by `request`, if any.
 * @param {?Object} response - HTTP response; only `statusCode` is read.
 * @param {string} body - HTML markup of the page.
 * @returns {Array<Object>} One entry per sibling of a `.one-sala` element,
 *   shaped as `{ name, address, city, phone, films: [{ name, times }] }`.
 *   An empty array is returned when the request failed or the status
 *   code was not 200.
 */
var parsePage = function(error, response, body) {
  if (error || response.statusCode !== 200) {
    // Without a transport error the original logged a bare `null`; report
    // the offending status code instead so the failure is diagnosable.
    console.log(error || 'Unexpected HTTP status: ' + response.statusCode);
    return [];
  }

  // Keep `$` local: assigning it without a declaration leaks a global and
  // throws a ReferenceError in strict mode / ES modules.
  var $ = cheerio.load(body);
  var cinemas = [];

  $('.one-sala').siblings().each(function(i, elem) {
    var cinema = $(elem).find('div .col-1');
    var schedule = $(elem).find('div .col-2');
    var films = [];

    $(schedule).find('div .row').each(function(key, value) {
      films.push({
        name: $(value).find('div .cel-2 a').text(),
        // Drop the "orari:" label that prefixes the show times.
        times: $(value).find('div .cel-2 .film-orari').text().replace('orari:', '')
      });
    });

    cinemas.push({
      name: $(cinema).find('h5').text(),
      address: $(cinema).find('.address-cine').text(),
      city: $(cinema).find('.city-cine').text(),
      phone: $(cinema).find('.cine-tel').text(),
      films: films
    });
  });

  return cinemas;
};
// Fetch the listing page with the browser-like User-Agent header and pass
// the result to parsePage.
var requestOptions = {
  url: url,
  headers: { 'User-Agent': ua }
};
request(requestOptions, parsePage);
@pastarace
Copy link

Hi, it seems they have changed their policies, so after the first few attempts the site returns a "Block page".

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment