Skip to content

Instantly share code, notes, and snippets.

@KlonD90
Created January 16, 2014 22:28
Show Gist options
  • Save KlonD90/8464758 to your computer and use it in GitHub Desktop.
Save KlonD90/8464758 to your computer and use it in GitHub Desktop.
var cheerio = require('cheerio'),
request = require('request'),
MongoClient = require('mongodb').MongoClient,
Q = require('q');
var page = 1;
MongoClient.connect('mongodb://localhost/gamendex', function(err, db) {
if(err) throw err;
console.log("Connected to Database");
var parsedCollection = db.collection('parsed');
/**
 * Downloads one listing page of roxen.ru, extracts every game found under
 * `#first .area_game` and inserts the batch into `parsedCollection`.
 *
 * @param {number} page - Page number sent as the `PAGEN_1` query parameter.
 * @returns {Object} A Q promise that is
 *   - resolved with `page + 1` when the pagination block links to that page
 *     and the batch (if any) has been written;
 *   - rejected without a reason when there is no link to `page + 1`;
 *   - rejected with the error when the HTTP request or the insert fails.
 */
function parsePage(page) {
  var deferred = Q.defer();
  var url = 'http://roxen.ru/?PAGEN_1=' + page;
  console.log(url);

  request(url, function (err, res, body) {
    if (err) {
      console.log(err);
      // Settle the promise; otherwise the caller's chain would wait forever.
      deferred.reject(err);
      return;
    }

    // Keep the cheerio handle local instead of leaking it as a global.
    var $ = cheerio.load(body);

    var games = [];
    $('#first .area_game').each(function () {
      var info = {
        site: 'roxen',
        url: 'http://roxen.ru' + $('.pic_game a', this).attr('href'),
        imageUrl: 'http://roxen.ru' + $('.pic_game img', this).attr('src'),
        brands: [],
        title: $('.game_title', this).text().trim(),
        price: parseInt($('.price', this).text().trim(), 10),
        date: new Date()
      };
      $('.brand img', this).each(function () {
        // A brand image without a title attribute is skipped rather than
        // crashing on `undefined.trim()`.
        var brand = $(this).attr('title');
        if (brand) {
          info.brands.push(brand.trim());
        }
      });
      games.push(info);
    });

    // Numeric labels of the pagination links; non-numeric labels become NaN
    // and can never match below.
    var pages = [];
    $('#first .pages li a').each(function () {
      pages.push(parseInt($(this).text(), 10));
    });
    // indexOf returns -1 when absent; index 0 is a valid hit.
    var hasNextPage = pages.indexOf(page + 1) !== -1;

    // Reports the pagination outcome to the caller.
    function settle() {
      if (hasNextPage) {
        deferred.resolve(page + 1);
      } else {
        console.log('rejected', page, pages);
        deferred.reject();
      }
    }

    // Nothing to store: skip the write and report the pagination result.
    if (games.length === 0) {
      settle();
      return;
    }

    // Settle only after the write has finished so a failed insert is
    // reported through the promise instead of being thrown from a callback.
    parsedCollection.insert(games, function (insertErr) {
      if (insertErr) {
        console.log(insertErr);
        deferred.reject(insertErr);
        return;
      }
      settle();
    });
  });

  return deferred.promise;
}
// Highest page number that will be requested, so the crawl is bounded even
// if the site always advertises a further page.
var MAX_PAGE = 99;

/**
 * Parses `page` and, when parsePage resolves with a following page number,
 * continues with that page. Pages are processed strictly one after another.
 *
 * @param {number} page - Page number to parse next.
 * @returns {*} The promise of the remaining crawl, or `page` itself once the
 *   cap has been passed.
 */
function crawlFrom(page) {
  if (page > MAX_PAGE) {
    return page;
  }
  return parsePage(page).then(crawlFrom);
}

// Start at page 1. Q.fcall turns a synchronous throw into a rejection.
var chain = Q.fcall(crawlFrom, 1)
  .catch(function (reason) {
    // A rejection without a reason is parsePage's "no next page" signal and
    // ends the crawl normally; anything else is a real failure worth logging.
    if (reason) {
      console.log(reason);
    }
  });

// Terminate the chain so an error thrown inside the handler above is
// rethrown instead of being silently dropped.
// NOTE(review): nothing here closes `db`, so the process presumably stays
// alive after the crawl — confirm whether that is intended.
chain.done();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment