Skip to content

Instantly share code, notes, and snippets.

@ThomasPe
Created April 27, 2016 14:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ThomasPe/a6df2e4135ba1815b30c49a9495755f0 to your computer and use it in GitHub Desktop.
Save ThomasPe/a6df2e4135ba1815b30c49a9495755f0 to your computer and use it in GitHub Desktop.
A simple Node.js script that crawls the Red Stripe Deals from the Windows Store and saves the app details to a JSON file.
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var async = require('async');
var zlib = require('zlib');
// URL of the Red Stripe Deals collection page; loadDeals requests it first.
var dealsUri = "https://www.microsoft.com/en-us/store/collections/redstripedeals/pc";
// Prefix prepended to every href found on the collection page to form an app detail URL.
var baseUri = "http://microsoft.com";
// Collects one record per parsed app; written to disk when the detail queue drains.
var output = [];
// File the collected records are written to.
var outputFilename = "redstripedeals.json";
// Alternative output path, currently disabled.
//var outputFilename = "d:\\home\\site\\wwwroot\\redstripedeals2.json";
/**
 * Extracts the app links from the deals collection page and pushes one
 * detail-page URL per link onto `qDetails`.
 *
 * @param {string|Buffer} body - HTML of the collection page, as handed over
 *     by loadDeals.
 * @param {*} [lang] - Unused; kept so existing call sites stay valid.
 */
var redstripedealsParser = function (body, lang) {
    // Keep the cheerio handle local instead of leaking an implicit global `$`.
    var $ = cheerio.load(body);
    $('figure h4 a').each(function () {
        var href = $(this).attr("href");
        // An anchor without an href would otherwise queue "<baseUri>undefined".
        if (!href) {
            return;
        }
        // Only site-relative hrefs need the base prefix; absolute ones are used as-is.
        var appUri = /^https?:\/\//i.test(href) ? href : baseUri + href;
        qDetails.push(appUri);
    });
};
/**
 * Downloads the deals collection page and hands its HTML to
 * `redstripedealsParser`, gunzipping the response first when the server
 * reports a gzip content-encoding.
 *
 * A failed request or an undecodable gzip payload is logged and nothing is
 * passed to the parser.
 */
var loadDeals = function () {
    var options = {
        url: dealsUri,
        port: 443,
        headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            'Accept-Language': 'en-us',
            'Content-Language': 'en-us',
            'Accept-Encoding': 'gzip'
        },
        timeout: 0,
        // Ask for the raw response bytes so a gzip payload reaches zlib intact.
        encoding: null
    };
    request(options, function (err, resp, body) {
        if (err) {
            console.log("error loading page");
            console.log(err);
            return;
        }
        if (resp.headers['content-encoding'] !== 'gzip') {
            redstripedealsParser(body);
            return;
        }
        zlib.gunzip(body, function (gunzipErr, dezipped) {
            // On failure `dezipped` is undefined; bail out instead of
            // crashing on `.toString()`.
            if (gunzipErr) {
                console.log("error unzipping page");
                console.log(gunzipErr);
                return;
            }
            redstripedealsParser(dezipped.toString());
        });
    });
};
/**
 * Work queue (concurrency 1) that downloads one app detail page per task and
 * passes its HTML to `GetDetails`, which finishes the task via `callback`.
 *
 * Each task is the URL of a detail page. A failed request or an undecodable
 * gzip payload is logged and reported through the task callback, so the task
 * is completed instead of the process dying on an uncaught exception.
 */
var qDetails = async.queue(function (task, callback) {
    console.log("get details: " + task);
    var options = {
        url: task,
        headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
            'Accept-Language': 'en-us',
            'Content-Language': 'en-us',
            'Accept-Encoding': 'gzip'
        },
        timeout: 0,
        // Ask for the raw response bytes so a gzip payload reaches zlib intact.
        encoding: null
    };
    request(options, function (err, resp, body) {
        if (err) {
            console.log("error: " + task);
            console.log(err);
            // A throw here cannot be caught by anyone and would leave the
            // task unfinished; hand the error to the queue instead.
            callback(err);
            return;
        }
        if (resp.headers['content-encoding'] !== 'gzip') {
            GetDetails(body, task, callback);
            return;
        }
        zlib.gunzip(body, function (gunzipErr, dezipped) {
            // On failure `dezipped` is undefined; report it rather than
            // crashing on `.toString()`.
            if (gunzipErr) {
                console.log("error: " + task);
                console.log(gunzipErr);
                callback(gunzipErr);
                return;
            }
            GetDetails(dezipped.toString(), task, callback);
        });
    });
}, 1);
/**
 * Parses one app detail page, appends the extracted record to `output`, and
 * completes the queue task.
 *
 * @param {string|Buffer} body - HTML of the app detail page.
 * @param {string} task - URL the page was loaded from; used for logging only.
 * @param {function(Error=)} callback - Task completion callback. Called with
 *     no arguments after the record has been stored, or with the error when
 *     the page could not be parsed (nothing is stored in that case).
 */
var GetDetails = function (body, task, callback) {
    console.log("parse details: " + task);
    var app = {};
    try {
        // Keep the cheerio handle local instead of leaking an implicit global `$`.
        var $ = cheerio.load(body);
        var logoSrc = $('.ph-logo > img').first().attr('src');
        var metadata = $('.metadata-list-content > div');

        app["title"] = $('#page-title').text();
        // attr() yields undefined when the logo is absent; store an empty
        // string, like the text fields do, instead of throwing.
        app["image"] = logoSrc ? "http:" + logoSrc : "";
        app["description"] = $('.showmore > p').first().text();
        app["price"] = $('.srv_price > span').first().text();
        app["ratingValue"] = $('.srv_ratingsScore.win-rating-average').first().text();
        // Keep only the digits of the rating count text.
        app["ratingCount"] = $('.win-rating-total').first().text().replace(/\D/g, '');
        // A string pattern replaces only the first match; the global regex
        // strips every line break.
        app["packageSize"] = metadata.eq(2).text().replace(/\r\n/g, "").trim();
        app["publisher"] = metadata.eq(0).text().replace(/\r\n/g, "").trim();
    } catch (err) {
        // Always finish the task, otherwise the queue would stall on it.
        console.log("error parsing details: " + task);
        callback(err);
        return;
    }
    output.push(app);
    callback();
};
/**
 * Handler assigned to the detail queue's `drain` property: serializes every
 * collected record in `output` to JSON and writes it to `outputFilename`,
 * logging either the write error or a success message.
 *
 * NOTE(review): newer releases of the async library appear to expect
 * `qDetails.drain(fn)` rather than a property assignment - confirm against
 * the installed version.
 */
qDetails.drain = function () {
    var serialized = JSON.stringify(output, null);
    fs.writeFile(outputFilename, serialized, function (writeErr) {
        if (!writeErr) {
            console.log("JSON saved to " + outputFilename);
            return;
        }
        console.log(writeErr);
    });
};
// Entry point: request the deals collection page; its parser queues the app detail pages.
loadDeals();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment