Skip to content

Instantly share code, notes, and snippets.

@r-sal
Created April 20, 2013 02:45
Show Gist options
  • Save r-sal/5424515 to your computer and use it in GitHub Desktop.
Save r-sal/5424515 to your computer and use it in GitHub Desktop.
// js-ajax-parallel-load-pages.js
// Deps: jQuery, underscore, async.js
// Fetches every URL in parallel with jQuery.get, parses each response body
// as HTML with jQuery, runs `selector` against it with .find() and hands the
// matches to `itemHandler`.
//
// urls        - Array of URLs to fetch.
// selector    - CSS selector executed against the parsed HTML of every URL.
// itemHandler - Optional function(nodes, url). Called once per URL (in the
//               order of `urls`) with the array of matched DOM nodes; its
//               return value becomes the entry for that URL in the result.
//               Defaults to returning the nodes unchanged.
//
// Examples:
//
//   loadPagesInParallel(['/1.html', '/2.html'], '.user > span', function(spans, url) {...})
//
// Returns a jQuery Deferred. It is resolved with an object mapping each URL
// to its itemHandler result ({} when `urls` is empty) and rejected with the
// arguments jQuery.when passed to the failure callback.
var loadPagesInParallel = function(urls, selector, itemHandler) {
  if (itemHandler == null) itemHandler = function(item) {return item;};
  console.log('Loading', urls.length, 'urls');
  var promise = jQuery.Deferred();
  if (urls.length === 0) {
    // Resolve with an empty map so callers always receive an object.
    promise.resolveWith(promise, [{}]);
    return promise;
  }
  var requests = urls.map(function(url) {
    var request = jQuery.get(url);
    // Remember which URL each request belongs to; read back in .done below.
    request.url = url;
    return request;
  });
  jQuery.when.apply(jQuery, requests)
    .fail(function() {
      console.error('Failed', urls);
      // Forward the failure details instead of rejecting with nothing.
      promise.rejectWith(promise, [].slice.call(arguments));
    })
    .done(function() {
      // Parse and query every response first, then run the handlers, so no
      // handler runs if any response fails to parse.
      var nodeLists = requests.map(function(request) {
        var matched = jQuery(request.responseText.trim()).find(selector);
        return [].slice.call(matched);
      });
      var result = {};
      requests.forEach(function(request, index) {
        result[request.url] = itemHandler(nodeLists[index], request.url);
      });
      promise.resolveWith(promise, [result]);
    });
  return promise;
};
// Splits `total` items into consecutive pages of `perPage` items and calls
// `paginator(from, to)` once per page, in order. `from`/`to` are offsets
// suitable for Array#slice; `to` may exceed `total` on the last page.
//
// total     - Number of items to split.
// perPage   - Page size; must be positive.
// paginator - function(from, to) invoked for every page.
//
// The paginator is always invoked at least once (with (0, perPage) when
// `total` is 0) so callers that rely on one callback keep working, but no
// extra empty page is produced when `total` is an exact multiple of
// `perPage`.
//
// Throws RangeError when `perPage` is zero or negative (the page count would
// otherwise be unbounded).
var paginate = function(total, perPage, paginator) {
  if (perPage <= 0) {
    throw new RangeError('perPage must be a positive number');
  }
  var pages = Math.max(1, Math.ceil(total / perPage));
  for (var page = 0; page < pages; page++) {
    paginator(page * perPage, (page + 1) * perPage);
  }
};
// Loads `urls` in batches: the URLs inside one batch are fetched in parallel
// by loadPagesInParallel, and the batches themselves run one after another
// through an async.js queue with concurrency 1.
//
// urls        - Array of URLs to fetch.
// selector    - CSS selector forwarded to loadPagesInParallel.
// itemHandler - Handler forwarded to loadPagesInParallel.
// perRequest  - Batch size (default 25).
//
// Returns a jQuery Deferred. It is resolved with a single object merging the
// per-batch results ({} when there is nothing to load). If a batch fails the
// Deferred is rejected with that batch's failure arguments and the remaining
// batches are skipped.
var loadPagesSequentally = function(urls, selector, itemHandler, perRequest) {
  if (perRequest == null) perRequest = 25;
  var promise = jQuery.Deferred();
  var calls = [];
  var results = [];
  var failed = false;

  paginate(urls.length, perRequest, function(from, to) {
    var batch = urls.slice(from, to);
    // Do not schedule a request round for an empty slice.
    if (batch.length === 0) return;
    calls.push(function() {
      return loadPagesInParallel(batch, selector, itemHandler);
    });
  });

  if (calls.length === 0) {
    promise.resolveWith(promise, [{}]);
    return promise;
  }

  var queue = async.queue(function(fn, callback) {
    if (failed) {
      // An earlier batch failed: drain the queue without issuing requests.
      callback();
      return;
    }
    fn().done(function(result) {
      results.push(result);
      callback();
    }).fail(function() {
      failed = true;
      promise.rejectWith(promise, [].slice.call(arguments));
      // Always signal completion, otherwise the queue would stall forever.
      callback();
    });
  }, 1);

  // Install the drain handler before pushing so it cannot be missed.
  queue.drain = function() {
    if (failed) return;
    var flattened = results.reduce(function(accumulator, obj) {
      for (var key in obj) {
        accumulator[key] = obj[key];
      }
      return accumulator;
    }, {});
    promise.resolveWith(promise, [flattened]);
  };
  queue.push(calls);
  return promise;
};
// Tallies how often each element occurs in `array`.
// Returns a plain object whose keys are the distinct elements and whose
// values are the number of times each element appears.
var createCounter = function(array) {
  var hasOwn = Object.prototype.hasOwnProperty;
  return array.reduce(function(counts, element) {
    // Own-property check so inherited names such as "constructor" start at 1.
    counts[element] = hasOwn.call(counts, element) ? counts[element] + 1 : 1;
    return counts;
  }, {});
};
// Prints a summary of `names` to the console: a group titled with the total
// number of names and the number of distinct names, containing one
// "name - count" line per distinct name, ordered by descending count.
var printStats = function(names) {
  var counts = createCounter(names);
  var repos = Object.keys(counts);
  var pairs = repos.map(function(repo) {
    return [repo, counts[repo]];
  });
  pairs.sort(function(left, right) {
    return right[1] - left[1];
  });
  var lines = pairs.map(function(pair) {
    return pair.join(' - ');
  });
  var heading = 'Total ' + names.length + ' pull requests in ' + repos.length + ' repos.';
  console.group(heading);
  lines.forEach(function(line) {
    console.log(line);
  });
  console.groupEnd(heading);
};
// Extracts the facts about one pull request page that the report needs.
//
// url  - URL of the pull request page; copied into the result.
// node - DOM element to search (queried with querySelector).
//
// Returns {isPrivate, isMerged, stars, url}:
//   isPrivate - true when a '.mega-icon-private-repo' element is present.
//   isMerged  - true when a '.state-indicator.merged' element is present.
//   stars     - integer parsed from the text of '.social-count' with all
//               thousands separators removed; 0 when that element is missing.
var parsePullRequest = function(url, node) {
  var countNode = node.querySelector('.social-count');
  var stars = 0;
  if (countNode) {
    // Strip every comma, not just the first, so "1,234,567" parses fully.
    stars = parseInt(countNode.innerText.trim().replace(/,/g, ''), 10);
  }
  return {
    isPrivate: Boolean(node.querySelector('.mega-icon-private-repo')),
    isMerged: Boolean(node.querySelector('.state-indicator.merged')),
    stars: stars,
    url: url
  };
};
// Pages.
// Builds an HTML list of repositories with merged pull requests.
//
// Steps:
//   1. Load pages 1-9 of the closed pull requests dashboard and collect the
//      links to the individual pull requests.
//   2. Load every pull request page and parse it with parsePullRequest.
//   3. Keep merged, non-private pulls whose URL contains neither 'paulmillr'
//      nor any entry of `invalidRepos`, group them by repository, add the
//      hard-coded `additionalPulls`, and sort repositories by stars
//      (descending).
//
// Returns the promise produced by the .then() chain; it is resolved with the
// generated '<ul>...</ul>' markup.
var countPullRequests = function() {
  var getUrl = function(page) {
    return 'https://github.com/dashboard/pulls?direction=desc&page=' + page + '&sort=created&state=closed';
  };
  var urls = _.range(1, 10).map(getUrl);
  var pullsUrls = [];
  var pullsData = [];
  var invalidRepos = [
    'brunch/brunch', 'chaplinjs/chaplin', 'dcramer/mangodb',
    'lifesinger/lifesinger.github.com', 'blackrabbit99/kievjs'
  ];
  return loadPagesSequentally(urls, '.pulls-list > .list-browser-item h3 a', function(nodes) {
    var newPullsUrls = nodes.map(function(node) {return node.href;});
    pullsUrls = pullsUrls.concat(newPullsUrls);
    return pullsUrls;
  }).then(function() {
    return loadPagesSequentally(pullsUrls, '.site', function(nodes, url) {
      // Skip pages where the container was not found instead of throwing.
      if (nodes.length === 0) return;
      pullsData.push(parsePullRequest(url, nodes[0]));
    });
  }).then(function() {
    var legitPulls = pullsData
      .filter(function(pull) {return pull.isMerged;})
      .filter(function(pull) {return !pull.isPrivate;})
      .filter(function(pull) {return pull.url.indexOf('paulmillr') === -1;})
      .filter(function(pull) {
        // A pull is kept only if NONE of the excluded repos appears in its
        // URL, hence `every` rather than `some`.
        return invalidRepos.every(function(invalidRepo) {
          return pull.url.indexOf(invalidRepo) === -1;
        });
      })
      .reduce(function(groups, pull) {
        var match = /github\.com\/(.+)\/pull/.exec(pull.url);
        // Ignore URLs that do not look like a GitHub pull request.
        if (!match) return groups;
        var repo = match[1];
        if (!({}).hasOwnProperty.call(groups, repo)) {
          groups[repo] = {stars: pull.stars, pulls: []};
        }
        groups[repo].pulls.push(pull.url);
        return groups;
      }, {});
    var additionalPulls = {
      'josh/pygments': {
        url: 'http://pygments.org',
        stars: 900,
        pulls: ['https://bitbucket.org/birkenfeld/pygments-main/pull-request/84/']
      }
    };
    _.extend(legitPulls, additionalPulls);
    var sorted = Object.keys(legitPulls)
      .sort(function(a, b) {
        return legitPulls[b].stars - legitPulls[a].stars;
      })
      .map(function(repo) {
        var item = legitPulls[repo];
        item.repo = repo;
        return item;
      });
    var html = sorted.reduce(function(str, item) {
      // Entries with an explicit url (non-GitHub projects) link there.
      var url = item.url ? item.url : 'https://github.com/' + item.repo;
      str += '\n <li><a href="' + url + '">';
      str += item.repo.split('/')[1];
      str += '</a></li>';
      return str;
    }, '<ul>') + '\n</ul>';
    return html;
  });
};
// Scrapes GitHub user search result pages 1-99 for the query
// "followers:>100" and stores the text of every matched result link, as one
// flat array, in window.topUsers. Returns nothing.
var getTopUsersByFollowers = function() {
  var pageUrls = _.range(1, 100).map(function(pageNumber) {
    return 'https://github.com/search?l=&p=' + pageNumber + '&q=followers%3A%3E100&ref=searchbar&type=Users';
  });
  // Per-page handler: reduce the matched anchors to their visible text.
  var extractNames = function(anchors) {
    return anchors.map(function(anchor) {
      return anchor.innerText;
    });
  };
  var loading = loadPagesSequentally(pageUrls, '#code_search_results h2 > a', extractNames);
  loading.done(function(namesByUrl) {
    // Merge the per-page arrays into a single list, in key order.
    var merged = [];
    Object.keys(namesByUrl).forEach(function(pageUrl) {
      merged = merged.concat(namesByUrl[pageUrl]);
    });
    window.topUsers = merged;
  });
};
// Loads the profile page of every user and records the contribution total
// shown in its first '.contrib-day > .num' element.
//
// users - Array of user names, e.g. the list that getTopUsersByFollowers()
//         stores in window.topUsers.
//
// Side effect: resets window.usersData and fills it with
// {userName: totalContributions}; users whose page has no matching element
// get 0.
//
// Returns whatever loadPagesSequentally returns, so callers can wait for
// completion.
var getUsersContributions = function(users) {
  window.usersData = {};
  var urls = users.map(function(user) {return '/' + user;});
  return loadPagesSequentally(urls, '.contrib-day > .num', function(nodes, url) {
    var node, res;
    if (nodes.length > 0) {
      node = nodes[0];
      // Drop the " Total" suffix and every thousands separator, then parse
      // explicitly as base 10.
      res = parseInt(node.innerText.replace(' Total', '').replace(/,/g, ''), 10);
    } else {
      res = 0;
    }
    // url is '/' + user, so removing the first slash restores the user name.
    window.usersData[url.replace('/', '')] = res;
    return res;
  });
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment