Created
April 20, 2013 02:45
-
-
Save r-sal/5424515 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// js-ajax-parallel-load-pages.js | |
// Deps: jQuery, underscore, async.js | |
// Gets HTML from all URLs in parallel, parses it, runs a CSS selector query
// against every page and collects the results.
//
// urls        - Array of URLs to fetch with jQuery.get.
// selector    - CSS selector passed to jQuery's .find() on the parsed HTML of
//               every response.
// itemHandler - Optional function called ONCE PER URL as
//               itemHandler(matchedNodes, url), where matchedNodes is a plain
//               Array of the elements matched on that page. Its return value
//               is stored in the result. Defaults to returning matchedNodes.
//
// Examples:
//
//   loadPagesInParallel(['/1.html', '/2.html'], '.user > span', function(spans, url) {...})
//
// Returns a jQuery Deferred. It is resolved with an object that maps every
// URL to the value itemHandler returned for it (an empty object when `urls`
// is empty) and rejected, with the arguments of the failed request, as soon
// as any request fails.
var loadPagesInParallel = function(urls, selector, itemHandler) {
  if (itemHandler == null) itemHandler = function(item) {return item};
  console.log('Loading', urls.length, 'urls');

  var promise = jQuery.Deferred();
  if (urls.length === 0) {
    // Resolve with an empty map so consumers always receive an object.
    promise.resolveWith(promise, [{}]);
    return promise;
  }

  var requests = urls.map(function(url) {
    var request = jQuery.get(url);
    // Remember which URL the request belongs to; used as the result key.
    request.url = url;
    return request;
  });

  jQuery.when.apply(jQuery, requests)
    .fail(function() {
      console.error('Failed', urls);
      // Forward the failure details instead of rejecting with nothing.
      promise.reject.apply(promise, arguments);
    })
    .done(function() {
      var result = {};
      requests.forEach(function(request) {
        // NOTE(review): .find() only searches descendants of the parsed
        // top-level elements, so a selector that matches a top-level element
        // yields no nodes - confirm this is intended.
        var dom = jQuery(request.responseText.trim());
        var nodes = [].slice.call(dom.find(selector));
        result[request.url] = itemHandler(nodes, request.url);
      });
      promise.resolveWith(promise, [result]);
    });

  return promise;
};
// Splits `total` items into consecutive pages of `perPage` items and reports
// the boundaries of every page.
//
// total     - Number of items to split.
// perPage   - Page size; must be a positive number.
// paginator - Function called once per page as paginator(from, to), where
//             `from` is inclusive and `to` is exclusive. `to` is not clamped
//             to `total`, so the last page may extend past the end (which is
//             what Array#slice expects).
//
// At least one page is always reported, even when `total` is 0, so callers
// that wait for page callbacks are never left without one.
//
// Throws RangeError when perPage is not a positive number (the loop would
// otherwise never terminate).
var paginate = function(total, perPage, paginator) {
  if (!(perPage > 0)) {
    throw new RangeError('perPage must be a positive number');
  }
  // Math.ceil avoids the trailing empty page that `floor(...) + 1` produced
  // whenever total was an exact multiple of perPage.
  var pages = Math.max(1, Math.ceil(total / perPage));
  for (var page = 0; page < pages; page++) {
    var from = page * perPage;
    paginator(from, from + perPage);
  }
};
// Loads `urls` in batches: URLs inside one batch are fetched in parallel
// (via loadPagesInParallel), while the batches themselves run one after
// another through an async.js queue with concurrency 1.
//
// urls        - Array of URLs to fetch.
// selector    - CSS selector forwarded to loadPagesInParallel.
// itemHandler - Per-URL handler forwarded to loadPagesInParallel.
// perRequest  - Optional batch size (URLs fetched at once). Defaults to 25.
//
// Returns a jQuery Deferred. It is resolved with a single object that merges
// the per-batch result maps, and rejected (with the failure arguments of the
// batch) as soon as one batch fails; remaining batches are then skipped.
var loadPagesSequentally = function(urls, selector, itemHandler, perRequest) {
  if (perRequest == null) perRequest = 25;
  var promise = $.Deferred();
  var results = [];
  var failed = false;

  var calls = [];
  paginate(urls.length, perRequest, function(from, to) {
    calls.push(function() {
      return loadPagesInParallel(urls.slice(from, to), selector, itemHandler);
    });
  });

  // Nothing to load: the queue would never drain, so resolve right away.
  if (calls.length === 0) {
    promise.resolveWith(promise, [{}]);
    return promise;
  }

  var queue = async.queue(function(fn, callback) {
    // After a failure just flush the remaining tasks without running them.
    if (failed) {
      callback();
      return;
    }
    fn()
      .done(function(result) {
        results.push(result);
        callback();
      })
      .fail(function() {
        // Without this branch a failed batch never called `callback`, so the
        // queue stalled and the returned Deferred stayed pending forever.
        failed = true;
        promise.reject.apply(promise, arguments);
        callback();
      });
  }, 1);

  // Install the drain handler before pushing so it can never be missed.
  queue.drain = function() {
    if (failed) return;
    var flattened = {};
    results.forEach(function(batch) {
      // A batch may resolve without a value (e.g. an empty batch).
      if (!batch) return;
      for (var key in batch) {
        flattened[key] = batch[key];
      }
    });
    promise.resolveWith(promise, [flattened]);
  };
  queue.push(calls);

  return promise;
};
// Builds a tally of the values in `array`.
//
// array - Array of values usable as object keys (e.g. strings).
//
// Returns a plain object whose keys are the distinct values and whose values
// are the number of times each one occurs.
var createCounter = function(array) {
  var counts = {};
  array.forEach(function(item) {
    var alreadySeen = Object.prototype.hasOwnProperty.call(counts, item);
    counts[item] = alreadySeen ? counts[item] + 1 : 1;
  });
  return counts;
};
// Prints, as one console group, how many times every name occurs in `names`,
// most frequent first.
//
// names - Array of names (one entry per occurrence).
//
// Returns nothing; output goes to the console.
var printStats = function(names) {
  var counts = createCounter(names);
  var distinct = Object.keys(counts);

  var ranked = [];
  for (var i = 0; i < distinct.length; i++) {
    ranked.push([distinct[i], counts[distinct[i]]]);
  }
  // Highest count first.
  ranked.sort(function(left, right) {
    return right[1] - left[1];
  });

  var header = 'Total ' + names.length + ' pull requests in ' + distinct.length + ' repos.';
  console.group(header);
  for (var j = 0; j < ranked.length; j++) {
    console.log(ranked[j].join(' - '));
  }
  console.groupEnd(header);
};
// Extracts the facts about one pull request from its parsed page.
//
// url  - URL of the pull request page; copied into the result unchanged.
// node - DOM element to query (anything that implements querySelector).
//
// Returns an object {isPrivate, isMerged, stars, url}:
//   isPrivate - true when a '.mega-icon-private-repo' element is present.
//   isMerged  - true when a '.state-indicator.merged' element is present.
//   stars     - integer parsed from the text of '.social-count'; 0 when that
//               element is missing or its text is not a number.
var parsePullRequest = function(url, node) {
  var starsNode = node.querySelector('.social-count');
  var stars = 0;
  if (starsNode) {
    // Remove EVERY thousands separator: a string pattern in replace() only
    // removes the first one, turning "1,234,567" into 1234.
    stars = parseInt(starsNode.innerText.trim().replace(/,/g, ''), 10);
    if (isNaN(stars)) stars = 0;
  }
  return {
    isPrivate: Boolean(node.querySelector('.mega-icon-private-repo')),
    isMerged: Boolean(node.querySelector('.state-indicator.merged')),
    stars: stars,
    url: url
  };
};
// Pages.

// Collects the closed pull requests listed on the first 9 pages of the
// GitHub pulls dashboard, loads every pull request page, keeps the merged,
// public ones that are neither in a 'paulmillr' URL nor in a blacklisted
// repo, groups them by repository and renders the repositories (most starred
// first) as an HTML list.
//
// Returns the promise of the whole pipeline, resolved with the HTML string
// ('<ul>...</ul>'). NOTE(review): chaining through .then() assumes a jQuery
// version whose Deferred#then returns a new, chained promise - confirm.
var countPullRequests = function() {
  var getUrl = function(page) {
    return 'https://github.com/dashboard/pulls?direction=desc&page=' + page + '&sort=created&state=closed'
  };
  var urls = _.range(1, 10).map(getUrl);
  var pullsUrls = [];
  var pullsData = [];
  // Repositories whose pull requests must not be counted.
  var invalidRepos = [
    'brunch/brunch', 'chaplinjs/chaplin', 'dcramer/mangodb',
    'lifesinger/lifesinger.github.com', 'blackrabbit99/kievjs'
  ];
  // Contributions that do not show up on the GitHub dashboard.
  var additionalPulls = {
    'josh/pygments': {
      url: 'http://pygments.org',
      stars: 900,
      pulls: ['https://bitbucket.org/birkenfeld/pygments-main/pull-request/84/']
    }
  };

  var isLegit = function(pull) {
    if (!pull.isMerged || pull.isPrivate) return false;
    if (pull.url.indexOf('paulmillr') !== -1) return false;
    // The pull is legit only when NONE of the blacklisted repos occurs in its
    // URL. (`some` kept every pull, because any URL lacks at least one of
    // the blacklisted names.)
    return invalidRepos.every(function(invalidRepo) {
      return pull.url.indexOf(invalidRepo) === -1;
    });
  };

  return loadPagesSequentally(urls, '.pulls-list > .list-browser-item h3 a', function(nodes) {
    var newPullsUrls = nodes.map(function(node) {return node.href;});
    pullsUrls = pullsUrls.concat(newPullsUrls);
    return newPullsUrls;
  }).then(function() {
    return loadPagesSequentally(pullsUrls, '.site', function(nodes, url) {
      // Skip pages on which the selector matched nothing.
      if (nodes.length === 0) return;
      pullsData.push(parsePullRequest(url, nodes[0]));
    });
  }).then(function() {
    var legitPulls = pullsData
      .filter(isLegit)
      .reduce(function(groups, pull) {
        var match = /github\.com\/(.+)\/pull/.exec(pull.url);
        // Ignore URLs that are not GitHub pull request URLs.
        if (!match) return groups;
        var repo = match[1];
        if (!({}).hasOwnProperty.call(groups, repo)) {
          groups[repo] = {stars: pull.stars, pulls: []};
        }
        groups[repo].pulls.push(pull.url);
        return groups;
      }, {});
    _.extend(legitPulls, additionalPulls);

    var sorted = Object.keys(legitPulls)
      .sort(function(a, b) {
        return legitPulls[b].stars - legitPulls[a].stars;
      })
      .map(function(repo) {
        var item = legitPulls[repo];
        item.repo = repo;
        return item;
      });

    // One <li> per repository, linking to its explicit url when it has one
    // and to its GitHub page otherwise.
    var html = sorted.reduce(function(str, item) {
      var url = item.url ? item.url : 'https://github.com/' + item.repo;
      str += '\n  <li><a href="' + url + '">';
      str += item.repo.split('/')[1];
      str += '</a></li>';
      return str;
    }, '<ul>') + '\n</ul>';
    // Hand the markup to the caller instead of silently discarding it.
    return html;
  });
};
// Walks GitHub user search result pages 1 through 99 (users with more than
// 100 followers), reads the text of every result link and stores the
// flattened list of those texts in window.topUsers.
//
// Returns nothing; the result is only published on `window`.
var getTopUsersByFollowers = function() {
  var searchUrls = _.range(1, 100).map(function(page) {
    return 'https://github.com/search?l=&p=' + page + '&q=followers%3A%3E100&ref=searchbar&type=Users';
  });

  // Turns the links matched on one page into their text.
  var extractNames = function(nodes) {
    return nodes.map(function(node) {return node.innerText});
  };

  loadPagesSequentally(searchUrls, '#code_search_results h2 > a', extractNames).done(function(data) {
    // `data` maps page URL -> names on that page; join all pages together.
    var names = [];
    Object.keys(data).forEach(function(pageUrl) {
      names = names.concat(data[pageUrl]);
    });
    window.topUsers = names;
  });
};
// Loads the profile page ('/' + user) of every user, reads the first
// '.contrib-day > .num' element and records the number it shows in
// window.usersData, keyed by user name. Users whose page has no such
// element, or whose text is not a number, are recorded with 0.
//
// users - Array of user names (getTopUsersByFollowers() result).
//
// Returns the Deferred of the underlying loadPagesSequentally call, so the
// caller can tell when window.usersData is complete.
var getUsersContributions = function(users) {
  window.usersData = {};
  var urls = users.map(function(user) {return '/' + user});
  return loadPagesSequentally(urls, '.contrib-day > .num', function(nodes, url) {
    var res = 0;
    if (nodes.length > 0) {
      // Drop the ' Total' suffix and EVERY thousands separator; always parse
      // as base 10.
      var text = nodes[0].innerText.replace(' Total', '').replace(/,/g, '');
      res = parseInt(text, 10);
      if (isNaN(res)) res = 0;
    }
    // Only strip the leading slash that was added when building the URL.
    window.usersData[url.replace(/^\//, '')] = res;
    return res;
  });
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment