Skip to content

Instantly share code, notes, and snippets.

@slaporte
Created August 18, 2012 20:39
Show Gist options
  • Save slaporte/3389746 to your computer and use it in GitHub Desktop.
Save slaporte/3389746 to your computer and use it in GitHub Desktop.
// Per-request timeout that do_query passes to $.ajax.
var GLOBAL_TIMEOUT = 26000; // milliseconds
var jsdom = require('jsdom');
// Window object that jQuery is created against (see jq_lib.create below).
var dummy_window = jsdom.jsdom().createWindow();
// Attach the XMLHttpRequest implementation from the 'xmlhttprequest' package to that window.
dummy_window.XMLHttpRequest = require('xmlhttprequest').XMLHttpRequest;
var jq_lib = require('jquery');
var $ = jq_lib.create(dummy_window);
// NOTE(review): presumably required so $.ajax will attempt cross-domain requests
// in this environment -- confirm against the jQuery.support documentation.
$.support.cors = true;
// Alias for $.extend; not referenced anywhere else in the visible part of this file.
var extend = $.extend;
// Running tallies, incremented by the success / error / complete handlers in do_query.
var successful_queries = 0;
var failed_queries = 0;
var complete_queries = 0;
/**
 * Issue a JSON GET request through jQuery and report the outcome node-style.
 *
 * @param {string} url - Address to request.
 * @param {function} complete_callback - Called as (err, data): (null, data) on
 *     success, (error, null) on failure. The error is always truthy.
 * @param {Object} [kwargs] - Extra $.ajax settings; each own property
 *     overrides the default of the same name (including `headers` as a whole).
 */
function do_query(url, complete_callback, kwargs) {
    var all_kwargs = {
        url: url,
        type: 'get',
        dataType: 'json',
        timeout: GLOBAL_TIMEOUT, //for detecting jsonp errors
        success: function(data) {
            successful_queries++;
            complete_callback(null, data);
        },
        error: function(jqXHR, textStatus, errorThrown) {
            failed_queries++;
            // errorThrown may be an empty string; fall back to textStatus so the
            // callback never receives a falsy error together with null data.
            complete_callback(errorThrown || textStatus || 'error', null);
        },
        complete: function(jqXHR, textStatus) {
            // TODO: error handling (jsonp doesn't get error() calls for a lot of errors)
            complete_queries++;
        },
        headers: { 'User-Agent': 'QualityVis/0.0.0 Mahmoud Hashemi makuro@gmail.com' }
    };
    for (var key in kwargs) {
        // Go through Object.prototype so a kwargs object without its own
        // hasOwnProperty method cannot break the merge.
        if (Object.prototype.hasOwnProperty.call(kwargs, key)) {
            all_kwargs[key] = kwargs[key];
        }
    }
    $.ajax(all_kwargs);
}
/**
 * Collect the names of an object's own enumerable properties.
 *
 * @param {Object} obj - Object to inspect; null/undefined yield an empty list.
 * @returns {string[]} Own property names in enumeration order.
 */
function keys(obj) {
    var own = [];
    var name;
    for (name in obj) {
        // Skip anything that is only reachable through the prototype chain.
        if (!obj.hasOwnProperty(name)) {
            continue;
        }
        own[own.length] = name;
    }
    return own;
}
/**
 * Convert a compact "YYYYMMDDHHMMSS" timestamp string into a Date.
 *
 * The fields are interpreted in the local time zone, as before.
 * NOTE(review): if the timestamps are UTC, Date.UTC should be used instead -- confirm.
 *
 * @param {string} stamp - Digits laid out as year(4) month(2) day(2) hour(2)
 *     minute(2) and optionally second(2).
 * @returns {Date} The corresponding date.
 */
function parse_date_string(stamp) {
    var year = Number(stamp.substring(0, 4));
    // Only the month is zero-based in the Date constructor; day, hour and
    // minute must be passed through unchanged.
    var month_index = Number(stamp.substring(4, 6)) - 1;
    var day = Number(stamp.substring(6, 8));
    var hour = Number(stamp.substring(8, 10));
    var minute = Number(stamp.substring(10, 12));
    // A stamp without a seconds field yields '' here, which converts to 0.
    var second = Number(stamp.substring(12, 14));
    return new Date(year, month_index, day, hour, minute, second);
}
do_query('http://ortelius.toolserver.org:8089/revisions/Coffee', function(err, data){
var ret = {};
var revisions = data['result'];
var now = new Date();
ret.first_edit_date = parse_date_string(revisions[0]['rev_timestamp']);
age = now - ret.first_edit_date
ret.age = age.toString();
ret.most_recent_edit_date = parse_date_string(revisions[revisions.length - 1]['rev_timestamp']);
ret.most_recent_edit_age = now - ret.most_recent_edit_date;
/**
 * Select the revisions whose timestamp falls in a given year, or in a given
 * month of that year.
 *
 * @param {number|string} year - Four-digit year.
 * @param {number|string|false} [month] - Month 1-12; pass false (or omit it)
 *     to select the whole year.
 * @returns {Array} Matching entries of `revisions`, in their original order.
 */
function get_by_period(year, month) {
    // Treat an omitted month like the explicit `false` sentinel; previously an
    // omitted month produced a prefix ending in "ed" that matched nothing.
    var whole_year = (month === false || month === undefined || month === null);
    var prefix = String(year);
    if (!whole_year) {
        prefix += String("0" + month).slice(-2);
    }
    // Compare against the first 4 (year) or 6 (year + month) characters.
    var width = whole_year ? 4 : 6;
    var by_period = [];
    for (var i = 0; i < revisions.length; i++) {
        var stamp = revisions[i]['rev_timestamp'];
        if (stamp.substring(0, width) === prefix) {
            by_period.push(revisions[i]);
        }
    }
    return by_period;
}
/**
 * Pick the revisions made within the last `day_limit` days.
 *
 * @param {number} day_limit - Size of the window, in days before now.
 * @returns {Array} Entries of `revisions` whose parsed timestamp is later
 *     than the cutoff.
 */
function get_since(day_limit) {
    // Cutoff is "now" moved back by day_limit calendar days.
    var cutoff = new Date();
    cutoff.setDate(cutoff.getDate() - day_limit);
    return revisions.filter(function (rev) {
        return parse_date_string(rev['rev_timestamp']) > cutoff;
    });
}
/**
 * Compute the gaps between consecutive revisions.
 *
 * Despite the name, no averaging is done: the raw list of gaps is returned.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {number[]|false} Differences between each parsed timestamp and the
 *     previous one (Date subtraction, i.e. milliseconds), or false when there
 *     are fewer than two revisions.
 */
function get_average_time_between_edits(revs) {
    if(!revs) {
        revs = revisions;
    }
    var times = [];
    for (var i = 0; i < revs.length; i++) {
        times.push(parse_date_string(revs[i]['rev_timestamp']));
    }
    // Declared with var: the original assigned an undeclared name, which leaks
    // a global in sloppy mode and throws in strict mode.
    var time_diffs = [];
    for (var j = 1; j < times.length; j++) {
        time_diffs.push(times[j] - times[j - 1]);
    }
    if(time_diffs.length !== 0) {
        return time_diffs;
    }
    return false;
}
/**
 * Placeholder for an edits-per-day statistic; not implemented yet.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {undefined} Nothing is computed or returned at present.
 */
function get_edits_per_day(revs) {
    revs = revs || revisions;
    // todo
}
/**
 * Gather the `rev_len` value of every revision.
 *
 * Despite the name, no averaging is done: the list of lengths is returned.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {Array} One `rev_len` entry per revision, in order.
 */
function get_average_length(revs) {
    var source = revs ? revs : revisions;
    return source.map(function (rev) {
        return rev['rev_len'];
    });
}
/**
 * Estimate the number of reverts by counting revisions whose edit comment
 * contains the word "revert" (case-insensitive substring match).
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {number} Number of revisions whose comment mentions "revert".
 */
function get_revert_estimate(revs) {
    var reverts = 0;
    if(!revs) {
        revs = revisions;
    }
    for (var i = 0; i < revs.length; i++) {
        var comment = revs[i]['rev_comment'];
        // A revision with a missing or null comment cannot be a match; skip
        // it rather than throwing on .toLowerCase().
        if (typeof comment !== 'string') {
            continue;
        }
        if (comment.toLowerCase().indexOf('revert') !== -1) {
            reverts += 1;
        }
    }
    return reverts;
}
/**
 * Count revisions.
 *
 * @param {Array} [revs] - Revisions to count; defaults to `revisions`.
 * @returns {number} Length of the chosen list.
 */
function get_revision_total(revs) {
    return (revs || revisions).length;
}
/**
 * Add up the `rev_minor_edit` field across revisions.
 *
 * NOTE(review): presumably the field is a 0/1 flag, which makes the sum a
 * count of minor edits -- confirm against the data source.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {number} Sum of the `rev_minor_edit` values.
 */
function get_minor_count(revs) {
    var pool = revs ? revs : revisions;
    return pool.reduce(function (running, rev) {
        return running + rev['rev_minor_edit'];
    }, 0);
}
/**
 * Count revisions whose `rev_user` loosely equals 0.
 *
 * NOTE(review): presumably a user id of 0 marks an anonymous (IP) edit --
 * confirm against the data source.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {number} Number of matching revisions.
 */
function get_anon_count(revs) {
    var pool = revs ? revs : revisions;
    // Loose equality is kept on purpose so that 0 and '0' both match.
    return pool.filter(function (rev) {
        return rev['rev_user'] == 0;
    }).length;
}
/**
 * Tally how many revisions each editor made.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {Object} Map from `rev_user_text` to that editor's revision count.
 */
function get_editor_counts(revs) {
    var authors = {};
    if(!revs) {
        revs = revisions;
    }
    for (var i = 0; i < revs.length; i++) {
        var user = revs[i]['rev_user_text'];
        // Test for an own property: a plain truthiness check also sees
        // inherited members such as toString/constructor and would corrupt
        // the tally for editors with those names.
        if (Object.prototype.hasOwnProperty.call(authors, user)) {
            authors[user] += 1;
        } else {
            authors[user] = 1;
        }
    }
    return authors;
}
/**
 * Count the distinct editors of a set of revisions.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {number} Number of distinct `rev_user_text` values.
 */
function get_editor_count(revs) {
    if(!revs) {
        revs = revisions;
    }
    // get_editor_counts returns a plain object, which has no .length; count
    // its keys instead.
    return keys(get_editor_counts(revs)).length;
}
/**
 * Group revision sizes by editor.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {Object} Map from `rev_user_text` to the list of `rev_len` values
 *     of that editor's revisions, in order.
 */
function get_editor_bytes(revs){
    // Declared with var: the original assigned an undeclared name, which leaks
    // a global in sloppy mode and throws in strict mode.
    var authors = {};
    if(!revs) {
        revs = revisions;
    }
    for (var i = 0; i < revs.length; i++) {
        var rev = revs[i];
        var user = rev['rev_user_text'];
        // Own-property test so editor names that match inherited members
        // (toString, constructor, ...) still get their own list.
        if (Object.prototype.hasOwnProperty.call(authors, user)) {
            // actually, should subtract from previous rev
            authors[user].push(rev['rev_len']);
        } else {
            authors[user] = [rev['rev_len']];
        }
    }
    return authors;
}
/**
 * Keep only the editors with strictly more than `num` revisions.
 *
 * @param {number} num - Threshold; an editor needs more than this many edits.
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {Object} Map from editor name to revision count for the editors
 *     that pass the threshold.
 */
function get_some_editors(num, revs) {
    var counts = get_editor_counts(revs ? revs : revisions);
    var frequent = {};
    var name;
    for (name in counts) {
        if (!(counts[name] > num)) {
            continue;
        }
        frequent[name] = counts[name];
    }
    return frequent;
}
/**
 * Rank editors by number of revisions, most active first.
 *
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {Array} Objects of the form {user, count}, sorted by descending count.
 */
function get_top_editors(revs) {
    var counts = get_editor_counts(revs || revisions);
    var ranked = Object.keys(counts).map(function (name) {
        return {user: name, count: counts[name]};
    });
    ranked.sort(function (left, right) {
        return right.count - left.count;
    });
    return ranked;
}
/**
 * Compute the share of revisions contributed by the most active editors.
 *
 * @param {number} [top] - Fraction of editors to treat as "top" (0.05 = top
 *     5%); defaults to 0.20. The editor count is rounded down, so a small
 *     editor pool can yield zero top editors.
 * @param {Array} [revs] - Revisions to examine; defaults to `revisions`.
 * @returns {number} Revisions made by the top editors divided by all
 *     revisions, or 0 when there are no revisions.
 */
function get_top_percent_editors(top, revs){
    var pool = revs ? revs : revisions;
    var fraction = top ? top : 0.20;
    // The old guard tested .length on a plain object (always undefined), so
    // an empty revision list fell through to 0 / 0 = NaN.
    if (pool.length === 0) {
        return 0;
    }
    // One entry per editor, most active first.
    var ranked = get_top_editors(pool);
    var cutoff = Math.floor(ranked.length * fraction);
    // Take the FIRST `cutoff` entries; slice(cutoff) would select everyone
    // except the top editors.
    var top_editors = ranked.slice(0, cutoff);
    var total = 0;
    for (var i = 0; i < top_editors.length; i++) {
        total += top_editors[i]['count'];
    }
    return total / pool.length;
}
ret['total_revisions'] = get_revision_total();
ret['minor_count'] = get_minor_count();
ret['byte_count'] = get_editor_bytes();
ret['IP_edit_count'] = get_anon_count();
ret['average_time_between_edits'] = get_average_time_between_edits();
ret['editors_five_plus_edits'] = get_some_editors(5).length;
ret['top_20_percent'] = get_top_percent_editors();
ret['top_5_percent'] = get_top_percent_editors(.05);
ret['total_editors'] = get_editor_count();
ret['average_length'] = get_average_length();
ret['reverts_estimate'] = get_revert_estimate();
ret['last_30_days_total_revisions'] = get_revision_total(get_since(30));
ret['last_30_days_minor_count'] = get_minor_count(get_since(30));
ret['last_30_days_IP_edit_count'] = get_anon_count(get_since(30));
ret['last_30_days_average_time_between_edits'] = get_average_time_between_edits(get_since(30));
ret['last_30_days_editors_five_plus_edits'] = keys(get_some_editors(5, get_since(30))).length;
ret['last_30_days_top_20_percent'] = get_top_percent_editors(.20, get_since(30));
ret['last_30_days_top_5_percent'] = get_top_percent_editors(.05, get_since(30));
ret['last_30_days_total_editors'] = get_editor_count(get_since(30));
ret['last_30_days_average_length'] = get_average_length(get_since(30));
ret['last_30_days_reverts_estimate'] = get_revert_estimate(get_since(30));
console.log(ret);
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment