-
-
Save nmalkin/2890222 to your computer and use it in GitHub Desktop.
kpiggybank data simulator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node
/**
 * Create sample interaction data, using valid kpiggybank
 * data as a seed.
 *
 * Can use from the command line, like so:
 *
 *     node sample.js 42   # gets 42 data blobs
 *
 * Or from a module:
 *
 *     var sampler = require('./sampler');
 *
 *     // Generate 10000 data points based on kpi data
 *     sampler.generate(10000, function(data) { ... });
 *
 * The program will always read all KPI data from kpiggybank,
 * and create extra sample data as necessary by jittering original
 * data blobs. It ensures that each event takes at least about
 * 1/4 second.
 */
var http = require('http');

// Host name of the kpiggybank server the seed data is downloaded from.
var KPIGGYBANK = 'kpiggybank.hacksign.in';

// Largest fraction of an event's time that jittering may add or remove;
// 0.2 means each jittered time stays within roughly +/-20% of the original.
var JITTER_FACTOR = 0.2;
/**
 * Download the interaction data dump from the kpiggybank server.
 *
 * Issues an HTTP GET for /wsapi/interaction_data on the KPIGGYBANK host,
 * buffers the whole response body, parses it as JSON and hands the parsed
 * value to `callback`.
 *
 * @param {function(*)} callback - called once, with the parsed JSON body,
 *     after the response has ended.
 * @throws {SyntaxError} from JSON.parse (inside the 'end' handler) when the
 *     body is not valid JSON.
 */
function getDataDump(callback) {
  var data = '';
  http.get({
    host: KPIGGYBANK,
    path: '/wsapi/interaction_data'
  }, function(res) {
    // Decode as a stream: without this each chunk arrives as a Buffer and is
    // stringified on its own, which corrupts any multi-byte UTF-8 character
    // that happens to straddle a chunk boundary.
    res.setEncoding('utf8');
    res.on('data', function(chunk) {
      data += chunk;
    });
    res.on('end', function() {
      return callback(JSON.parse(data));
    });
  });
}
/**
 * Extract the kpi blob from each couch record, keeping only those that
 * have all the kpi fields we want. (Some may be missing timestamp,
 * user_agent, etc.)
 *
 * A blob is kept when all of the following hold:
 *   - event_stream is a non-null object,
 *   - user_agent is a non-null object whose `os` is not the string
 *     'Undefined',
 *   - number_sites_logged_in is truthy (so a value of 0 is rejected).
 * Records that are missing, or whose `value` is missing, are skipped.
 *
 * @param {Object} obj - dictionary of records; each record's blob is read
 *     from its `value` property.
 * @param {function(Array)} callback - called synchronously with the list of
 *     blobs that passed the filter, in key order.
 * @returns {*} whatever `callback` returns.
 */
function filterDataDump(obj, callback) {
  var goodData = [];
  Object.keys(obj).forEach(function(key) {
    var record = obj[key];
    var blob = record && record.value;
    if (!blob) {
      return;
    }
    // typeof null is 'object', so null has to be ruled out explicitly;
    // otherwise a null user_agent would throw on the `.os` lookup.
    var hasEventStream = blob.event_stream !== null &&
        typeof blob.event_stream === 'object';
    var hasUserAgent = blob.user_agent !== null &&
        typeof blob.user_agent === 'object';
    if (hasEventStream &&
        hasUserAgent &&
        blob.user_agent.os !== 'Undefined' &&
        blob.number_sites_logged_in) {
      goodData.push(blob);
    }
  });
  return callback(goodData);
}
/**
 * Jitter each event in a stream.
 *
 * Each element of `eventStream` is read as a [name, time] tuple. The time is
 * shifted by a random amount of up to about +/-JITTER_FACTOR of its own
 * value, and is then raised to a randomized floor between 200 and 299 if it
 * fell below it. The input array and its tuples are not modified.
 *
 * Note - this will make xhr events look like they took at least about
 * 1/4 sec. I'm assuming you don't care about xhr events.
 *
 * @param {Array} eventStream - list of [name, time] tuples.
 * @returns {Array} a new list of [name, jitteredTime] tuples, same order.
 */
function jitterEvents(eventStream) {
  return eventStream.map(function(tuple) {
    // (random - 0.5) spans [-0.5, 0.5), so scaling by JITTER_FACTOR * 2
    // yields an offset within +/-JITTER_FACTOR of the original time.
    var jitter = Math.floor(tuple[1] * (Math.random() - 0.5) * (JITTER_FACTOR * 2));
    // Lower bound for this event: 250 give or take 50.
    var fastest = 250 + Math.floor((Math.random() - 0.5) * 100);
    return [
      tuple[0], // the original event name
      Math.max(fastest, tuple[1] + jitter) // never less than about 1/4 sec
    ];
  });
}
/**
 * Take each data point and multiply it a number of times,
 * each time jittering the data slightly.
 *
 * For every blob in `list` the output contains the original blob itself
 * followed by `times` duplicates. A duplicate gets a freshly jittered
 * event_stream and an _id of the form '<original _id>-<n>'; its sample_rate,
 * timestamp, lang, number_sites_logged_in and user_agent are copied from the
 * original (user_agent is the same object, not a clone).
 *
 * @param {Array} list - seed blobs.
 * @param {number} times - how many jittered duplicates to add per blob.
 * @param {function(Array)} callback - called synchronously with the
 *     amplified list (list.length * (times + 1) entries).
 * @returns {*} whatever `callback` returns.
 */
function amplifyData(list, times, callback) {
  var moreData = [];
  list.forEach(function(blob) {
    moreData.push(blob);
    for (var i = 0; i < times; i++) {
      moreData.push({
        _id: blob._id + '-' + i, // tag as a dup
        event_stream: jitterEvents(blob.event_stream),
        sample_rate: blob.sample_rate,
        timestamp: blob.timestamp, // already rounded off
        lang: blob.lang,
        number_sites_logged_in: blob.number_sites_logged_in,
        user_agent: blob.user_agent
      });
    }
  });
  return callback(moreData);
}
/**
 * Produce `wantCount` interaction data blobs, seeded from kpiggybank.
 *
 * Downloads the data dump, filters it down to usable blobs, and:
 *   - if there are at least `wantCount` usable blobs, passes the first
 *     `wantCount` of them to `callback`;
 *   - otherwise amplifies the usable blobs with jittered duplicates and
 *     passes the first `wantCount` entries of the amplified list.
 * If no usable blobs were downloaded the callback receives an empty list.
 *
 * @param {number} wantCount - number of blobs requested.
 * @param {function(Array)} callback - called with the resulting list.
 */
var generate = module.exports.generate = function generate(wantCount, callback) {
  getDataDump(function(obj) {
    filterDataDump(obj, function(data) {
      if (wantCount <= data.length) {
        return callback(data.slice(0, wantCount));
      }
      // amplifyData emits every seed blob once plus `copies` duplicates, so
      // ceil(wantCount / length) - 1 duplicates per blob already yields at
      // least wantCount entries. Asking for one more round would generate
      // data.length surplus blobs and skew the slice towards the first seeds.
      var copies = Math.ceil(wantCount / data.length) - 1;
      amplifyData(data, copies, function(moreData) {
        return callback(moreData.slice(0, wantCount));
      });
    });
  });
};
// Command-line entry point. Runs only when this file is executed directly
// rather than require()d. The blob count is parsed from the last
// command-line argument and falls back to 1000 when that argument is not a
// number (or is 0). The generated list is written to stdout as JSON with
// 4-space indentation.
// `require.main === module` replaces the deprecated `module.parent` check.
if (require.main === module) {
  var wantCount = parseInt(process.argv[process.argv.length - 1], 10) || 1000;
  generate(wantCount, function(data) {
    process.stdout.write(JSON.stringify(data, null, 4));
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment