Skip to content

Instantly share code, notes, and snippets.

@hillar
Last active May 13, 2016 08:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hillar/54a7dc0b8abbdf03cc07 to your computer and use it in GitHub Desktop.
Save hillar/54a7dc0b8abbdf03cc07 to your computer and use it in GitHub Desktop.
moloch sessions to midi
// ES6
/*
moloch sessions.csv field names
[ 'Protocol',
' First Packet',
' Last Packet',
' Source IP',
' Source Port',
' Source Geo',
' Destination IP',
' Destination Port',
' Destination Geo',
' Packets',
' Bytes',
' Data Bytes',
' Node' ]
*/
// Column names are compared verbatim against the split header line, so the
// leading space seen in the moloch export is part of each name.
// Header name of the column holding a session's start value.
const STARTCOLUMN = ' First Packet';
// Header name of the column holding a session's end value.
const ENDCOLUMN = ' Last Packet';
// Header names of the columns collected into each session's `data` array.
const DATACOLUMNS = [' Packets',' Bytes',' Data Bytes'];
// Field separator used to split the header line and every row.
const SPLITTER = ',';
// Line separator used to split the file content into rows.
const LINEEND = '\n';
const BUCKETSIZE = 1; // compression rate: how many time units go into one bucket
/**
 * Reads a delimited text file (e.g. a moloch sessions.csv export) and turns
 * every non-blank data row into a session record.
 *
 * The first line is taken as the header; the positions of `startcolumn`,
 * `endcolumn` and `datacolumns` are looked up in it by exact name (no
 * trimming). A column that is not found yields NaN values for that field.
 *
 * @param {string} filename path of the file, read synchronously
 * @param {string} [splitter=SPLITTER] field separator
 * @param {string} [startcolumn=STARTCOLUMN] header name of the start column
 * @param {string} [endcolumn=ENDCOLUMN] header name of the end column
 * @param {string[]} [datacolumns=DATACOLUMNS] header names of the value columns
 * @returns {{start: number, end: number, sessions: Array}}
 *   start    - smallest session start (Infinity when there are no rows)
 *   end      - largest session end (-Infinity when there are no rows)
 *   sessions - one entry per data row, values parsed as base-10 integers:
 *              [ { start: 1, duration: 1, data: [ 1, 15, 0 ] },
 *                { start: 2, duration: 1, data: [ 2, 14, 0 ] },
 *                ...
 */
function readRows(filename, splitter=SPLITTER,startcolumn=STARTCOLUMN,endcolumn=ENDCOLUMN,datacolumns=DATACOLUMNS){
  /*
  helper to parse one row
  takes the splitter and the column INDEXES (not names) of start, end and data
  returns:
  int start
  int duration (end - start)
  array of parsed data columns [datacolumn1,..,datacolumnN]
  */
  function parseRow(row, splitter, startcolumn, endcolumn, datacolumns) {
    const fields = row.split(splitter);
    // explicit radix: never let a prefix such as "0x" switch the number base
    const start = parseInt(fields[startcolumn], 10);
    const end = parseInt(fields[endcolumn], 10);
    const data = datacolumns.map((index) => parseInt(fields[index], 10));
    return { start, duration: end - start, data };
  }

  const fs = require('fs');
  const lines = fs.readFileSync(filename).toString().split(LINEEND);
  const header = lines.shift().split(splitter); // take the first line as header
  const startcolumnnumber = header.indexOf(startcolumn);
  const endcolumnnumber = header.indexOf(endcolumn);
  const datacolumnnumbers = datacolumns.map((name) => header.indexOf(name));

  const parsedRows = [];
  let minstart = Infinity;
  let maxend = -Infinity;
  for (const line of lines) {
    // A trailing line end leaves an empty last element; skipping blank lines
    // handles that without dropping the last real row of a file that does
    // not end with a line end.
    if (line.trim() === '') {
      continue;
    }
    const session = parseRow(line, splitter, startcolumnnumber, endcolumnnumber, datacolumnnumbers);
    minstart = Math.min(session.start, minstart);
    maxend = Math.max(session.start + session.duration, maxend);
    parsedRows.push(session);
  }
  return {start:minstart, end:maxend, sessions:parsedRows};
}
/**
 * Normalises data column names so they can be used as object keys:
 * surrounding whitespace is removed and every remaining space becomes '_'
 * (' Data Bytes' -> 'Data_Bytes').
 *
 * @param {string[]} arr column names
 * @returns {string[]} new array of normalised names in the same order;
 *   the input array is not modified
 */
function trimNames(arr){
  // map() visits only the array's own elements; for...in would also walk
  // enumerable keys inherited from Array.prototype.
  return arr.map((name) => name.trim().replace(/ /g, '_'));
}
/**
 * Creates a timeline of buckets and fills them with session data.
 *
 * The span begin..end is cut into equally wide buckets. Every session adds
 * its data values to each bucket it overlaps: the raw value under the
 * normalised column name, and the value divided by (duration + 1) under
 * '<name>_pertimeunit'. Buckets no session touches keep empty arrays.
 *
 * @param {number} begin start of the earliest session
 * @param {number} end end of the latest session
 * @param {Array<{start: number, duration: number, data: number[]}>} sessions
 *   sessions to distribute; the array is only read, not modified
 * @param {string[]} [datacolumnames=DATACOLUMNS] names of the entries of
 *   `data`, in the same order
 * @param {number} [bucketsize=BUCKETSIZE] how many time units go into one bucket
 * @returns {Object[]} buckets, e.g.
 *   [ { Packets: [ 1, 2, 3, 4 ],
 *       Bytes: [ 15, 14, 13, 12 ],
 *       Data_Bytes: [ 0, 0, 0, 0 ],
 *       Packets_pertimeunit: [ ... ], ... },
 *     ...
 */
function rows2buckets(begin,end,sessions,datacolumnames=DATACOLUMNS,bucketsize=BUCKETSIZE){
  const names = trimNames(datacolumnames);
  // prepare bucketed timeline
  // ~~ truncates toward zero, so the "+ 1" rounds up to the next integer
  const bucketscount = ~~(((end - begin) / bucketsize) + 1);
  const step = (end - begin) / bucketscount;
  const buckets = [];
  for (let i = 0; i < bucketscount; i++) {
    const bucket = {};
    // all raw columns first, then all per-time-unit columns (key order is
    // the order later consumers iterate in)
    for (const name of names) {
      bucket[name] = [];
    }
    for (const name of names) {
      bucket[name + '_pertimeunit'] = [];
    }
    buckets.push(bucket);
  }
  // Visit EVERY session; iterating instead of shift()-ing also leaves the
  // caller's array intact.
  for (const {start, duration, data} of sessions) {
    // ~~ also turns NaN into 0: when begin === end, step is 0 and the
    // divisions below are 0/0, which places the session in bucket 0.
    const firstbucket = ~~((start - begin) / step);
    const lastbucket = ~~((start - begin + duration) / step); //TODO, handle case when oldest session length is 0
    // a session ending exactly at `end` computes index bucketscount; clip it
    for (let n = firstbucket; n <= lastbucket && n < bucketscount; n++) {
      names.forEach((name, column) => {
        buckets[n][name].push(data[column]);
      });
      names.forEach((name, column) => {
        buckets[n][name + '_pertimeunit'].push(data[column] / (duration + 1));
      });
    }
  }
  return buckets;
}
var atoll = require('atoll');
/**
 * Calculates basic statistics for every column of every bucket.
 * (see https://github.com/nsfmc/atoll.js)
 * To handle empty buckets a modified variant of atoll is used,
 * see https://github.com/hillar/atoll.js/commit/fe0deac644f70d9daadc2d42f9674ba70283871d
 *
 * For each bucket column and each numeric atoll function one track named
 * '<column>_<function>' is built; it receives one value per bucket, in
 * bucket order. Empty columns contribute NaN.
 *
 * @param {Object[]} buckets objects mapping column name -> array of values;
 *   the array is only read, not modified
 * @returns {Object} track name -> array with one entry per bucket
 */
function bucketstats(buckets){
  /*
  helper to find the names of the atoll functions that return a number
  when called without arguments on a sample data set
  returns an object whose keys are those function names
  */
  function _atollNumericFunctions(){
    const ret = {};
    const obj = atoll([1,2,3,4,5,6]);
    for (const prop in obj) {
      if (typeof obj[prop] !== 'function') {
        continue;
      }
      // declared per iteration so a failed call can never be judged by the
      // result left over from the previously probed function
      let result;
      try {
        result = obj[prop]();
      } catch (err) {
        // probing only: a function that cannot be called without arguments
        // is not usable as a statistic here
        continue;
      }
      if (typeof result === 'number') {
        ret[prop] = NaN;
      }
    }
    return ret;
  }

  const functions = _atollNumericFunctions();
  const stats = {};
  // Visit EVERY bucket, including the last one.
  for (const bucket of buckets) {
    for (const column in bucket) {
      const values = bucket[column];
      // NOTE(review): the second argument `true` is presumably the switch of
      // the modified atoll variant referenced above - confirm against it.
      const stat = atoll(values, true);
      for (const f in functions) {
        const trackname = column + '_' + f;
        if (!stats[trackname]) {
          stats[trackname] = [];
        }
        // empty column: keep the slot so all tracks stay aligned per bucket
        stats[trackname].push(values.length > 0 ? stat[f]() : NaN);
      }
    }
  }
  return stats;
}
/**
 * Removes horizontal straight lines, i.e. tracks that carry no variation,
 * example [0,0,0,0,0,0,0,0,0,0,0]
 *
 * A track is dropped when atoll reports a minimum for which isNaN() is true,
 * or a minimum equal to its maximum.
 *
 * @param {Object} stats track name -> array of values; modified in place
 * @returns {Object} the same `stats` object, without the flat tracks
 */
function removeStraigth(stats) {
  for (const track in stats) {
    const summary = atoll(stats[track], true);
    const lowest = summary.min();
    const highest = summary.max();
    const isFlat = isNaN(lowest) || lowest == highest;
    if (isFlat) {
      delete stats[track];
    }
  }
  return stats;
}
/**
 * Scales every track so its values become integers between 0 and `scale`
 * (the track minimum maps to 0, the maximum to `scale`, results truncated).
 * Empty bins are kept: an entry for which isNaN() is true becomes undefined.
 *
 * @param {Object} stats track name -> array of values; every array is
 *   replaced in place by its scaled version
 * @param {number} scale upper end of the target range
 * @returns {Object} the same `stats` object
 */
function scaleto(stats,scale){
  for (const track in stats) {
    const values = stats[track];
    const summary = atoll(values, true);
    const lowest = summary.min();
    const highest = summary.max();
    // width of one target unit, expressed in source values
    const unit = (highest - lowest) / scale;
    stats[track] = values.map((value) => {
      if (isNaN(value)) {
        return undefined;
      }
      // ~~ truncates toward zero (and yields 0 for a NaN quotient)
      return ~~((value - lowest) / unit);
    });
  }
  return stats;
}
/**
 * Prints every track on its own line as `<name>: [v1,v2,...,]`
 * (each value is followed by a comma, including the last one).
 *
 * @param {Object} stats track name -> array of values
 */
function printstats(stats){
  for (const track in stats) {
    const cells = stats[track].map((value) => value + ',');
    console.log(track + ': [' + cells.join('') + ']');
  }
}
// Entry point: the CSV file name is the first command line argument,
// with 'fixtures.csv' as the fallback when none is given.
const fname = process.argv[2] || 'fixtures.csv';
const {start, end, sessions} = readRows(fname);
// pipeline: sessions -> buckets -> per-bucket stats -> drop flat tracks -> scale to 0..256
const bucketed = rows2buckets(start, end, sessions);
const stats = scaleto(removeStraigth(bucketstats(bucketed)), 256);
printstats(stats);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment