Last active
May 13, 2016 08:21
-
-
Save hillar/54a7dc0b8abbdf03cc07 to your computer and use it in GitHub Desktop.
moloch sessions to midi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ES6
/*
moloch sessions.csv field names
[ 'Protocol',
' First Packet',
' Last Packet',
' Source IP',
' Source Port',
' Source Geo',
' Destination IP',
' Destination Port',
' Destination Geo',
' Packets',
' Bytes',
' Data Bytes',
' Node' ]
*/
// NOTE: column names deliberately keep their leading space -- moloch's CSV
// header has one after each separator, and header.indexOf() matches exactly.
const STARTCOLUMN = ' First Packet';
const ENDCOLUMN = ' Last Packet';
const DATACOLUMNS = [' Packets',' Bytes',' Data Bytes'];
const SPLITTER = ',';
const LINEEND = '\n';
const BUCKETSIZE = 1; // compression rate, how many time units to one bucket
/* | |
reads the CSV file and return : | |
int min start; | |
int max end; | |
array of rows | |
sessions: [ { start: 1, duration: 1, data: [ 1, 15, 0 ] },
            { start: 2, duration: 1, data: [ 2, 14, 0 ] },
... | |
*/ | |
/*
  Reads a moloch sessions CSV file (first line is the header) and returns:
    start:    int, earliest session start seen
    end:      int, latest session end seen
    sessions: array of { start, duration, data } rows, with data holding
              the requested data columns (as ints) in the order given.
*/
function readRows(filename, splitter=SPLITTER,startcolumn=STARTCOLUMN,endcolumn=ENDCOLUMN,datacolumns=DATACOLUMNS){
  /*
    helper: parse one CSV row given the resolved column indexes.
    Returns int start, int duration, and the data columns as ints.
  */
  function parseRow(row, splitter, startIdx, endIdx, dataIdxs) {
    var bites = row.split(splitter);
    var start = parseInt(bites[startIdx], 10);  // explicit radix: no octal/hex surprises
    var end = parseInt(bites[endIdx], 10);
    var duration = end - start;
    var data = [];
    for (var i = 0; i < dataIdxs.length; i++) {
      data.push(parseInt(bites[dataIdxs[i]], 10));
    }
    return {start, duration, data};
  }
  var fs = require('fs');
  var lines = fs.readFileSync(filename).toString().split(LINEEND);
  var header = lines.shift().split(splitter); // take the first line as header
  var startcolumnnumber = header.indexOf(startcolumn);
  var endcolumnnumber = header.indexOf(endcolumn);
  var datacolumnnumbers = [];
  for (var c = 0; c < datacolumns.length; c++) {
    datacolumnnumbers.push(header.indexOf(datacolumns[c]));
  }
  var parsedRows = [];
  var minstart = Infinity;
  var maxend = -Infinity;
  // BUGFIX: the original `while (lines.length > 1)` relied on the file ending
  // with a newline and silently dropped the last data row when it did not;
  // skipping blank lines handles both cases.
  for (var l = 0; l < lines.length; l++) {
    var line = lines[l];
    if (line.trim() === '') continue;
    var {start, duration, data} = parseRow(line, splitter, startcolumnnumber, endcolumnnumber, datacolumnnumbers);
    minstart = Math.min(start, minstart);
    maxend = Math.max(start + duration, maxend);
    parsedRows.push({start, duration, data});
  }
  return {start: minstart, end: maxend, sessions: parsedRows};
}
/* | |
trim data column names | |
*/ | |
/*
  Trims the surrounding whitespace from each column name and replaces
  the remaining inner spaces with underscores,
  e.g. ' Data Bytes' -> 'Data_Bytes'.
*/
function trimNames(arr){
  return arr.map(function (name) {
    return name.trim().replace(/ /g, '_');
  });
}
/* | |
creates timelined buckets and fill those with sessions data | |
each bucket is array (by default empty) | |
all matching sessions will fill corresponding buckets
needs begin of youngest session | |
end of oldest session | |
sessions | |
datacolumnames | |
bucketsize | |
returns buckets | |
buckets [ { Packets: [ 1, 2, 3, 4 ],
            Bytes: [ 15, 14, 13, 12 ],
            Data_Bytes: [ 0, 0, 0, 0 ] },
          { Packets: [ 4, 5, 6, 7 ],
.... | |
*/ | |
/*
  Creates timelined buckets and fills them with sessions data.
  Each bucket holds, per data column, an array of the values of every
  session that overlaps that time slice, plus a <name>_pertimeunit track
  with the value spread over the session duration.
  Returns the array of buckets.
*/
function rows2buckets(begin,end,sessions,datacolumnames=DATACOLUMNS,bucketsize=BUCKETSIZE){
  var names = trimNames(datacolumnames);
  // prepare the bucketed timeline
  var bucketscount = ~~(((end - begin)/bucketsize) + 1); // round up to next integer
  var step = (end - begin) / bucketscount;
  var buckets = [];
  for (var i = 0; i < bucketscount; i++) {
    var bucket = {};
    for (var n = 0; n < names.length; n++) {
      bucket[names[n]] = [];
      bucket[names[n] + '_pertimeunit'] = [];
    }
    buckets.push(bucket);
  }
  // BUGFIX: the original `while (sessions.length > 1)` skipped the LAST
  // session entirely and destructively consumed the caller's array;
  // iterate over every row without mutating the input.
  for (var s = 0; s < sessions.length; s++) {
    var {start, duration, data} = sessions[s];
    var firstbucket = ~~((start - begin) / step);
    var lastbucket = ~~((start - begin + duration) / step); // TODO: handle step === 0 (oldest session length 0)
    for (var n = firstbucket; n <= lastbucket && n < bucketscount; n++) {
      for (var c = 0; c < names.length; c++) {
        buckets[n][names[c]].push(data[c]);
        buckets[n][names[c] + '_pertimeunit'].push(data[c] / (duration + 1)); // +1 avoids div-by-zero on instant sessions
      }
    }
  }
  return buckets;
}
var atoll = require('atoll'); | |
/* | |
calculate some basic stats on buckets | |
(see https://github.com/nsfmc/atoll.js) | |
to handle empty buckets modified variant of atoll is used | |
see https://github.com/hillar/atoll.js/commit/fe0deac644f70d9daadc2d42f9674ba70283871d | |
*/ | |
/*
  Calculates basic statistics on every bucket column using atoll
  (see https://github.com/nsfmc/atoll.js).
  A modified atoll variant is used to tolerate empty buckets, see
  https://github.com/hillar/atoll.js/commit/fe0deac644f70d9daadc2d42f9674ba70283871d
  Returns { '<column>_<statname>': [value per bucket, ...], ... };
  empty buckets contribute NaN so the timeline keeps its gaps.
*/
function bucketstats(buckets){
  /*
    helper: probe an atoll instance for the zero-argument methods
    that return a plain number (mean, median, stdDev, ...).
  */
  function _atollNumericFunctions(){
    var ret = {};
    var obj = atoll([1,2,3,4,5,6]);
    for (var prop in obj) {
      if (typeof(obj[prop]) !== 'function') continue;
      // BUGFIX: reset per property -- in the original, `tmp` kept the value
      // of the previous successful call, so a method that threw could be
      // misclassified as numeric.
      var tmp = undefined;
      try {
        tmp = obj[prop]();
      } catch(err) {
        // method needs arguments or failed on the probe data -- not usable
      }
      if (typeof(tmp) === 'number') {
        ret[prop] = NaN;
      }
    }
    return ret;
  }
  var functions = _atollNumericFunctions();
  var stats = {};
  // BUGFIX: the original `while (buckets.length > 1)` skipped the last bucket
  // (and destructively consumed the caller's array); visit every bucket.
  for (var b = 0; b < buckets.length; b++) {
    var bucket = buckets[b];
    for (var column in bucket){
      var stat = atoll(bucket[column], true);
      for (var f in functions) {
        var trackname = column + '_' + f;
        if (!stats[trackname]){
          stats[trackname] = [];
        }
        if (bucket[column].length > 0) {
          stats[trackname].push(stat[f]());
        } else {
          stats[trackname].push(NaN); // empty bucket -> explicit gap in the track
        }
      }
    }
  }
  return stats;
}
/* | |
removes horizontal straights
example [0,0,0,0,0,0,0,0,0,0,0] | |
*/ | |
function removeStraigth(stats) {
  for (var trackname in stats) {
    // flat tracks (all values equal) and all-NaN tracks carry no
    // melodic information, so they are removed from the result
    var summary = atoll(stats[trackname], true);
    var lowest = summary.min();
    var highest = summary.max();
    var isFlat = lowest == highest;
    if (isNaN(lowest) || isFlat) {
      //console.log('straight',trackname, stats[trackname])
      delete stats[trackname];
    }
  }
  return stats;
}
/* | |
scales all values to be in between 0 and N | |
keeps empty bin's | |
*/ | |
function scaleto(stats,scale){
  // rescale every track so its values fall in the range 0..scale;
  // NaN entries (empty buckets) become undefined holes, preserved in place
  for (var trackname in stats) {
    var track = stats[trackname];
    var summary = atoll(track, true);
    var lowest = summary.min();
    var span = summary.max() - lowest;
    var rescaled = [];
    for (var i in track) {
      var value = track[i];
      if (isNaN(value)) {
        rescaled[i] = undefined;
      } else {
        rescaled[i] = ~~((value - lowest) / (span/scale));
      }
    }
    stats[trackname] = rescaled;
  }
  return stats;
}
/*
  Prints each stat track to stdout as `name: [v1,v2,...,]`
  (one line per track; the trailing comma matches the original format).
*/
function printstats(stats){
  for (var name in stats) {
    var line = name + ': [';
    for (var i in stats[name]) {
      line = line + stats[name][i] + ',';
    }
    console.log(line + ']');
  }
}
// Default input file; override with the first CLI argument.
var fname = 'fixtures.csv';
// file name as first command line argument
if ( process.argv[2] ){
  fname = process.argv[2];
}
// Pipeline: parse CSV -> bucket sessions on a timeline -> per-bucket stats
// -> drop flat/empty tracks -> scale all values to 0..256 -> print tracks.
var {start,end,sessions} = readRows(fname);
var stats = scaleto(removeStraigth(bucketstats(rows2buckets(start,end,sessions))),256);
printstats(stats);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment