Skip to content

Instantly share code, notes, and snippets.

@jesslilly
Last active January 4, 2016 03:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jesslilly/8564976 to your computer and use it in GitHub Desktop.
Save jesslilly/8564976 to your computer and use it in GitHub Desktop.
Function to group an array of x,y data. Useful for charting. Requires underscore.js.
/**
* @method
* @public
* @description Take an array of objects and convert to an array of pairs whose
* xCol are grouped and yCol values are aggregated somehow. If
* grouping by day or month, dates must be in ZULU format strings!
* Original implementation returned an object with keys = xCol and
* values = yCol. It worked great but js maps(objects) cannot be
* sorted!
* @param {array}
* constInput - array of x/y values. Can include extra fields.
* @param {string}
* xCol - name of x attribute to group by.
* @param {string}
* yCol - name of y attribute to aggregate by.
* @param {string}
* groupFunction - day, month, x. Date options "fill in" missing
* values.
* @param {string}
* aggFunction - max, sum, avg
* @return {object} output - array of x/y values.
* @throws {Error} -
* when groupFunction is not passed in. Because this returns an
* object with the keys being the group, the function MUST do
* grouping otherwise there would be duplicate keys in the object
* which is not possible.
*/
function groupBy(constInput, xCol, yCol, groupFunction, aggFunction) {
// --------------------
// variable declaration
// --------------------
var output = [];
// helper map. Use to do the grouping and reference group to array index.
var hMap = {};
// Just an obj to iterate over.
var obj;
var sample = [];
var input = [];
var agg;
var ds; // date string
var minInGroup = null;
var maxInGroup = null;
// --------------------
// parameter validation
// --------------------
if (!groupFunction || !groupFunction.match(/^x$|^day$|^month$/)) {
throw "The groupFunction [" + groupFunction + "] is invalid!";
}
if (!groupFunction || !aggFunction.match(/^min$|^max$|^sum$|^avg$|^count$/)) {
throw "The aggFunction [" + aggFunction + "] is invalid!";
}
if (constInput.length <= 0) {
return [];
}
var stoi = function(input) {
return (isNaN(input)) ? input : parseFloat(input, 10);
}
// -----------------------
// variable initialization
// -----------------------
var start = new Date();
// -----------------------------------------------------
// Copy the input to a new array so we can mess with it.
// -----------------------------------------------------
input = _.map(constInput, function(row) {
// The new object only needs the 2 columns in question, not all of
// them.
obj = {};
// If we are grouping by day or month, we can now reduce date precision
// and make the aggregation easier later.
if (groupFunction === "day") {
obj[xCol] = row[xCol].substring(0, 10);// + "T00:00:00.000Z";
} else if (groupFunction === "month") {
obj[xCol] = row[xCol].substring(0, 7) + "-01";// T00:00:00.000Z";
} else {
obj[xCol] = row[xCol];
}
obj[yCol] = stoi(row[yCol]);
return obj;
});
// -----------------------------------------------------
// Rip nasty null data out of the input.
// -----------------------------------------------------
input = _.filter(input, function(row) {
// false, null, 0, "", undefined and NaN
var x = row[xCol];
var y = row[yCol];
var good = (x !== null && x !== undefined) && (y !== null && y !== undefined && !isNaN(y));
return good;
});
// ---------------------------------------------
// Create the output object already pre-grouped.
// ---------------------------------------------
if (groupFunction === "x") {
_.each(input, function(row, idx) {
hMap[row[xCol]] = null;
});
_.each(hMap, function(value, key) {
obj = {};
obj[xCol] = key;
obj[yCol] = 0;
// Get the output ready with this x value.
output.push(obj);
hMap[key] = output.length - 1;
});
}
if (groupFunction.match(/^day$|^month$/)) {
// normalize input such that all dates exist prior to grouping.
// First build an input structure that has all the dates we need.
// Also "pre-group" the dates.
// Dates are really special; Allow any sort for x value grouping, but
// dates
// really need to be sorted chonologically or it really doesn't look
// right.
input = _.sortBy(input, xCol);
// If input data is already sorted, we can get min and max easily.
var minInGroup = input[0][xCol];
var maxInGroup = input[input.length - 1][xCol];
console.log("minInGroup " + minInGroup);
console.log("maxInGroup " + maxInGroup);
if (groupFunction === "day") {
minInGroup = minInGroup.substring(0, 10);// + "T00:00:00.000Z";
maxInGroup = maxInGroup.substring(0, 10);// + "T00:00:00.000Z";
}
if (groupFunction === "month") {
minInGroup = minInGroup.substring(0, 7) + "-01";// T00:00:00.000Z";
maxInGroup = maxInGroup.substring(0, 7) + "-01";// T00:00:00.000Z";
}
// Simply fill in all dates in the range.
// Also reduce precision for day and month.
var date = new Date(minInGroup);
var maxDate = new Date(maxInGroup);
var iter = 0; // Break infinite loops.
while (date <= maxDate && iter++ < 10000000) {
obj = {};
obj[xCol] = date.toISOString().substring(0, 10);// +
// "T00:00:00.000Z";;
obj[yCol] = 0;
// Get the output ready with this date.
output.push(obj);
// Get the hMap ready so we can quickly map the input into the
// output index.
hMap[obj[xCol]] = output.length - 1;
// Increment date.
// Awesome. Bad date increment code removed thanks to:
// http://stackoverflow.com/a/9989458/1804678
if (groupFunction === "day") {
date.setUTCDate(date.getUTCDate() + 1);
}
if (groupFunction === "month") {
date.setUTCMonth(date.getUTCMonth() + 1);
}
}
}
console.log("hMap " + JSON.stringify(hMap).substring(0, 400) + "...");
// -----------------------------
// perform group and aggregation
// -----------------------------
var max = function(row, idx) {
var groupIdx = hMap[row[xCol]];
if (row[yCol] > output[groupIdx][yCol]) {
output[groupIdx][yCol] = row[yCol];
}
};
var min = function(row, idx) {
var groupIdx = hMap[row[xCol]];
if (row[yCol] < output[groupIdx][yCol]) {
output[groupIdx][yCol] = row[yCol];
}
};
var sum = function(row, idx) {
// Performance testing shows no difference between this and an if/else.
var groupIdx = hMap[row[xCol]];
if (groupIdx === undefined) {
console.log("hMap " + JSON.stringify(hMap).substring(0, 400) + "...");
console.log("row[xCol] " + row[xCol]);
}
output[groupIdx][yCol] += row[yCol];
};
if (aggFunction === "avg") {
sample = new Array(output.length - 1);
for ( var idx = 0; idx < output.length; ++idx) {
sample[idx] = 0;
}
}
var avg = function(row, idx) {
var groupIdx = hMap[row[xCol]];
output[groupIdx][yCol] += row[yCol];
sample[groupIdx] += 1;
};
var count = function(row, idx) {
var groupIdx = hMap[row[xCol]];
output[groupIdx][yCol] += 1;
};
// Use eval to assign the passed in function name to the function, agg.
eval("agg = " + aggFunction);
_.each(input, agg);
// ----------------------------
// avg requires post-processing
// ----------------------------
if (aggFunction === "avg") {
_.each(output, function(row, idx) {
row[yCol] = (sample[idx] === 0) ? 0 : row[yCol] / sample[idx];
});
}
var end = new Date();
console.info("groupBy processed rows " + input.length + " rows in " + (end - start) + "ms.");
return output;
}
describe('groupBy', function() {
beforeEach(module('myApp.controllers'));
var input = [ {
name : "Jess",
date : "2013-12-31T22:22:22.222Z",
age : 1
}, {
name : "Jess",
date : "2013-12-31T11:11:11.111Z",
age : .5
}, {
name : "Amit",
date : "2014-01-03T04:04:04.040Z",
age : 2
} ];
for ( var idx = 0; idx <= 100000; idx++) {
input.push({
name : "Cher",
date : "2014-01-02T00:00:00.000Z",
age : idx
});
}
var monthInput = _.map(input, _.clone);
monthInput[2].date = "2014-03-01T00:00:00.000Z";
it('should throw an exception when groupFunction is bad.', function() {
expect(function() {
groupBy(input, "name", "age", "none", "max");
}).toThrow();
});
it('should throw an exception when aggFunction is bad.', function() {
expect(function() {
groupBy(input, "name", "age", "x", "bad");
}).toThrow();
});
it('should group by name value, showing max.', function() {
expect(groupBy(input, "name", "age", "x", "max")).toEqual([ {
name : "Jess",
age : 1
}, {
name : "Amit",
age : 2
}, {
name : "Cher",
age : 100000
} ]);
});
it('should group by name value, showing sum.', function() {
expect(groupBy(input, "name", "age", "x", "sum")).toEqual([ {
name : "Jess",
age : 1.5
}, {
name : "Amit",
age : 2
}, {
name : "Cher",
age : 5000050000
} ]);
});
it('should group by date/day value, showing sum, filling in days.', function() {
expect(groupBy(input, "date", "age", "day", "sum")).toEqual([ {
date : "2013-12-31",
age : 1.5
}, {
date : "2014-01-01",
age : 0
}, {
date : "2014-01-02",
age : 5000050000
}, {
date : "2014-01-03",
age : 2
} ]);
});
it('should group by date/month value, showing avg, filling in months.', function() {
expect(groupBy(monthInput, "date", "age", "month", "avg")).toEqual([ {
date : "2013-12-01",
age : .75
}, {
date : "2014-01-01",
age : 50000
}, {
date : "2014-02-01",
age : 0
}, {
date : "2014-03-01",
age : 2
} ]);
});
it('should handle blank, null and numbers as strings in input data.', function() {
expect(groupBy([ {
user : "jess",
age : null
}, {
user : null,
age : null
}, {
user : "jess",
age : "2222"
}, {
user : "jess",
age : undefined
}, {
user : "jess",
age : 0
}, {
user : "jess",
age : ''
}, {
user : "jess",
age : "999"
}, {
user : "jess",
age : "99.99"
}, {
user : "jess",
age : "1"
}, {
user : null,
age : NaN
}, {
user : "jess",
age : ' '
}, {
user : "amit",
age : '2222'
}, {
user : "amit",
age : "1"
}, {
user : null,
age : null
}, {
user : "amit",
age : '90'
} ], "user", "age", "x", "max")).toEqual([ {
user : "jess",
age : 2222
}, {
user : "amit",
age : 2222
} ]);
});
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment