Created Feb 22, 2012
Min, Max, Sum, Count, Avg, and Std deviation using MongoDB MapReduce
 // Single-pass min / max / sum / count / average / standard deviation for
// MongoDB mapReduce. The variance merge step is derived from
// http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm

/**
 * Map step: emits one partial-statistics record for the current document.
 *
 * Reads `this.value` (the field you want stats for) and relies on an
 * `emit(key, value)` function being in scope, as provided by mapReduce.
 * A record describing a single sample has count 1 and diff 0.
 */
function map() {
    emit(1, // Or put a GROUP BY key here
         {sum: this.value, // the field you want stats for
          min: this.value,
          max: this.value,
          count: 1,
          diff: 0 // M2,n: sum((val-mean)^2)
         });
}

/**
 * Reduce step: merges an array of partial-statistics records into one.
 *
 * @param key    the key the records were emitted under (not used here)
 * @param values array of {sum, min, max, count, diff} records
 * @returns a new record of the same shape covering all inputs, or
 *          undefined when `values` is empty
 */
function reduce(key, values) {
    var first = values[0];
    if (!first) {
        return first; // nothing to merge
    }

    // Accumulate into a fresh copy so the caller's input records are left
    // untouched.
    var a = {
        sum: first.sum,
        min: first.min,
        max: first.max,
        count: first.count,
        diff: first.diff
    };

    // Start at index 1: values[0] already seeded the accumulator.
    for (var i = 1; i < values.length; i++) {
        var b = values[i]; // will merge 'b' into 'a'

        // temp helpers
        var delta = a.sum / a.count - b.sum / b.count; // a.mean - b.mean
        var weight = (a.count * b.count) / (a.count + b.count);

        // do the reducing; diff must be updated before sum and count
        // because delta and weight were computed from the pre-merge values
        a.diff += b.diff + delta * delta * weight;
        a.sum += b.sum;
        a.count += b.count;
        a.min = Math.min(a.min, b.min);
        a.max = Math.max(a.max, b.max);
    }

    return a;
}

/**
 * Finalize step: adds the derived statistics to a reduced record.
 *
 * Adds `avg`, `variance` and `stddev` to `value` in place and returns it.
 * The variance divides by `count`, i.e. it is the POPULATION variance,
 * not the sample variance (which would divide by count - 1).
 *
 * @param key   the key of the reduced record (not used here)
 * @param value the {sum, min, max, count, diff} record to extend
 * @returns the same `value` object with avg, variance and stddev set
 */
function finalize(key, value) {
    value.avg = value.sum / value.count;
    value.variance = value.diff / value.count;
    value.stddev = Math.sqrt(value.variance);
    return value;
}
 > load('functions.js')
 > db.stuff.drop()
 false
 > db.stuff.insert({value:1})
 > db.stuff.insert({value:2})
 > db.stuff.insert({value:2})
 > db.stuff.insert({value:2})
 > db.stuff.insert({value:3})
 > db.stuff.mapReduce(map, reduce, {finalize:finalize, out:{inline:1}}).results[0]
 {
     "_id" : 1,
     "value" : {
         "sum" : 10,
         "min" : 1,
         "max" : 3,
         "count" : 5,
         "diff" : 2,
         "avg" : 2,
         "variance" : 0.4,
         "stddev" : 0.6324555320336759
     }
 }

### peshkira commented Jun 1, 2012

 You sir, rock! Thanks so much :)

### benbuckman commented Jun 24, 2012

 Thank you for posting this!

### ghost commented Nov 1, 2012

 Thank you, It's very useful for me.

### zuxqoj commented May 2, 2013

 Can we merge two groups if there are duplicate entries in them? For example, I have two groups, group1 and group2, each holding userName and usage. group1: user1 10, user2 12, user3 15. group2: user2 14, user3 13, user4 16. Merged: user1 10, user2 26, user3 28, user4 16; stdev = 7.3485. I want to find the standard deviation of the merged group, but because of the high data volume I can't keep the userNames, so I am maintaining only the sum and the user count, and with a probabilistic counting algorithm I can also find the number of unique users. group1: count 3, sum 37, diff 9.935. group2: count 3, sum 43, diff 7.919. Merged group: unique users 4 (group1: count 3, sum 37, diff 9.935; group2: count 3, sum 43, diff 7.919). With only this information available, can I find the standard deviation of the merged group? Thanks!

### jalava commented Jun 7, 2013

 What is the license on this code?

### RedBeard0531 commented Sep 11, 2013

 @jalava "What is the license on this code?" — Public Domain.

### Pyrolistical commented Dec 26, 2013

 Word of warning, this is doing population standard dev and variance, NOT SAMPLE. I cleaned up the code and included sample variance and standard dev. https://gist.github.com/Pyrolistical/8139958

### db-roberto commented Mar 13, 2014

 Very useful code

### patrickhempel commented Jul 17, 2014

 Thank you!