// derived from the parallel algorithm at http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
/**
 * Map step: emits a one-sample statistics summary for the current document.
 *
 * Reads `this.value` (the field you want stats for) and emits it under the
 * constant key 1, so all documents land in a single group. Replace the key
 * with a GROUP BY expression to get per-group statistics.
 *
 * Documents whose `value` is missing, null, or not convertible to a number
 * are skipped: emitting them would turn the sum, min, max and variance of
 * the whole group into NaN.
 *
 * Emitted summary shape:
 *   sum   - sum of the values seen (here: the single value)
 *   min   - smallest value seen
 *   max   - largest value seen
 *   count - number of values seen (here: 1)
 *   diff  - M2,n: sum((val-mean)^2), zero for a single sample
 */
function map() {
    var raw = this.value; // the field you want stats for

    // Number(null) is 0, so missing values must be rejected before coercion.
    if (raw === null || raw === undefined) {
        return;
    }

    // Coerce once so numeric strings are summed rather than concatenated.
    var sample = Number(raw);
    if (isNaN(sample)) {
        return;
    }

    emit(1, // Or put a GROUP BY key here
         {sum: sample,
          min: sample,
          max: sample,
          count: 1,
          diff: 0 // M2,n: sum((val-mean)^2)
         });
}
/**
 * Reduce step: merges partial statistics summaries into one summary.
 *
 * Each element of `values` has the shape emitted by map():
 * {sum, min, max, count, diff}, where `diff` is M2 (the sum of squared
 * deviations from the mean). Two partials are combined with the pairwise
 * update from the parallel variance algorithm:
 *
 *   M2 = M2_a + M2_b + (mean_a - mean_b)^2 * (n_a * n_b) / (n_a + n_b)
 *
 * The result has the same shape as the inputs, so it can be passed back
 * into reduce() together with further partials.
 *
 * @param {*} key - the group key (unused by the computation).
 * @param {Array<Object>} values - partial summaries for this key.
 * @returns {Object} the merged summary. For an empty `values` array an
 *     identity summary (count 0, min Infinity, max -Infinity) is returned.
 */
function reduce(key, values) {
    if (values.length === 0) {
        return {sum: 0, min: Infinity, max: -Infinity, count: 0, diff: 0};
    }

    // Accumulate into a copy so the caller's values[0] is left untouched.
    var first = values[0];
    var acc = {};
    Object.keys(first).forEach(function (field) {
        acc[field] = first[field];
    });

    for (var i = 1/*!*/; i < values.length; i++) {
        var part = values[i]; // will merge 'part' into 'acc'

        if (acc.count > 0 && part.count > 0) {
            // temp helpers
            var delta = acc.sum / acc.count - part.sum / part.count; // acc.mean - part.mean
            var weight = (acc.count * part.count) / (acc.count + part.count);
            acc.diff += part.diff + delta * delta * weight;
        } else {
            // An empty side has no mean; 0/0 would make diff NaN, and the
            // between-group term is zero anyway.
            acc.diff += part.diff;
        }

        acc.sum += part.sum;
        acc.count += part.count;
        acc.min = Math.min(acc.min, part.min);
        acc.max = Math.max(acc.max, part.max);
    }

    return acc;
}
/**
 * Finalize step: derives the reported statistics from a reduced summary.
 *
 * Adds the following fields to `value` (in place) and returns it:
 *   avg            - sum / count
 *   variance       - population variance, diff / count
 *   stddev         - population standard deviation, sqrt(variance)
 *   sampleVariance - sample variance, diff / (count - 1)
 *   sampleStddev   - sample standard deviation, sqrt(sampleVariance)
 *
 * The sample statistics are undefined for fewer than two samples and are
 * reported as null in that case.
 *
 * @param {*} key - the group key (unused by the computation).
 * @param {Object} value - reduced summary with sum, count and diff fields.
 * @returns {Object} the same `value` object with the fields above added.
 */
function finalize(key, value) {
    var n = value.count;

    value.avg = value.sum / n;

    // Population statistics: divide M2 by n.
    value.variance = value.diff / n;
    value.stddev = Math.sqrt(value.variance);

    // Sample statistics: divide M2 by n - 1, which needs at least two samples.
    if (n > 1) {
        value.sampleVariance = value.diff / (n - 1);
        value.sampleStddev = Math.sqrt(value.sampleVariance);
    } else {
        value.sampleVariance = null;
        value.sampleStddev = null;
    }

    return value;
}
| > load('functions.js') | |
| > db.stuff.drop() | |
| false | |
| > db.stuff.insert({value:1}) | |
| > db.stuff.insert({value:2}) | |
| > db.stuff.insert({value:2}) | |
| > db.stuff.insert({value:2}) | |
| > db.stuff.insert({value:3}) | |
| > db.stuff.mapReduce(map, reduce, {finalize:finalize, out:{inline:1}}).results[0] | |
| { | |
| "_id" : 1, | |
| "value" : { | |
| "sum" : 10, | |
| "min" : 1, | |
| "max" : 3, | |
| "count" : 5, | |
| "diff" : 2, | |
| "avg" : 2, | |
| "variance" : 0.4, | |
| "stddev" : 0.6324555320336759 | |
| } | |
| } |
This comment has been minimized.
This comment has been minimized.
|
Thank you for posting this! |
This comment has been minimized.
This comment has been minimized.
|
Thank you, It's very useful for me. |
This comment has been minimized.
This comment has been minimized.
|
Can we merge two groups if there are duplicate entries in them? For example, I have two groups, group1 and group2, each holding userName and usage (group1, group2; merged stdev: 7.3485). I want to find the standard deviation of the merged group, but because of the high data volume I can't keep the userNames, so I am maintaining only the sum and the user count; with a probabilistic counting algorithm I can also find the number of unique users (group1, group2, merged group). With this information available, can I find the standard deviation of the merged group? |
This comment has been minimized.
This comment has been minimized.
|
What is the license on this code? |
This comment has been minimized.
This comment has been minimized.
Public Domain |
This comment has been minimized.
This comment has been minimized.
|
Word of warning, this is doing population standard dev and variance, NOT SAMPLE. I cleaned up the code and included sample variance and standard dev. |
This comment has been minimized.
This comment has been minimized.
|
Very useful code |
This comment has been minimized.
This comment has been minimized.
|
Thank you! |
This comment has been minimized.
You sir, rock!
Thanks so much :)