{{ message }}

Instantly share code, notes, and snippets.

# matt-west/pearson-correlation.js

Created Sep 9, 2013
Pearson Correlation (JavaScript)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
 /**
  * @fileoverview Pearson correlation score algorithm.
  * @author matt.west@kojilabs.com (Matt West)
  * @license Copyright 2013 Matt West.
  * Licensed under MIT (http://opensource.org/licenses/MIT).
  */

 /**
  * Calculate the Pearson correlation score between two items in a dataset.
  *
  * Only keys for which BOTH items hold a finite numeric rating take part in
  * the calculation. A rating of 0 is a valid rating and is included; missing,
  * null, NaN and non-numeric entries are skipped.
  *
  * The score is computed with the two-pass, mean-centred formula
  *   r = sum(dx * dy) / (sqrt(sum(dx^2)) * sqrt(sum(dy^2)))
  * rather than the single-pass "sum of squares minus squared sum" shortcut,
  * because the shortcut subtracts two nearly equal large numbers and can
  * produce a slightly negative variance (and therefore NaN from Math.sqrt)
  * on large or nearly constant series.
  *
  * @param {object} prefs The dataset containing data about both items that
  *     are being compared. Each item may be a plain object (associative
  *     array of ratings) or an array of numbers.
  * @param {string} p1 Item one for comparison.
  * @param {string} p2 Item two for comparison.
  * @return {number} The Pearson correlation score, clamped to [-1, 1].
  *     Returns 0 when either item is missing, when the items share no rated
  *     keys, or when either series has no variance (the score is undefined
  *     in that case).
  */
 function pearsonCorrelation(prefs, p1, p2) {
   const ratings1 = prefs[p1];
   const ratings2 = prefs[p2];

   // An item that is absent from the dataset shares nothing with the other.
   if (ratings1 == null || ratings2 == null) return 0;

   const hasOwn = Object.prototype.hasOwnProperty;

   // Keys rated by both items. Own properties only, so inherited members
   // such as `toString` can never be mistaken for a rating.
   const shared = Object.keys(ratings1).filter(function (key) {
     return hasOwn.call(ratings2, key) &&
         Number.isFinite(ratings1[key]) &&
         Number.isFinite(ratings2[key]);
   });

   const n = shared.length;
   if (n === 0) return 0;

   // First pass: the mean of each series.
   let sum1 = 0;
   let sum2 = 0;
   for (const key of shared) {
     sum1 += ratings1[key];
     sum2 += ratings2[key];
   }
   const mean1 = sum1 / n;
   const mean2 = sum2 / n;

   // Second pass: sums of products of the deviations from the means.
   let coMoment = 0;
   let sqDev1 = 0;
   let sqDev2 = 0;
   for (const key of shared) {
     const d1 = ratings1[key] - mean1;
     const d2 = ratings2[key] - mean2;
     coMoment += d1 * d2;
     sqDev1 += d1 * d1;
     sqDev2 += d2 * d2;
   }

   // Taking the square roots separately avoids overflowing the product.
   const den = Math.sqrt(sqDev1) * Math.sqrt(sqDev2);
   if (den === 0) return 0;

   // Rounding can push the ratio a hair outside the mathematical range.
   return Math.max(-1, Math.min(1, coMoment / den));
 }

### sash108 commented May 27, 2015

 Could you please give a usage example with a sample dataset?

### antonioaltamura commented Aug 29, 2015

 If you still need it :) ```var data = new Array( new Array(21,54,60,78,82), new Array(20,54,54,65,45) ); console.log(pearsonCorrelation(data,0,1)) ``` Please note that the function also works with qualitative distributions (associative arrays).

### kdamar3 commented May 18, 2017

 Hi there, I used your library because I was too lazy to write one myself. I got a Pearson correlation of NaN for some huge dataset arrays. I then tried writing new code and got the actual value. One array has more than 20,000 rows with values like 0.4554, and the other has values like 20. As it is a big file, I cannot link it here, but I just wanted to bring this to your attention.

### nahidakbar commented Jul 21, 2017

 /**
  * Compute the Pearson correlation coefficient of a list of (x, y) points.
  *
  * Uses the two-pass form: the means are accumulated first, then the sums of
  * the products of the deviations from those means.
  *
  * @param {Array<{x: number, y: number}>} values The paired observations.
  * @return {number} The correlation coefficient, or 0 when `values` is empty
  *     or when either coordinate has no variance (denominator is 0).
  */
 function pearson(values) {
   const n = values.length;
   if (n === 0) return 0;

   // Each term is divided by n as it is added, so the running totals are
   // already the means when the loop ends.
   let meanX = 0;
   let meanY = 0;
   for (let i = 0; i < n; i++) {
     meanX += values[i].x / n;
     meanY += values[i].y / n;
   }

   let num = 0;
   let den1 = 0;
   let den2 = 0;
   for (let i = 0; i < n; i++) {
     const dx = values[i].x - meanX;
     const dy = values[i].y - meanY;
     num += dx * dy;
     den1 += dx * dx;
     den2 += dy * dy;
   }

   const den = Math.sqrt(den1) * Math.sqrt(den2);
   if (den === 0) return 0;

   return num / den;
 }

 // Example: two perfectly correlated points.
 pearson([{ x: 0, y: 0 }, { x: 1, y: 1 }]);

### LtHelo commented Jun 7, 2018

 Thank you, this is just what I needed.

### comfuture commented Oct 26, 2018

 I've rewritten this function in ES6. The code is more concise, but it does not work with associative arrays, which the original code supported. ```/** * calculates pearson correlation * @param {number[]} d1 * @param {number[]} d2 */ export function corr(d1, d2) { let { min, pow, sqrt } = Math let add = (a, b) => a + b let n = min(d1.length, d2.length) if (n === 0) { return 0 } [d1, d2] = [d1.slice(0, n), d2.slice(0, n)] let [sum1, sum2] = [d1, d2].map(l => l.reduce(add)) let [pow1, pow2] = [d1, d2].map(l => l.reduce((a, b) => a + pow(b, 2), 0)) let mulSum = d1.map((n, i) => n * d2[i]).reduce(add) let dense = sqrt((pow1 - pow(sum1, 2) / n) * (pow2 - pow(sum2, 2) / n)) if (dense === 0) { return 0 } return (mulSum - (sum1 * sum2 / n)) / dense }```

### joracha commented Nov 10, 2019

 ```function pearson(x, y){ let promedio = (lista) => { return lista.reduce((s, a) => s + a, 0) / lista.length }; let n = x.length, prom_x = promedio(x) , prom_y = promedio(y); return (x.map( (e, i, r) => (r = {x:e, y:y[i]}) ).reduce( (s, a) => s + a.x * a.y, 0) - n * prom_x * prom_y) / ((Math.sqrt(x.reduce( (s, a) => (s + a * a) , 0) - n * prom_x * prom_x)) * (Math.sqrt(y.reduce( (s, a) => (s + a * a) , 0) - n * prom_y * prom_y))); } ``` This is my version! I wrote it as practice for an exam that I have soon.

### mbaev commented Apr 24, 2021

 @joracha your version is perfect! It's the fastest version so far! Unfortunately, I found that it doesn't handle "empty" values, which it needs to. I've improved it a bit: ```function pearson (x, y) { const promedio = l => l.reduce((s, a) => s + a, 0) / l.length const calc = (v, prom) => Math.sqrt(v.reduce((s, a) => (s + a * a), 0) - n * prom * prom) let n = x.length let nn = 0 for (let i = 0; i < n; i++, nn++) { if ((!x[i] && x[i] !== 0) || (!y[i] && y[i] !== 0)) { nn-- continue } x[nn] = x[i] y[nn] = y[i] } if (n !== nn) { x = x.splice(0, nn) y = y.splice(0, nn) n = nn } const prom_x = promedio(x), prom_y = promedio(y) return (x .map((e, i) => ({ x: e, y: y[i] })) .reduce((v, a) => v + a.x * a.y, 0) - n * prom_x * prom_y ) / (calc(x, prom_x) * calc(y, prom_y)) }``` Tests showed that the speed degradation is insignificant (6.92%): ```// test.js const initialPearson = require('../initialPearson') const improvedPearson = require('../improvedPearson') const series1 = [80.026413,80.330908,76.68564,72.095302,75.473899,82.647118] const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228] console.log('initialPearson', initialPearson(series1, series2)) console.log('improvedPearson', improvedPearson(series1, series2)) benchmarkjs.options({ testTime: 4000 }) benchmarkjs('initialPearson', () => initialPearson(series1, series2)) benchmarkjs('improvedPearson', () => improvedPearson(series1, series2)) console.log(benchmarkjs.results)``` Output: ```$ node test.js initialPearson 0.6917288370450843 improvedPearson 0.6917288370450843 [ { name: 'initialPearson', elapsed: 4397, checks: 3, totalIterations: 48518134, perSecondIterations: 11034372, isOptimized: null, diff: '0%' }, { name: 'improvedPearson', elapsed: 4395, checks: 3, totalIterations: 45139154, perSecondIterations: 10270569, isOptimized: null, diff: '6.92%' } ]``` With a null value present, it works as expected (and is actually even faster): ```// test.js const initialPearson = require('../initialPearson') 
const improvedPearson = require('../improvedPearson') const series1 = [80.026413,80.330908,76.68564,72.095302,null,82.647118] const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228] benchmarkjs.options({ testTime: 4000 }) benchmarkjs('initialPearson', () => initialPearson(series1, series2)) benchmarkjs('improvedPearson', () => improvedPearson(series1, series2)) console.log(benchmarkjs.results)``` ```$ node test.js initialPearson 0.5995223578720159 improvedPearson 0.6570987279478532 [ { name: 'initialPearson', elapsed: 4136, checks: 2, totalIterations: 26454098, perSecondIterations: 6396058, isOptimized: null, diff: '0%' }, { name: 'improvedPearson', elapsed: 4139, checks: 2, totalIterations: 26027407, perSecondIterations: 6288332, isOptimized: null, diff: '1.68%' } ]``` Thank you!

### netik commented Jul 7, 2021

 The above code has a bug, n is used before being defined. ` let n = x.length` needs to be before the `const calc ...` line.

### mbaev commented Jul 11, 2021

 > The above code has a bug, n is used before being defined. ` let n = x.length` needs to be before the `const calc ...` line.

 No, it doesn't. The third line defines the function `calc` without calling it.

### netik commented Jul 11, 2021

 > > The above code has a bug, n is used before being defined. ` let n = x.length` needs to be before the `const calc ...` line.
 >
 > No it hasn't. The third line defines function `calc` without calling it.

 Well, it's still bad form. You're referencing a global that's defined later in scope, and generally that's bad form. The current version of ESLint doesn't like this, but as you're only giving an example here, I guess it might be fine.