Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Pearson Correlation (JavaScript)
/**
* @fileoverview Pearson correlation score algorithm.
* @author matt.west@kojilabs.com (Matt West)
* @license Copyright 2013 Matt West.
* Licensed under MIT (http://opensource.org/licenses/MIT).
*/
/**
* Calculate the person correlation score between two items in a dataset.
*
* @param {object} prefs The dataset containing data about both items that
* are being compared.
* @param {string} p1 Item one for comparison.
* @param {string} p2 Item two for comparison.
* @return {float} The pearson correlation score.
*/
function pearsonCorrelation(prefs, p1, p2) {
var si = [];
for (var key in prefs[p1]) {
if (prefs[p2][key]) si.push(key);
}
var n = si.length;
if (n == 0) return 0;
var sum1 = 0;
for (var i = 0; i < si.length; i++) sum1 += prefs[p1][si[i]];
var sum2 = 0;
for (var i = 0; i < si.length; i++) sum2 += prefs[p2][si[i]];
var sum1Sq = 0;
for (var i = 0; i < si.length; i++) {
sum1Sq += Math.pow(prefs[p1][si[i]], 2);
}
var sum2Sq = 0;
for (var i = 0; i < si.length; i++) {
sum2Sq += Math.pow(prefs[p2][si[i]], 2);
}
var pSum = 0;
for (var i = 0; i < si.length; i++) {
pSum += prefs[p1][si[i]] * prefs[p2][si[i]];
}
var num = pSum - (sum1 * sum2 / n);
var den = Math.sqrt((sum1Sq - Math.pow(sum1, 2) / n) *
(sum2Sq - Math.pow(sum2, 2) / n));
if (den == 0) return 0;
return num / den;
}
@sash108

This comment has been minimized.

Copy link

@sash108 sash108 commented May 27, 2015

could you give one example please for it with example dataset?

@antonioaltamura

This comment has been minimized.

Copy link

@antonioaltamura antonioaltamura commented Aug 29, 2015

If you still need it :)

var data = new Array(
                    new Array(21,54,60,78,82),
                    new Array(20,54,54,65,45)
        );
console.log(pearsonCorrelation(data,0,1))

please note the function works with qualitative distributions too (associative array)

@kdamar3

This comment has been minimized.

Copy link

@kdamar3 kdamar3 commented May 18, 2017

Hi there, i used ur library just lazy to write one...i got a pearson relation as NaN for some huge dataset arrays. i tried writing a new code ..then got the actual value.

i have one array with values like 0.4554 of more than 20,000 rows and another with values like 20. as it is big file, i cannot link it here but just want to bring to ur attention.

@nahidakbar

This comment has been minimized.

Copy link

@nahidakbar nahidakbar commented Jul 21, 2017

function pearson(values) {
const n = values.length;

if (n == 0) return 0;

let meanX = 0;
let meanY = 0;
for (var i = 0; i < n; i++) {
meanX += values[i].x / n
meanY += values[i].y / n
}

let num = 0;
let den1 = 0;
let den2 = 0;

for (var i = 0; i < n; i++) {
let dx = (values[i].x - meanX);
let dy = (values[i].y - meanY);
num += dx * dy
den1 += dx * dx
den2 += dy * dy
}

const den = Math.sqrt(den1) * Math.sqrt(den2);

if (den == 0) return 0;

return num / den;
}

pearson([{x: 0, y: 0}, {x:1, y:1}])

@LtHelo

This comment has been minimized.

Copy link

@LtHelo LtHelo commented Jun 7, 2018

thank u ,i just need this

@comfuture

This comment has been minimized.

Copy link

@comfuture comfuture commented Oct 26, 2018

I've rewrote this function in es6.
The code is more concise, but it does not work with associative arrays that were possible in the original code.

/**
 * calculates pearson correlation
 * @param {number[]} d1
 * @param {number[]} d2
 */
export function corr(d1, d2) {
  let { min, pow, sqrt } = Math
  let add = (a, b) => a + b
  let n = min(d1.length, d2.length)
  if (n === 0) {
    return 0
  }
  [d1, d2] = [d1.slice(0, n), d2.slice(0, n)]
  let [sum1, sum2] = [d1, d2].map(l => l.reduce(add))
  let [pow1, pow2] = [d1, d2].map(l => l.reduce((a, b) => a + pow(b, 2), 0))
  let mulSum = d1.map((n, i) => n * d2[i]).reduce(add)
  let dense = sqrt((pow1 - pow(sum1, 2) / n) * (pow2 - pow(sum2, 2) / n))
  if (dense === 0) {
    return 0
  }
  return (mulSum - (sum1 * sum2 / n)) / dense
}
@joracha

This comment has been minimized.

Copy link

@joracha joracha commented Nov 10, 2019

function pearson(x, y){
	let promedio = (lista) => { return lista.reduce((s, a) => s + a, 0) / lista.length };
	let n = x.length, prom_x = promedio(x) , prom_y = promedio(y);
	return (x.map( (e, i, r) => (r = {x:e, y:y[i]}) ).reduce( (s, a) => s + a.x * a.y, 0) - n * prom_x * prom_y) / 
	((Math.sqrt(x.reduce( (s, a) => (s + a * a) , 0) - n * prom_x * prom_x)) *
	(Math.sqrt(y.reduce( (s, a) => (s + a * a) , 0) - n * prom_y * prom_y)));
} 

This is my version!. I just did it as a practice for an exam that I have soon

@mbaev

This comment has been minimized.

Copy link

@mbaev mbaev commented Apr 24, 2021

@joracha your version is perfect! It's fastest version at this time!

Unfortunately I found that it doesn't respect to "empty" values, but it have to.

image

I've improved it a bit:

function pearson (x, y) {
  const promedio = l => l.reduce((s, a) => s + a, 0) / l.length
  const calc = (v, prom) => Math.sqrt(v.reduce((s, a) => (s + a * a), 0) - n * prom * prom)
  let n = x.length
  let nn = 0
  for (let i = 0; i < n; i++, nn++) {
    if ((!x[i] && x[i] !== 0) || (!y[i] && y[i] !== 0)) {
      nn--
      continue
    }
    x[nn] = x[i]
    y[nn] = y[i]
  }
  if (n !== nn) {
    x = x.splice(0, nn)
    y = y.splice(0, nn)
    n = nn
  }
  const prom_x = promedio(x), prom_y = promedio(y)
  return (x
      .map((e, i) => ({ x: e, y: y[i] }))
      .reduce((v, a) => v + a.x * a.y, 0) - n * prom_x * prom_y
  ) / (calc(x, prom_x) * calc(y, prom_y))
}

Test showed that the speed degradation is insignificant (6.92%):

// test.js
const initialPearson = require('../initialPearson')
const improvedPearson = require('../improvedPearson')

const series1 = [80.026413,80.330908,76.68564,72.095302,75.473899,82.647118]
const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228]

console.log('initialPearson', initialPearson(series1, series2))
console.log('improvedPearson', improvedPearson(series1, series2))

benchmarkjs.options({ testTime: 4000 })
benchmarkjs('initialPearson', () => initialPearson(series1, series2))
benchmarkjs('improvedPearson', () => improvedPearson(series1, series2))
console.log(benchmarkjs.results)

Output

$ node test.js
initialPearson 0.6917288370450843
improvedPearson 0.6917288370450843
[
  {
    name: 'initialPearson',
    elapsed: 4397,
    checks: 3,
    totalIterations: 48518134,
    perSecondIterations: 11034372,
    isOptimized: null,
    diff: '0%'
  },
  {
    name: 'improvedPearson',
    elapsed: 4395,
    checks: 3,
    totalIterations: 45139154,
    perSecondIterations: 10270569,
    isOptimized: null,
    diff: '6.92%'
  }
]

In case of having a null it works as expected (and even faster actually):

// test.js
const initialPearson = require('../initialPearson')
const improvedPearson = require('../improvedPearson')

const series1 = [80.026413,80.330908,76.68564,72.095302,null,82.647118]
const series2 = [81.662683,85.802179,78.148427,81.126326,77.853491,85.974228]

benchmarkjs.options({ testTime: 4000 })
benchmarkjs('initialPearson', () => initialPearson(series1, series2))
benchmarkjs('improvedPearson', () => improvedPearson(series1, series2))
console.log(benchmarkjs.results)
$ node test.js
initialPearson 0.5995223578720159
improvedPearson 0.6570987279478532
[
  {
    name: 'initialPearson',
    elapsed: 4136,
    checks: 2,
    totalIterations: 26454098,
    perSecondIterations: 6396058,
    isOptimized: null,
    diff: '0%'
  },
  {
    name: 'improvedPearson',
    elapsed: 4139,
    checks: 2,
    totalIterations: 26027407,
    perSecondIterations: 6288332,
    isOptimized: null,
    diff: '1.68%'
  }
]

Thank you!

@netik

This comment has been minimized.

Copy link

@netik netik commented Jul 7, 2021

The above code has a bug, n is used before being defined.

let n = x.length

needs to be before the const calc ... line.

@mbaev

This comment has been minimized.

Copy link

@mbaev mbaev commented Jul 11, 2021

The above code has a bug, n is used before being defined.

let n = x.length

needs to be before the const calc ... line.

No it hasn't. The third line defines function calc without calling it.

@netik

This comment has been minimized.

Copy link

@netik netik commented Jul 11, 2021

The above code has a bug, n is used before being defined.
let n = x.length
needs to be before the const calc ... line.

No it hasn't. The third line defines function calc without calling it.

Well, it's still bad form. You're referencing a global that's defined later in scope and generally that's bad form.
The current version of ESLint doesn't like this, but as you're only giving an example here I guess it might be fine.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment