bradyrx/pearsonr.py

## pearsonr.py
import numpy as np
import scipy.stats as ss
from scipy.stats.stats import pearsonr as pr

def pearsonr(x, y, two_sided=True):
    """
    Computes the Pearson product-moment coefficient of linear correlation. This
    version calculates the effective degrees of freedom, accounting for autocorrelation
    within each time series that could fluff the significance of the correlation.

    Parameters
    ----------
    x : array; independent variable
    y : array; predicted variable
    two_sided : boolean (optional); Whether or not the t-test should be two sided.
    Returns
    -------
    r     : r-value of correlation
    p     : p-value for significance
    n_eff : effective degrees of freedom
    References:
    ----------
    1. Wilks, Daniel S. Statistical methods in the atmospheric sciences.
    Vol. 100. Academic press, 2011.
    2. Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern Annular Mode
    on Southern Ocean circulation and biology." Geophysical Research Letters 32.11 (2005).
    Examples
    --------
    import numpy as np
    import esmtools as et
    x = np.random.randn(100,)
    y = np.random.randn(100,)
    r, p, n = et.stats.pearsonr(x, y)
    """
    r, _ = pr(x, y)
    # Compute effective sample size.
    n = len(x)
    xa, ya = x - np.mean(x), y - np.mean(y)
    xauto, _ = pr(xa[1:], xa[:-1])
    yauto, _ = pr(ya[1:], ya[:-1])
    n_eff = n * (1 - xauto*yauto)/(1 + xauto*yauto)
    n_eff = np.floor(n_eff)
    # Compute t-statistic.
    t = r * np.sqrt((n_eff - 2)/(1 - r**2))
    # Compute p-value.
    if two_sided:
        p = ss.t.sf(np.abs(t), n_eff-1)*2
    else:
        p = ss.t.sf(np.abs(t), n_eff-1)
        return r, p, n_eff
	import numpy as np
	import scipy.stats as ss
	from scipy.stats.stats import pearsonr as pr

	def pearsonr(x, y, two_sided=True):
	"""
	Computes the Pearson product-moment coefficient of linear correlation. This
	version calculates the effective degrees of freedom, accounting for autocorrelation
	within each time series that could fluff the significance of the correlation.

	Parameters
	----------
	x : array; independent variable
	y : array; predicted variable
	two_sided : boolean (optional); Whether or not the t-test should be two sided.
	Returns
	-------
	r : r-value of correlation
	p : p-value for significance
	n_eff : effective degrees of freedom
	References:
	----------
	1. Wilks, Daniel S. Statistical methods in the atmospheric sciences.
	Vol. 100. Academic press, 2011.
	2. Lovenduski, Nicole S., and Nicolas Gruber. "Impact of the Southern Annular Mode
	on Southern Ocean circulation and biology." Geophysical Research Letters 32.11 (2005).
	Examples
	--------
	import numpy as np
	import esmtools as et
	x = np.random.randn(100,)
	y = np.random.randn(100,)
	r, p, n = et.stats.pearsonr(x, y)
	"""
	r, _ = pr(x, y)
	# Compute effective sample size.
	n = len(x)
	xa, ya = x - np.mean(x), y - np.mean(y)
	xauto, _ = pr(xa[1:], xa[:-1])
	yauto, _ = pr(ya[1:], ya[:-1])
	n_eff = n * (1 - xautoyauto)/(1 + xautoyauto)
	n_eff = np.floor(n_eff)
	# Compute t-statistic.
	t = r * np.sqrt((n_eff - 2)/(1 - r**2))
	# Compute p-value.
	if two_sided:
	p = ss.t.sf(np.abs(t), n_eff-1)*2
	else:
	p = ss.t.sf(np.abs(t), n_eff-1)
	return r, p, n_eff