kevhill/nan_compare.py

## nan_compare.py
import numpy as np
import copy


def nan_cov(X, preserve_input=True):

    # we need to mutate input to fill nan's with 0's
    # if we need to use the input later, we need to use a copy now
    if preserve_input:
        X = copy.deepcopy(X)

    # note the nan values, but then fill them with 0s
    nanX = np.isnan(X)
    X[nanX] = 0

    # masking matrix indicating where we have good data
    iX = (~nanX).astype(int)

    # sum of each row when filtered through valid measurements in each pair
    # note that if one of the values going into the sum was nan, it is now 0 and
    # will not contribute to the sum
    sX = np.dot(X, iX.T)

    # count of total matches
    cX = np.dot(iX, iX.T)

    # mean is just sum / count
    uX = sX / cX

    return (np.dot(X, X.T) / cX) - uX * uX.T


def nan_cor(X, preserve_input=True):

    # we need to mutate input to fill nan's with 0's
    # if we need to use the input later, we need to use a copy now
    if preserve_input:
        X = copy.deepcopy(X)

    # note the nan values, but then fill them with 0s
    nanX = np.isnan(X)
    X[nanX] = 0

    # masking matrix indicating where we have good data
    iX = (~nanX).astype(int)

    # sum of each row when filtered through valid measurements in each pair
    # note that if one of the values going into the sum was nan, it is now 0 and
    # will not contribute to the sum
    sX = np.dot(X, iX.T)

    # count of total matches
    cX = np.dot(iX, iX.T)

    # mean is just sum / count
    uX = sX / cX

    # compute covariance matrix
    cov = ((np.dot(X, X.T) / cX) - uX * uX.T)

    # Variance for each row when filtered through valid measures in the pair
    vX = (np.dot(X ** 2, iX.T) / cX - uX ** 2)

    # normalize xcov by the sqrt of product of the variances
    return cov / (vX * vX.T) ** .5


def nan_xcov(A, B, preserve_input=True):

    # we need to mutate A and B to fill nan's with 0's
    # if we need to use the input later, we need to use a copy now
    if preserve_input:
        A = copy.deepcopy(A)
        B = copy.deepcopy(B)

    # note the nan values, but then fill them with 0s
    nanA = np.isnan(A)
    A[nanA] = 0

    nanB = np.isnan(B)
    B[nanB] = 0

    # masking matrices indicating where we have good data
    iA = (~nanA).astype(int)
    iB = (~nanB).astype(int)

    # this is the number of valid matches from A -> B (c.T == matches B -> A)
    c = np.dot(iA, iB.T)

    # compute the cross covariance between the two matrices, filtered for good values
    return (np.dot(A, B.T) - (np.dot(A, iB.T) * np.dot(iA, B.T) / c)) / c


def nan_xcor(A, B, preserve_input=True):

    # we need to mutate A and B to fill nan's with 0's
    # if we need to use the input later, we need to use a copy now
    if preserve_input:
        A = copy.deepcopy(A)
        B = copy.deepcopy(B)

    # note the nan values, but then fill them with 0s
    nanA = np.isnan(A)
    A[nanA] = 0

    nanB = np.isnan(B)
    B[nanB] = 0

    # masking matrices indicating where we have good data
    iA = (~nanA).astype(int)
    iB = (~nanB).astype(int)

    # this is the number of valid matches from A -> B (c.T == matches B -> A)
    c = np.dot(iA, iB.T)

    # compute the cross covariance between the two matrices, filtered for good values
    xcov = (np.dot(A, B.T) - (np.dot(A, iB.T) * np.dot(iA, B.T) / c)) / c

    # Variance for each matrix when filtered through valid measures in the other
    # in the case of vB we are really finding vB.t to put it in the same
    # orientation as vA
    vA = np.dot(A ** 2, iB.T) / c - (np.dot(A, iB.T) / c) ** 2
    vB = np.dot(iA, B.T ** 2) / c - (np.dot(iA, B.T) / c) ** 2

    # normalize xcov by the sqrt of product of the variances
    return xcov / (vA * vB) ** .5
	import numpy as np
	import copy


	def nan_cov(X, preserve_input=True):

	# we need to mutate input to fill nan's with 0's
	# if we need to use the input later, we need to use a copy now
	if preserve_input:
	X = copy.deepcopy(X)

	# note the nan values, but then fill them with 0s
	nanX = np.isnan(X)
	X[nanX] = 0

	# masking matrix indicating where we have good data
	iX = (~nanX).astype(int)

	# sum of each row when filtered through valid measurements in each pair
	# note that if one of the values going into the sum was nan, it is now 0 and
	# will not contribute to the sum
	sX = np.dot(X, iX.T)

	# count of total matches
	cX = np.dot(iX, iX.T)

	# mean is just sum / count
	uX = sX / cX

	return (np.dot(X, X.T) / cX) - uX * uX.T


	def nan_cor(X, preserve_input=True):

	# we need to mutate input to fill nan's with 0's
	# if we need to use the input later, we need to use a copy now
	if preserve_input:
	X = copy.deepcopy(X)

	# note the nan values, but then fill them with 0s
	nanX = np.isnan(X)
	X[nanX] = 0

	# masking matrix indicating where we have good data
	iX = (~nanX).astype(int)

	# sum of each row when filtered through valid measurements in each pair
	# note that if one of the values going into the sum was nan, it is now 0 and
	# will not contribute to the sum
	sX = np.dot(X, iX.T)

	# count of total matches
	cX = np.dot(iX, iX.T)

	# mean is just sum / count
	uX = sX / cX

	# compute covariance matrix
	cov = ((np.dot(X, X.T) / cX) - uX * uX.T)

	# Variance for each row when filtered through valid measures in the pair
	vX = (np.dot(X 2, iX.T) / cX - uX 2)

	# normalize xcov by the sqrt of product of the variances
	return cov / (vX * vX.T) ** .5


	def nan_xcov(A, B, preserve_input=True):

	# we need to mutate A and B to fill nan's with 0's
	# if we need to use the input later, we need to use a copy now
	if preserve_input:
	A = copy.deepcopy(A)
	B = copy.deepcopy(B)

	# note the nan values, but then fill them with 0s
	nanA = np.isnan(A)
	A[nanA] = 0

	nanB = np.isnan(B)
	B[nanB] = 0

	# masking matrices indicating where we have good data
	iA = (~nanA).astype(int)
	iB = (~nanB).astype(int)

	# this is the number of valid matches from A -> B (c.T == matches B -> A)
	c = np.dot(iA, iB.T)

	# compute the cross covariance between the two matrices, filtered for good values
	return (np.dot(A, B.T) - (np.dot(A, iB.T) * np.dot(iA, B.T) / c)) / c


	def nan_xcor(A, B, preserve_input=True):

	# we need to mutate A and B to fill nan's with 0's
	# if we need to use the input later, we need to use a copy now
	if preserve_input:
	A = copy.deepcopy(A)
	B = copy.deepcopy(B)

	# note the nan values, but then fill them with 0s
	nanA = np.isnan(A)
	A[nanA] = 0

	nanB = np.isnan(B)
	B[nanB] = 0

	# masking matrices indicating where we have good data
	iA = (~nanA).astype(int)
	iB = (~nanB).astype(int)

	# this is the number of valid matches from A -> B (c.T == matches B -> A)
	c = np.dot(iA, iB.T)

	# compute the cross covariance between the two matrices, filtered for good values
	xcov = (np.dot(A, B.T) - (np.dot(A, iB.T) * np.dot(iA, B.T) / c)) / c

	# Variance for each matrix when filtered through valid measures in the other
	# in the case of vB we are really finding vB.t to put it in the same
	# orientation as vA
	vA = np.dot(A 2, iB.T) / c - (np.dot(A, iB.T) / c) 2
	vB = np.dot(iA, B.T 2) / c - (np.dot(iA, B.T) / c) 2

	# normalize xcov by the sqrt of product of the variances
	return xcov / (vA * vB) ** .5