Skip to content

Instantly share code, notes, and snippets.

@lukegre
Last active December 10, 2020 20:06
Show Gist options
  • Save lukegre/4bb5c483b2d111e52413b260311fbe43 to your computer and use it in GitHub Desktop.
Save lukegre/4bb5c483b2d111e52413b260311fbe43 to your computer and use it in GitHub Desktop.
function that takes the trend for an xarray.DataArray over the 'time' variable. Returns the slopes and p-values for each location.
def dataset_encoding(xds):
cols = ['source', 'original_shape', 'dtype', 'zlib', 'complevel', 'chunksizes']
info = pd.DataFrame(columns=cols, index=xds.data_vars)
for row in info.index:
var_encoding = xds[row].encoding
for col in info.keys():
info.ix[row, col] = var_encoding.pop(col, '')
return info
def xarray_trend(xarr):
from scipy import stats
# getting shapes
m = np.prod(xarr.shape[1:]).squeeze()
n = xarr.shape[0]
# creating x and y variables for linear regression
x = xarr.time.to_pandas().index.to_julian_date().values[:, None]
y = xarr.to_masked_array().reshape(n, -1)
# ############################ #
# LINEAR REGRESSION DONE BELOW #
xm = x.mean(0) # mean
ym = y.mean(0) # mean
ya = y - ym # anomaly
xa = x - xm # anomaly
# variance and covariances
xss = (xa ** 2).sum(0) / (n - 1) # variance of x (with df as n-1)
yss = (ya ** 2).sum(0) / (n - 1) # variance of y (with df as n-1)
xys = (xa * ya).sum(0) / (n - 1) # covariance (with df as n-1)
# slope and intercept
slope = xys / xss
intercept = ym - (slope * xm)
# statistics about fit
df = n - 2
r = xys / (xss * yss)**0.5
t = r * (df / ((1 - r) * (1 + r)))**0.5
p = stats.distributions.t.sf(abs(t), df)
# misclaneous additional functions
# yhat = dot(x, slope[None]) + intercept
# sse = ((yhat - y)**2).sum(0) / (n - 2) # n-2 is df
# se = ((1 - r**2) * yss / xss / df)**0.5
# preparing outputs
out = xarr[:2].mean('time')
# first create variable for slope and adjust meta
xarr_slope = out.copy()
xarr_slope.name += '_slope'
xarr_slope.attrs['units'] = 'units / day'
xarr_slope.values = slope.reshape(xarr.shape[1:])
# do the same for the p value
xarr_p = out.copy()
xarr_p.name += '_Pvalue'
xarr_p.attrs['info'] = "If p < 0.05 then the results from 'slope' are significant."
xarr_p.values = p.reshape(xarr.shape[1:])
# join these variables
xarr_out = xarr_slope.to_dataset(name='slope')
xarr_out['pval'] = xarr_p
return xarr_out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment