Skip to content

Instantly share code, notes, and snippets.

@ankit-maverick
Created November 4, 2013 07:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ankit-maverick/7299153 to your computer and use it in GitHub Desktop.
Save ankit-maverick/7299153 to your computer and use it in GitHub Desktop.
# Build a 10x2 float array holding the values 0..19, then overwrite two
# entries with NaN (row 3 / column 1 and row 5 / column 0) to act as missing
# values for the rest of the session.
# Presumably `import numpy as np` ran in an earlier cell that is not shown.
# NOTE(review): the `np.float` alias was deprecated in NumPy 1.20 and removed
# in 1.24; re-running this cell on a current NumPy needs `float` or
# `np.float64` instead.
In [10]: a = np.arange(20).reshape(10,2).astype(np.float)
In [12]: a[3,1] = np.nan
In [14]: a[5,0] = np.nan
# Echo the array: the two NaN entries show up in rows 3 and 5.
In [15]: a
Out[15]:
array([[ 0., 1.],
[ 2., 3.],
[ 4., 5.],
[ 6., nan],
[ 8., 9.],
[ nan, 11.],
[ 12., 13.],
[ 14., 15.],
[ 16., 17.],
[ 18., 19.]])
# Pass the array through scikit-learn's array2d helper with
# force_all_finite=False; the echoed result below still contains both NaN
# entries, i.e. the NaNs were let through rather than rejected.
# NOTE(review): array2d belongs to an old scikit-learn API and is presumably
# unavailable in current releases - confirm before re-running.
In [16]: from sklearn.utils import array2d
In [17]: X = array2d(a, force_all_finite=False)
In [18]: X
Out[18]:
array([[ 0., 1.],
[ 2., 3.],
[ 4., 5.],
[ 6., nan],
[ 8., 9.],
[ nan, 11.],
[ 12., 13.],
[ 14., 15.],
[ 16., 17.],
[ 18., 19.]])
In [20]: def _get_mask(X, value_to_mask):
....: """Compute the boolean mask X == missing_values."""
....: if value_to_mask == "NaN" or np.isnan(value_to_mask):
....: return np.isnan(X)
....: else:
....: return X == value_to_mask
....:
# Flag the NaN entries of X (the "NaN" sentinel makes _get_mask return
# np.isnan(X)) and wrap X in a masked array so they are left out of the
# reductions below.
In [21]: mask = _get_mask(X, "NaN")
In [24]: masked_X = np.ma.masked_array(X, mask=mask)
# Reduce along axis 0, i.e. one statistic per column, over the unmasked
# entries only.
In [27]: median_masked = np.ma.median(masked_X, axis=0)
In [28]: median_masked
Out[28]:
masked_array(data = [ 8. 11.],
mask = False,
fill_value = 1e+20)
# Each column has 9 unmasked values left: 80 / 9 and 93 / 9 respectively,
# which matches the means echoed below.
In [29]: mean_masked = np.ma.mean(masked_X, axis=0)
In [30]: mean_masked
Out[30]:
masked_array(data = [8.88888888888889 10.333333333333334],
mask = [False False],
fill_value = 1e+20)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment