Skip to content

Instantly share code, notes, and snippets.

@j08lue
Last active April 28, 2016 10:57
Show Gist options
  • Save j08lue/8c4034432d12902bdd11a85d63af3b26 to your computer and use it in GitHub Desktop.
Save j08lue/8c4034432d12902bdd11a85d63af3b26 to your computer and use it in GitHub Desktop.
assigning data with missing values to int-type netCDF variables with scipy and netCDF4
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'0.17.0'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import scipy\n",
"from scipy.io import netcdf\n",
"scipy.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Make some sample data in a masked array"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"masked_array(data = [1.0 2.0 -- -- --],\n",
" mask = [False False True True True],\n",
" fill_value = 1e+20)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = np.ma.array([1,2,3,4,5], dtype='f', mask=[False, False, True, True, True])\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def printr(var):\n",
" print(var.__repr__())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize netCDF file"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"fname = 'scipy_nan_test.nc'\n",
"#ds = netcdf.netcdf_file(fname, 'w', maskandscale=True)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
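"# netCDF4 is used here to compare with scipy's behaviour: this cell is currently enabled and the scipy `netcdf_file` line above is commented out. When switching between the two libraries, adapt the `_FillValue` assignments further down (see the comments there).\n",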
"import netCDF4\n",
"ds = netCDF4.Dataset(fname, 'w')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds.createDimension('time', len(data))\n",
"time = ds.createVariable('time', 'i', ('time',))\n",
"time[:] = np.arange(len(data))\n",
"time.units = 'days since 2008-01-01'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create `float` and `int` variables and assign data in different ways"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1. Float masked array to float variable -- OK"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"masked_array(data = [1.0 2.0 -- -- --],\n",
" mask = [False False True True True],\n",
" fill_value = 9.96921e+36)\n",
"\n"
]
}
],
"source": [
"floatvar_masked = ds.createVariable('floatvar_masked', 'f', ('time',))\n",
"floatvar_masked[:] = data\n",
"printr(floatvar_masked[:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 2. Float data with NaN to float variable -- OK"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array([ 1., 2., nan, nan, nan], dtype=float32)\n"
]
}
],
"source": [
"floatvar_nan = ds.createVariable('floatvar_nan', 'f', ('time',))\n",
"floatvar_nan[:] = data.filled(np.nan)\n",
"printr(floatvar_nan[:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 3. Float `masked_array` to int variable -- FAILS SILENTLY"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array([ 1, 2, -2147483648, -2147483648, -2147483648], dtype=int32)\n"
]
}
],
"source": [
"intvar_masked = ds.createVariable('intvar_masked', 'i', ('time',))\n",
"intvar_masked[:] = data\n",
"printr(intvar_masked[:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
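"This failure is probably due to the same problem as `intvar_fill_bad` (case 7 below): upon assignment, the float-type `data` is filled with the `masked_array`'s own `fill_value`, which is 1e20 by default (see the sample data above). That value cannot be represented as a 32-bit integer, so the masked entries end up as -2147483648 instead of being masked."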
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 4. Integer masked array to integer variable -- OK"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"masked_array(data = [1 2 -- -- --],\n",
" mask = [False False True True True],\n",
" fill_value = -2147483647)\n",
"\n"
]
}
],
"source": [
"intvar_masked_int = ds.createVariable('intvar_masked_int', 'i', ('time',))\n",
"intvar_masked_int[:] = data.astype('i')\n",
"printr(intvar_masked_int[:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 5. Float data with NaN to integer variable -- FAILS SILENTLY"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"array([ 1, 2, -2147483648, -2147483648, -2147483648], dtype=int32)\n"
]
}
],
"source": [
"intvar_nan = ds.createVariable('intvar_nan', 'i', ('time',))\n",
"intvar_nan[:] = data.filled(np.nan)\n",
"printr(intvar_nan[:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 6. Float data filled with compatible `_FillValue` to int variable -- OK"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"masked_array(data = [1 2 -- -- --],\n",
" mask = [False False True True True],\n",
" fill_value = 888888)\n",
"\n"
]
}
],
"source": [
"intvar_fill = ds.createVariable('intvar_fill', 'i', ('time',), fill_value=888888)\n",
"#intvar_fill._FillValue = 888888 # comment this for netCDF4\n",
"intvar_fill[:] = data.filled(intvar_fill._FillValue)\n",
"printr(intvar_fill[:])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now the same with conflicting `_FillValue = 1e30`:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 7. Float data filled with incompatible `_FillValue` to int variable -- FAILS SILENTLY (scipy)\n",
"\n",
"NB: With `netCDF4`, `_FillValue` has to be assigned during `createVariable` (via the `fill_value` argument) and is checked for compatibility. Therefore, with `netCDF4`, the cell below raises an `OverflowError` instead of failing silently."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "OverflowError",
"evalue": "Python int too large to convert to C long",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mOverflowError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-14-90df8d6ffba2>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mintvar_fill_bad\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreateVariable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'intvar_fill_bad'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'i'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;34m'time'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfill_value\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1e30\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;31m#intvar_fill_bad._FillValue = 1e30\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mintvar_fill_bad\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfilled\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mintvar_fill_bad\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_FillValue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;31m#printr(intvar_fill[:]) # printing raises an OverflowError error in scipy, though\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mnetCDF4/_netCDF4.pyx\u001b[0m in \u001b[0;36mnetCDF4._netCDF4.Dataset.createVariable (netCDF4/_netCDF4.c:15837)\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mnetCDF4/_netCDF4.pyx\u001b[0m in \u001b[0;36mnetCDF4._netCDF4.Variable.__init__ (netCDF4/_netCDF4.c:27980)\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mOverflowError\u001b[0m: Python int too large to convert to C long"
]
}
],
"source": [
"intvar_fill_bad = ds.createVariable('intvar_fill_bad', 'i', ('time',), fill_value=1e30)\n",
"#intvar_fill_bad._FillValue = 1e30\n",
"intvar_fill_bad[:] = data.filled(intvar_fill_bad._FillValue)\n",
"#printr(intvar_fill_bad[:]) # printing raises an OverflowError in scipy, though"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ds.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
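"With scipy, the assignment above succeeds without any error, but the data written to the file is not masked correctly, as the `ncdump` output shows:"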
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"!ncdump -v intvar_fill_bad $fname"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.0rc4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment