jreadey/wtk_compute_mean.ipynb

## wtk_compute_mean.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import h5pyd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the wind data \"file\"\n",
    "# server endpoint, username, and password are found via a config file\n",
    "f = h5pyd.File(\"/nrel/wtk-us.h5\", 'r') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "dset = f['windspeed_100m']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(61368, 1602, 2976)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dset.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(24, 89, 186)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dset.chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2557.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "61368/24  # number of chunks in time dimension"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.784469783306122"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "24*89*186*4*2557/(1024**3)  # size of one column (in time) of chunks in GB "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# np.float was only an alias for the builtin float and was removed in NumPy 1.24;\n",
    "# np.float64 is the same dtype and works on every NumPy version.\n",
    "arr = np.zeros((240,89,186),dtype=np.float64)  # numpy array to store 10 days of chunks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 28 ms, sys: 36 ms, total: 64 ms\n",
      "Wall time: 6.36 s\n"
     ]
    }
   ],
   "source": [
    "%time arr[:,:,:] = dset[0:240,0:89,0:186]    # read 10 days"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "def calc_mean(arr):\n",
    "    \"\"\"Return the per-grid-point mean of arr taken along its first axis.\n",
    "\n",
    "    arr is a 3-D block indexed [time, i, j] (e.g. the 240x89x186 array read\n",
    "    from dset above).  The result is a float64 array of shape arr.shape[1:].\n",
    "    \"\"\"\n",
    "    # One vectorized reduction instead of a Python loop over all 89*186 cells;\n",
    "    # the output shape follows the input, so nothing is tied to one chunk size.\n",
    "    return np.mean(arr, axis=0, dtype=np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 92 ms, sys: 4 ms, total: 96 ms\n",
      "Wall time: 93.7 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[9.61118126, 9.58198522, 9.55314557, ..., 8.51358325, 8.54330053,\n",
       "        8.54180031],\n",
       "       [9.57530956, 9.56639554, 9.56540403, ..., 8.51742347, 8.5096412 ,\n",
       "        8.49697638],\n",
       "       [9.58162972, 9.57684822, 9.57565279, ..., 8.47134066, 8.44164877,\n",
       "        8.43831708],\n",
       "       ...,\n",
       "       [9.33155575, 9.34589939, 9.35558863, ..., 8.28590247, 8.25944064,\n",
       "        8.23347441],\n",
       "       [9.35494076, 9.35869188, 9.3528677 , ..., 8.26886285, 8.22571777,\n",
       "        8.21325582],\n",
       "       [9.35735626, 9.34724738, 9.33483626, ..., 8.25140394, 8.23622106,\n",
       "        8.24652081]])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time calc_mean(arr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "255.7"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "61368/240  # number of times we need to do this for the entire 7 year range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1530"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "255*6    # number of seconds to compute"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "440640"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "1530 * (1602//89)*(2976//186)  # seconds for the entire dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "122.4"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "440640/(60*60)   # number of hours"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"%matplotlib inline\n",
	"import h5pyd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Open the wind data \"file\"\n",
	"# server endpoint, username, and password are found via a config file\n",
	"f = h5pyd.File(\"/nrel/wtk-us.h5\", 'r') "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"dset = f['windspeed_100m']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(61368, 1602, 2976)"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dset.shape"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(24, 89, 186)"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"dset.chunks"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"2557.0"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"61368/24 # number of chunks in time dimension"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"3.784469783306122"
	]
	},
	"execution_count": 25,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# (24*89*186 values per chunk) * 4 bytes per value (presumably float32) * 2557 chunks along time\n",
	"24*89*186*4*2557/(1024**3) # size of one column (in time) of chunks in GB "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"# np.float was only an alias for the builtin float and was removed in NumPy 1.24;\n",
	"# np.float64 is the same dtype and works on every NumPy version.\n",
	"arr = np.zeros((240,89,186),dtype=np.float64) # numpy array to store 10 days of chunks"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 28 ms, sys: 36 ms, total: 64 ms\n",
	"Wall time: 6.36 s\n"
	]
	}
	],
	"source": [
	"%time arr[:,:,:] = dset[0:240,0:89,0:186] # read 10 days"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [],
	"source": [
	"def calc_mean(arr):\n",
	"    \"\"\"Return the per-grid-point mean of arr taken along its first axis.\n",
	"\n",
	"    arr is a 3-D block indexed [time, i, j] (e.g. the 240x89x186 array read\n",
	"    from dset above).  The result is a float64 array of shape arr.shape[1:].\n",
	"    \"\"\"\n",
	"    # One vectorized reduction instead of a Python loop over all 89*186 cells;\n",
	"    # the output shape follows the input, so nothing is tied to one chunk size.\n",
	"    return np.mean(arr, axis=0, dtype=np.float64)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 92 ms, sys: 4 ms, total: 96 ms\n",
	"Wall time: 93.7 ms\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"array([[9.61118126, 9.58198522, 9.55314557, ..., 8.51358325, 8.54330053,\n",
	" 8.54180031],\n",
	" [9.57530956, 9.56639554, 9.56540403, ..., 8.51742347, 8.5096412 ,\n",
	" 8.49697638],\n",
	" [9.58162972, 9.57684822, 9.57565279, ..., 8.47134066, 8.44164877,\n",
	" 8.43831708],\n",
	" ...,\n",
	" [9.33155575, 9.34589939, 9.35558863, ..., 8.28590247, 8.25944064,\n",
	" 8.23347441],\n",
	" [9.35494076, 9.35869188, 9.3528677 , ..., 8.26886285, 8.22571777,\n",
	" 8.21325582],\n",
	" [9.35735626, 9.34724738, 9.33483626, ..., 8.25140394, 8.23622106,\n",
	" 8.24652081]])"
	]
	},
	"execution_count": 30,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"%time calc_mean(arr)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"255.7"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"61368/240 # number of times we need to do this for the entire 7 year range"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1530"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"255*6 # number of seconds to compute"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"440640"
	]
	},
	"execution_count": 32,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"1530 * (1602//89)*(2976//186) # seconds for the entire dataset"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 33,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"122.4"
	]
	},
	"execution_count": 33,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"440640/(60*60) # number of hours"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}