Skip to content

Instantly share code, notes, and snippets.

@jreadey
Created May 25, 2018 23:26
Show Gist options
  • Save jreadey/218be94177a1d8892a7063d99cde856a to your computer and use it in GitHub Desktop.
Save jreadey/218be94177a1d8892a7063d99cde856a to your computer and use it in GitHub Desktop.
Estimate to compute mean for NREL wtk dataset
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import h5pyd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Open the wind data \"file\"\n",
"# server endpoint, username, and password are read from a config file\n",
"f = h5pyd.File(\"/nrel/wtk-us.h5\", 'r') "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"dset = f['windspeed_100m']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(61368, 1602, 2976)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dset.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(24, 89, 186)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dset.chunks"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2557.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"61368/24 # number of chunks in time dimension"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3.784469783306122"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"24*89*186*4*2557/(1024**3) # size in GiB of one column (along time) of chunks, at 4 bytes per value"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"arr = np.zeros((240,89,186),dtype=np.float64) # float64 buffer (np.float alias was removed in NumPy 1.24) to store 10 days of chunks"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 28 ms, sys: 36 ms, total: 64 ms\n",
"Wall time: 6.36 s\n"
]
}
],
"source": [
"%time arr[:,:,:] = dset[0:240,0:89,0:186] # read 10 days"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"def calc_mean(arr):\n",
"    \"\"\"Return the mean along axis 0 (time) for every grid point.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    arr : array-like, indexed as (time, i, j)\n",
"        Block of values, e.g. the (240, 89, 186) buffer read above.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray\n",
"        float64 array of shape arr.shape[1:] (89 x 186 for one chunk\n",
"        column) holding the per-point mean over axis 0.\n",
"    \"\"\"\n",
"    # A single vectorized reduction replaces the Python double loop over\n",
"    # every (i, j) point and no longer hard-codes the 89 x 186 chunk shape.\n",
"    return np.asarray(arr).mean(axis=0, dtype=np.float64)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 92 ms, sys: 4 ms, total: 96 ms\n",
"Wall time: 93.7 ms\n"
]
},
{
"data": {
"text/plain": [
"array([[9.61118126, 9.58198522, 9.55314557, ..., 8.51358325, 8.54330053,\n",
" 8.54180031],\n",
" [9.57530956, 9.56639554, 9.56540403, ..., 8.51742347, 8.5096412 ,\n",
" 8.49697638],\n",
" [9.58162972, 9.57684822, 9.57565279, ..., 8.47134066, 8.44164877,\n",
" 8.43831708],\n",
" ...,\n",
" [9.33155575, 9.34589939, 9.35558863, ..., 8.28590247, 8.25944064,\n",
" 8.23347441],\n",
" [9.35494076, 9.35869188, 9.3528677 , ..., 8.26886285, 8.22571777,\n",
" 8.21325582],\n",
" [9.35735626, 9.34724738, 9.33483626, ..., 8.25140394, 8.23622106,\n",
" 8.24652081]])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%time calc_mean(arr)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"255.7"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"61368/240 # number of 10-day reads needed to cover the entire 7 year range"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1530"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"255*6 # approximate number of seconds to read one column of chunks (about 255 reads at ~6 s each)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"440640"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"1530 * (1602//89)*(2976//186) # seconds for the entire dataset"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"122.4"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"440640/(60*60) # number of hours"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment