Created
October 23, 2015 04:06
-
-
Save monkeybutter/b55180d70bf23a583c35 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import glob\n", | |
"from random import randint, sample\n", | |
"\n", | |
"def get_files(path):\n", | |
"    \"\"\"Return the paths of all .h5 files directly under `path`.\"\"\"\n", | |
"    path = \"{}*.h5\".format(path)\n", | |
"    return glob.glob(path)\n", | |
"\n", | |
"#start & end both included\n", | |
"def get_rdm_list(number, start, end):\n", | |
"    \"\"\"Return `number` distinct random ints drawn from [start, end].\n", | |
"\n", | |
"    Uses random.sample instead of the previous rejection loop, whose\n", | |
"    `ready` flag was never set (dead variable) and which looped forever\n", | |
"    whenever `number` exceeded the range size; sample raises ValueError\n", | |
"    in that case instead of hanging.\n", | |
"    \"\"\"\n", | |
"    return sample(xrange(start, end + 1), number)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 144, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 17.1 s, sys: 200 ms, total: 17.3 s\n", | |
"Wall time: 17.7 s\n" | |
] | |
} | |
], | |
"source": [ | |
"import h5py\n", | |
"from osgeo import gdal\n", | |
"\n", | |
"# Benchmark: compose an image from 8 randomly chosen tiles, reading one\n", | |
"# random band slice (last axis, index 0-5) of the \"NBAR\" dataset from\n", | |
"# each HDF5 file.\n", | |
"def get_image(path):\n", | |
"    paths = get_files(path)\n", | |
"    f_idxs = get_rdm_list(8, 0, len(paths)-1)\n", | |
"    \n", | |
"    for f_idx in f_idxs:\n", | |
"        with h5py.File(paths[f_idx], 'r') as h5f:\n", | |
"            cube = h5f[\"NBAR\"][:, :, randint(0, 5)]\n", | |
"\n", | |
"\n", | |
"# GDAL counterpart: reads ALL 6 bands of each of 8 random files.\n", | |
"# NOTE(review): the original comment said 4 tiles, but the code samples\n", | |
"# 8 (same as get_image) -- confirm which was intended.\n", | |
"def get_gdal_image(path):\n", | |
"    paths = get_files(path)\n", | |
"    f_idxs = get_rdm_list(8, 0, len(paths)-1)\n", | |
"    \n", | |
"    for f_idx in f_idxs:\n", | |
"        image = gdal.Open(paths[f_idx])\n", | |
"        for i in range(6):#band = randint(1, 6)\n", | |
"            band_pack = image.GetRasterBand(i+1).ReadAsArray()\n", | |
"    \n", | |
"\n", | |
"%time get_image('/g/data1/ep1_2/z00/prl900/hdf5_test/1A_LZF_C1/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Timings for composing a one-band image from 8 tiles.\n", | |
"# Values are %time wall-clock seconds per run, 10 runs per layout.\n", | |
"\n", | |
"#0_geotiff 35GB\n", | |
"res_0_geotiff = [8.75, 6.91, 8.29, 6.06, 7.63, 7.01, 13.4, 10.7, 6.14, 7.95]\n", | |
"\n", | |
"#1A_hdf5 (4000, 4000, 6) 57GB\n", | |
"res_1A_hdf5 = [3.59, 3.61, 4.33, 3.61, 3.38, 3.65, 4.03, 3.68, 2.69, 3.23]\n", | |
"\n", | |
"#1B_hdf5 (6, 4000, 4000) 57GB\n", | |
"res_1B_hdf5 = [1.59, 1.32, 1.62, 1.35, 1.5, 1.57, 1.64, 1.66, 1.44, 1.3]\n", | |
"\n", | |
"#1A_LZF (4000, 4000, 6) 18GB\n", | |
"res_1A_LZF = [21.8, 26.9, 27.6, 27.6, 19.7, 22.7, 22.7, 24.6, 27.9, 22.2]\n", | |
"\n", | |
"#1A_GZIP (4000, 4000, 6) 13GB\n", | |
"res_1A_GZIP = [25.1, 26.8, 24.4, 26.4, 26.6, 26.1, 27.7, 25.2, 26.9, 21.7]\n", | |
"\n", | |
"#1B_LZF (6, 4000, 4000) 16GB\n", | |
"res_1B_LZF = [8.53, 12.2, 6.87, 6.29, 7.49, 11.6, 8.9, 9.95, 8.24, 3.86]\n", | |
"\n", | |
"#1B_GZIP (6, 4000, 4000) 11GB\n", | |
"res_1B_GZIP = [8.14, 10.4, 4.23, 9.46, 7.93, 4.16, 9.16, 6.36, 5.56, 5.42]\n", | |
"\n", | |
"#1A_LZF_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n", | |
"res_1A_LZF_C1 = [17.8, 18.0, 17.3, 17.1, 17.5, 17.4, 17.7, 17.4, 17.3, 17.7]\n", | |
"\n", | |
"#1A_GZIP_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n", | |
"res_1A_GZIP_C1 = [18.0, 17.8, 17.6, 17.3, 17.7, 17.4, 18.3, 17.5, 17.7, 17.9]\n", | |
"\n", | |
"#1B_LZF_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n", | |
"res_1B_LZF_C1 = [1.16, 0.89, 0.75, 0.72, 0.74, 0.79, 0.64, 0.94, 0.60, 0.76]\n", | |
"\n", | |
"#1B_GZIP_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n", | |
"res_1B_GZIP_C1 = [1.65, 2.01, 1.76, 0.82, 0.68, 1.12, 0.86, 1.17, 0.73, 1.25]\n", | |
"\n", | |
"\n", | |
"# Timings for reading all 6 bands of an 8-tile image. These three lists\n", | |
"# previously reused the one-band variable names above and silently\n", | |
"# overwrote them; distinct names keep both result sets available.\n", | |
"\n", | |
"#0_geotiff\n", | |
"res6_0_geotiff = [43.5, 48.3, 48.5, 41.5, 33.8, 34.6, 36.7, 30.3, 36.2, 37.6]\n", | |
"\n", | |
"#1A_hdf5 (4000, 4000, 6)\n", | |
"res6_1A_hdf5 = [6.51, 8.58, 7.78, 8.9, 7.99, 7.46, 3.93, 7.73, 9.46, 7.59]\n", | |
"\n", | |
"#1B_hdf5 (6, 4000, 4000)\n", | |
"res6_1B_hdf5 = [8.66, 8.24, 5.98, 6.25, 7.32, 6.21, 7.31, 8.4, 7.19, 7.59]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 230, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"78\n", | |
"CPU times: user 2.42 s, sys: 83 ms, total: 2.5 s\n", | |
"Wall time: 2.68 s\n" | |
] | |
} | |
], | |
"source": [ | |
"# Benchmark: read one fixed (x, y, band) pixel from len(paths)/4\n", | |
"# randomly chosen tiles (Python 2 integer division; the cell output\n", | |
"# above prints 78, not the 80 the original comment claimed).\n", | |
"def get_timeseries(path):\n", | |
"    # Band and pixel are chosen once and reused for every sampled file.\n", | |
"    paths = get_files(path)\n", | |
"    f_idxs = get_rdm_list(len(paths)/4, 0, len(paths)-1)\n", | |
"    band = randint(0, 5)\n", | |
"    x = randint(0, 3999)\n", | |
"    y = randint(0, 3999)\n", | |
"    \n", | |
"    for f_idx in f_idxs:\n", | |
"        with h5py.File(paths[f_idx], 'r') as h5f:\n", | |
"            value = h5f[\"NBAR\"][x, y, band]\n", | |
"    \n", | |
"# GDAL counterpart of get_timeseries: one 1x1 window read per file.\n", | |
"def get_gdal_timeseries(path):\n", | |
"\n", | |
"    paths = get_files(path)\n", | |
"    print len(paths)/4\n", | |
"    f_idxs = get_rdm_list(len(paths)/4, 0, len(paths)-1)\n", | |
"    band = randint(0, 5)\n", | |
"    x = randint(0, 3999)\n", | |
"    y = randint(0, 3999)\n", | |
"    \n", | |
"    for f_idx in f_idxs:\n", | |
"        image = gdal.Open(paths[f_idx])\n", | |
"        # NOTE(review): ReadAsArray takes (xoff, yoff, xsize, ysize), where\n", | |
"        # xoff is the column offset -- confirm x/y map to the intended axes.\n", | |
"        n = image.GetRasterBand(band+1).ReadAsArray(x, y, 1, 1)\n", | |
"    \n", | |
"    \n", | |
"%time get_gdal_timeseries('/g/data1/ep1_2/z00/prl900/hdf5_test/1A_GZIP_C1/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"\n", | |
"## Create a timeseries of 80 images\n", | |
"# Values below are presumably %time wall-clock seconds per run\n", | |
"# (10 runs per layout), matching the timing output printed above.\n", | |
"\n", | |
"#0_geotiff\n", | |
"res_0_geotiff = [3.35, 2.07, 1.47, 1.82, 0.92, 1.0, 1.8, 1.12, 1.26, 0.82]\n", | |
"\n", | |
"#1A_hdf5 (4000, 4000, 6)\n", | |
"res_1A_hdf5 = [0.23, 0.22, 0.17, 0.28, 0.2, 0.38, 0.25, 0.25, 0.22, 0.26]\n", | |
"\n", | |
"#1B_hdf5 (6, 4000, 4000)\n", | |
"res_1B_hdf5 = [0.24, 0.2, 0.23, 0.22, 0.28, 0.13, 0.37, 0.2, 0.15, 0.16]\n", | |
"\n", | |
"#1A_LZF (4000, 4000, 6) 18GB\n", | |
"res_1A_LZF = [6.88, 5.75, 4.22, 4.56, 4.38, 3.53, 2.88, 3.11, 3.14, 3.04]\n", | |
"\n", | |
"#1A_GZIP (4000, 4000, 6) 13GB\n", | |
"res_1A_GZIP = [7.12, 6.1, 5.15, 4.47, 3.6, 4.17, 3.38, 3.19, 3.37, 3.09]\n", | |
"\n", | |
"#1B_LZF (6, 4000, 4000) 16GB\n", | |
"res_1B_LZF = [54.1, 52.2, 50.9, 41.6, 31.6, 34.7, 34.6, 38.3, 33.4, 32.1]\n", | |
"\n", | |
"#1B_GZIP (6, 4000, 4000) 11GB\n", | |
"res_1B_GZIP = [63.3, 71, 61, 53.4, 53.8, 50.1, 48.9, 49.2, 48.8, 50.5]\n", | |
"\n", | |
"#1A_LZF_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n", | |
"res_1A_LZF_C1 = [6.35, 5.23, 5.02, 3.66, 3.45, 3.68, 3.08, 3.2, 2.9, 2.86]\n", | |
"\n", | |
"#1A_GZIP_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n", | |
"res_1A_GZIP_C1 = [4.37, 4.12, 3.89, 3.45, 3.44, 3.12, 2.82, 2.79, 3.05, 2.68]\n", | |
"\n", | |
"#1B_LZF_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n", | |
"res_1B_LZF_C1 = [14.3, 15.5, 11.4, 10.2, 8.9, 10.6, 12.1, 9.56, 11.0, 6.33]\n", | |
"\n", | |
"#1B_GZIP_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n", | |
"res_1B_GZIP_C1 = [15.5, 13.9, 12.0, 16.0, 17.0, 11.7, 11.7, 15.2, 11.5, 10.8]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import os.path\n", | |
"\n", | |
"# NOTE(review): get_files re-defines the identical helper from the first\n", | |
"# cell; kept so this cell also runs standalone, but it shadows the earlier\n", | |
"# definition on a full re-run.\n", | |
"def get_files(path):\n", | |
"    \"\"\"Return the paths of all .h5 files directly under `path`.\"\"\"\n", | |
"    path = \"{}*.h5\".format(path)\n", | |
"    return glob.glob(path)\n", | |
"\n", | |
"def get_filename(path):\n", | |
"    \"\"\"Return the final component (file name) of `path`.\"\"\"\n", | |
"    return os.path.basename(path)\n", | |
"\n", | |
"def repack(in_folder, out_folder, exp_path=\"/g/data1/ep1_2/z00/prl900/hdf5_test/\"):\n", | |
"    \"\"\"Re-compress every NBAR cube in exp_path/in_folder into\n", | |
"    exp_path/out_folder, gzip-compressed and chunked one band per chunk\n", | |
"    (4000, 4000, 1). Files already present in out_folder are skipped.\n", | |
"    \"\"\"\n", | |
"    in_paths = get_files(exp_path + in_folder + '/')\n", | |
"    \n", | |
"    for in_path in in_paths:\n", | |
"        out_path = exp_path + out_folder + '/' + get_filename(in_path)\n", | |
"        if os.path.isfile(out_path):\n", | |
"            continue  # already repacked on a previous run\n", | |
"        with h5py.File(in_path, 'r') as h5i:\n", | |
"            cube = h5i[\"NBAR\"][:]  # Dataset.value is deprecated; slice instead\n", | |
"        with h5py.File(out_path, 'w') as h5o:\n", | |
"            h5o.create_dataset(\"NBAR\", data=cube, compression='gzip', chunks=(4000, 4000, 1))\n", | |
"\n", | |
"repack('1A_hdf5', '1A_GZIP_C1')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment