{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import glob\n",
"from random import randint\n",
"\n",
"def get_files(path):\n",
" path = \"{}*.h5\".format(path)\n",
" return glob.glob(path)\n",
"\n",
"#start & end both included\n",
"def get_rdm_list(number, start, end):\n",
" out = []\n",
" ready = False\n",
" \n",
" while not ready and len(out) < number:\n",
" n = randint(start, end)\n",
" if n not in out:\n",
" out.append(n)\n",
" \n",
" return out"
]
},
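{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of the helpers above (illustrative only, not part of the benchmark): the timing functions below rely on `get_rdm_list` returning distinct indices so no tile is read twice."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Illustrative check: 5 distinct indices drawn from [0, 9]\n",
"idxs = get_rdm_list(5, 0, 9)\n",
"print idxs            # e.g. [3, 7, 0, 9, 2]\n",
"print len(set(idxs))  # 5 -- all values distinct"
]
},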
{
"cell_type": "code",
"execution_count": 144,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 17.1 s, sys: 200 ms, total: 17.3 s\n",
"Wall time: 17.7 s\n"
]
}
],
"source": [
"import h5py\n",
"from osgeo import gdal\n",
"\n",
"#image is composed of 8 tile\n",
"def get_image(path):\n",
" paths = get_files(path)\n",
" f_idxs = get_rdm_list(8, 0, len(paths)-1)\n",
" \n",
" for f_idx in f_idxs:\n",
" with h5py.File(paths[f_idx], 'r') as h5f:\n",
" cube = h5f[\"NBAR\"][:, :, randint(0, 5)]\n",
"\n",
"\n",
"#image is composed of 4 tiles\n",
"def get_gdal_image(path):\n",
" paths = get_files(path)\n",
" f_idxs = get_rdm_list(8, 0, len(paths)-1)\n",
" \n",
" for f_idx in f_idxs:\n",
" image = gdal.Open(paths[f_idx])\n",
" for i in range(6):#band = randint(1, 6)\n",
" band_pack = image.GetRasterBand(i+1).ReadAsArray()\n",
" \n",
"\n",
"%time get_image('/g/data1/ep1_2/z00/prl900/hdf5_test/1A_LZF_C1/')"
]
},
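{
"cell_type": "markdown",
"metadata": {},
"source": [
"The result lists in the next cell were presumably recorded by repeating the `%time` call by hand; a small helper like this automates that (a sketch, `time_runs` is not part of the original notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import time\n",
"\n",
"# Sketch: collect `runs` wall-clock timings of fn(path), producing\n",
"# lists like the hand-recorded ones below (times in seconds)\n",
"def time_runs(fn, path, runs=10):\n",
"    out = []\n",
"    for _ in range(runs):\n",
"        t0 = time.time()\n",
"        fn(path)\n",
"        out.append(round(time.time() - t0, 2))\n",
"    return out\n",
"\n",
"# e.g. time_runs(get_image, '/g/data1/ep1_2/z00/prl900/hdf5_test/1A_LZF_C1/')"
]
},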
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Compose a one band image 8 tiles\n",
"\n",
"#0_geotiff 35GB\n",
"res_0_geotiff = [8.75, 6.91, 8.29, 6.06, 7.63, 7.01, 13.4, 10.7, 6.14, 7.95]\n",
"\n",
"#1A_hdf5 (4000, 4000, 6) 57GB\n",
"res_1A_hdf5 = [3.59, 3.61, 4.33, 3.61, 3.38, 3.65, 4.03, 3.68, 2.69, 3.23]\n",
"\n",
"#1B_hdf5 (6, 4000, 4000) 57GB\n",
"res_1B_hdf5 = [1.59, 1.32, 1.62, 1.35, 1.5, 1.57, 1.64, 1.66, 1.44, 1.3]\n",
"\n",
"#1A_LZF (4000, 4000, 6) 18GB\n",
"res_1A_LZF = [21.8, 26.9, 27.6, 27.6, 19.7, 22.7, 22.7, 24.6, 27.9, 22.2]\n",
"\n",
"#1A_GZIP (4000, 4000, 6) 13GB\n",
"res_1A_GZIP = [25.1, 26.8, 24.4, 26.4, 26.6, 26.1, 27.7, 25.2, 26.9, 21.7]\n",
"\n",
"#1B_LZF (6, 4000, 4000) 16GB\n",
"res_1B_LZF = [8.53, 12.2, 6.87, 6.29, 7.49, 11.6, 8.9, 9.95, 8.24, 3.86]\n",
"\n",
"#1B_GZIP (6, 4000, 4000) 11GB\n",
"res_1B_GZIP = [8.14, 10.4, 4.23, 9.46, 7.93, 4.16, 9.16, 6.36, 5.56, 5.42]\n",
"\n",
"#1A_LZF_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n",
"res_1A_LZF_C1 = [17.8, 18.0, 17.3, 17.1, 17.5, 17.4, 17.7, 17.4, 17.3, 17.7]\n",
"\n",
"#1A_GZIP_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n",
"res_1A_GZIP_C1 = [18.0, 17.8, 17.6, 17.3, 17.7, 17.4, 18.3, 17.5, 17.7, 17.9]\n",
"\n",
"#1B_LZF_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n",
"res_1B_LZF_C1 = [1.16, 0.89, 0.75, 0.72, 0.74, 0.79, 0.64, 0.94, 0.60, 0.76]\n",
"\n",
"#1B_GZIP_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n",
"res_1B_GZIP_C1 = [1.65, 2.01, 1.76, 0.82, 0.68, 1.12, 0.86, 1.17, 0.73, 1.25]\n",
"\n",
"\n",
"# Read 6 bands image 8 tiles\n",
"\n",
"#0_geotiff\n",
"res_0_geotiff = [43.5, 48.3, 48.5, 41.5, 33.8, 34.6, 36.7, 30.3, 36.2, 37.6]\n",
"\n",
"#1A_hdf5 (4000, 4000, 6)\n",
"res_1A_hdf5 = [6.51, 8.58, 7.78, 8.9, 7.99, 7.46, 3.93, 7.73, 9.46, 7.59]\n",
"\n",
"#1B_hdf5 (6, 4000, 4000)\n",
"res_1B_hdf5 = [8.66, 8.24, 5.98, 6.25, 7.32, 6.21, 7.31, 8.4, 7.19, 7.59]"
]
},
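{
"cell_type": "markdown",
"metadata": {},
"source": [
"Averaging each list of 10 runs makes the layout comparison easier to read. Note that `res_0_geotiff`, `res_1A_hdf5` and `res_1B_hdf5` were reassigned above, so at this point they hold the 6-band read times (a small summary sketch, not part of the original notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Mean wall time in seconds over the 10 runs of each list above\n",
"def mean(res):\n",
"    return round(sum(res) / float(len(res)), 2)\n",
"\n",
"# 6-band reads (the reassigned values)\n",
"print '0_geotiff ', mean(res_0_geotiff)\n",
"print '1A_hdf5   ', mean(res_1A_hdf5)\n",
"print '1B_hdf5   ', mean(res_1B_hdf5)\n",
"\n",
"# one-band composition, chunked layouts\n",
"print '1A_GZIP_C1', mean(res_1A_GZIP_C1)\n",
"print '1B_GZIP_C1', mean(res_1B_GZIP_C1)"
]
},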
{
"cell_type": "code",
"execution_count": 230,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"78\n",
"CPU times: user 2.42 s, sys: 83 ms, total: 2.5 s\n",
"Wall time: 2.68 s\n"
]
}
],
"source": [
"#timeseries is composed of 80 tiles\n",
"def get_timeseries(path):\n",
" paths = get_files(path)\n",
" f_idxs = get_rdm_list(len(paths)/4, 0, len(paths)-1)\n",
" band = randint(0, 5)\n",
" x = randint(0, 3999)\n",
" y = randint(0, 3999)\n",
" \n",
" for f_idx in f_idxs:\n",
" with h5py.File(paths[f_idx], 'r') as h5f:\n",
" value = h5f[\"NBAR\"][x, y, band]\n",
" \n",
"def get_gdal_timeseries(path):\n",
"\n",
" paths = get_files(path)\n",
" print len(paths)/4\n",
" f_idxs = get_rdm_list(len(paths)/4, 0, len(paths)-1)\n",
" band = randint(0, 5)\n",
" x = randint(0, 3999)\n",
" y = randint(0, 3999)\n",
" \n",
" for f_idx in f_idxs:\n",
" image = gdal.Open(paths[f_idx])\n",
" n = image.GetRasterBand(band+1).ReadAsArray(x, y, 1, 1)\n",
" \n",
" \n",
"%time get_gdal_timeseries('/g/data1/ep1_2/z00/prl900/hdf5_test/1A_GZIP_C1/')"
]
},
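{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same timeseries read can be timed against several layouts in one loop (a sketch; the directory names follow the pattern used elsewhere in this notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import time\n",
"\n",
"# Sketch: time the HDF5 timeseries read for the two uncompressed layouts\n",
"base = '/g/data1/ep1_2/z00/prl900/hdf5_test/'\n",
"for layout in ['1A_hdf5', '1B_hdf5']:\n",
"    t0 = time.time()\n",
"    get_timeseries(base + layout + '/')\n",
"    print layout, round(time.time() - t0, 2), 's'"
]
},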
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"\n",
"\n",
"## Create a timeseries of 80 images\n",
"\n",
"#0_geotiff\n",
"res_0_geotiff = [3.35, 2.07, 1.47, 1.82, 0.92, 1.0, 1.8, 1.12, 1.26, 0.82]\n",
"\n",
"#1A_hdf5 (4000, 4000, 6)\n",
"res_1A_hdf5 = [0.23, 0.22, 0.17, 0.28, 0.2, 0.38, 0.25, 0.25, 0.22, 0.26]\n",
"\n",
"#1B_hdf5 (6, 4000, 4000)\n",
"res_1B_hdf5 = [0.24, 0.2, 0.23, 0.22, 0.28, 0.13, 0.37, 0.2, 0.15, 0.16]\n",
"\n",
"#1A_LZF (4000, 4000, 6) 18GB\n",
"res_1A_LZF = [6.88, 5.75, 4.22, 4.56, 4.38, 3.53, 2.88, 3.11, 3.14, 3.04]\n",
"\n",
"#1A_GZIP (4000, 4000, 6) 13GB\n",
"res_1A_GZIP = [7.12, 6.1, 5.15, 4.47, 3.6, 4.17, 3.38, 3.19, 3.37, 3.09]\n",
"\n",
"#1B_LZF (6, 4000, 4000) 16GB\n",
"res_1B_LZF = [54.1, 52.2, 50.9, 41.6, 31.6, 34.7, 34.6, 38.3, 33.4, 32.1]\n",
"\n",
"#1B_GZIP (6, 4000, 4000) 11GB\n",
"res_1B_GZIP = [63.3, 71, 61, 53.4, 53.8, 50.1, 48.9, 49.2, 48.8, 50.5]\n",
"\n",
"#1A_LZF_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n",
"res_1A_LZF_C1 = [6.35, 5.23, 5.02, 3.66, 3.45, 3.68, 3.08, 3.2, 2.9, 2.86]\n",
"\n",
"#1A_GZIP_C1 (4000, 4000, 6) chunked (4000, 4000, 1)\n",
"res_1A_GZIP_C1 = [4.37, 4.12, 3.89, 3.45, 3.44, 3.12, 2.82, 2.79, 3.05, 2.68]\n",
"\n",
"#1B_LZF_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n",
"res_1B_LZF_C1 = [14.3, 15.5, 11.4, 10.2, 8.9, 10.6, 12.1, 9.56, 11.0, 6.33]\n",
"\n",
"#1B_GZIP_C1 (6, 4000, 4000) chunked (1, 4000, 4000)\n",
"res_1B_GZIP_C1 = [15.5, 13.9, 12.0, 16.0, 17.0, 11.7, 11.7, 15.2, 11.5, 10.8]"
]
},
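{
"cell_type": "markdown",
"metadata": {},
"source": [
"Averaged, the pattern reverses relative to the whole-band reads above: the (1, 4000, 4000)-chunked 1B layouts that were fastest for band reads are the slowest for pixel timeseries, while the pixel-interleaved 1A layouts win (a summary sketch over the lists above, not part of the original notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Mean timeseries-read time in seconds per layout\n",
"results = [('0_geotiff', res_0_geotiff), ('1A_hdf5', res_1A_hdf5),\n",
"           ('1B_hdf5', res_1B_hdf5), ('1A_GZIP_C1', res_1A_GZIP_C1),\n",
"           ('1B_GZIP_C1', res_1B_GZIP_C1)]\n",
"for name, res in results:\n",
"    print name, round(sum(res) / float(len(res)), 2)"
]
},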
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import os.path\n",
"\n",
"def get_files(path):\n",
" path = \"{}*.h5\".format(path)\n",
" return glob.glob(path)\n",
"\n",
"def get_filename(path):\n",
" return path.split('/')[-1]\n",
"\n",
"def repack(in_folder, out_folder):\n",
" exp_path = \"/g/data1/ep1_2/z00/prl900/hdf5_test/\"\n",
" in_paths = get_files(exp_path + in_folder + '/')\n",
" \n",
" for in_path in in_paths:\n",
" if not os.path.isfile(exp_path + out_folder + '/' + get_filename(in_path)):\n",
" with h5py.File(in_path, 'r') as h5i:\n",
" cube = h5i[\"NBAR\"].value\n",
" with h5py.File(exp_path + out_folder + '/' + get_filename(in_path), 'w') as h5o:\n",
" h5o.create_dataset(\"NBAR\", data=cube, compression='gzip', chunks=(4000, 4000, 1))\n",
" \n",
"repack('1A_hdf5', '1A_GZIP_C1')"
]
},
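{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same approach can produce the band-major variants; a sketch, assuming the 1B files hold the cube transposed to (band, y, x) (`repack_band_major` is not part of the original notebook):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Sketch: repack into a band-major (6, 4000, 4000) layout with one\n",
"# chunk per band, mirroring repack() above\n",
"def repack_band_major(in_folder, out_folder):\n",
"    exp_path = \"/g/data1/ep1_2/z00/prl900/hdf5_test/\"\n",
"    for in_path in get_files(exp_path + in_folder + '/'):\n",
"        out_path = exp_path + out_folder + '/' + get_filename(in_path)\n",
"        if not os.path.isfile(out_path):\n",
"            with h5py.File(in_path, 'r') as h5i:\n",
"                cube = np.transpose(h5i['NBAR'][:], (2, 0, 1))\n",
"            with h5py.File(out_path, 'w') as h5o:\n",
"                h5o.create_dataset('NBAR', data=cube, compression='gzip', chunks=(1, 4000, 4000))"
]
},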
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}