Last active
August 29, 2015 13:57
-
-
Save rossant/9467085 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:b57688628ab9dced9a7a1cd4ca63a4ecbbb88f993ce65bc2e0a4f5cf9fe4e278" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Slicing huge arrays in h5py" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's import h5py and NumPy, plus `os` to check whether the test files already exist." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import h5py\n", | |
"import numpy as np\n", | |
"import os" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Chunked array" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Shape of the test datasets: (n, k).\n", | |
"n = 1000000\n", | |
"k = 500" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Create the chunked test file only if it does not exist yet;\n", | |
"# later runs reuse the file already on disk.\n", | |
"if not os.path.exists('test_ind'):\n", | |
"    with h5py.File(\"test_ind\", \"w\") as f:\n", | |
"        # (n, k) dataset stored in chunks of 100 full rows.\n", | |
"        a = f.create_dataset('/test', shape=(n, k),\n", | |
"                             chunks=(100, k))\n", | |
"        # Fill the array progressively, in 10 blocks of n//10 rows each.\n", | |
"        for i in range(10):\n", | |
"            print i,  # progress indicator (Python 2 print statement)\n", | |
"            a[i*(n//10):(i+1)*(n//10),...] = np.random.rand((n//10), k)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"0 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"2 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"3 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"4 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"5 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"6 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"7 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"8 " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"9\n" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
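"Now, let's benchmark reading 1000 randomly chosen rows one at a time, comparing integer indexing (`a[i, ...]`) with length-1 slices (`a[i:i+1, ...]`)." | |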
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Fix the seed so that the same rows are benchmarked on every run.\n", | |
"np.random.seed(0)\n", | |
"# 1000 random row indices in [0, n), sorted in increasing order.\n", | |
"ind = np.sort(np.random.randint(low=0, high=n, size=1000))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"with h5py.File(\"test_ind\", \"r\") as f:\n", | |
"    a = f['/test']\n", | |
"\n", | |
"    # Fetch each selected row with an integer index: one single run first\n", | |
"    # (-r1 -n1), then %timeit's default repeated measurement.\n", | |
"    %timeit -r1 -n1 [a[i,...] for i in ind]\n", | |
"    %timeit [a[i,...] for i in ind]\n", | |
"    # Same rows, fetched as length-1 slices instead.\n", | |
"    %timeit -r1 -n1 [a[i:i+1,...] for i in ind]\n", | |
"    %timeit [a[i:i+1,...] for i in ind]\n", | |
"    # Disabled: these read the whole dataset before indexing.\n", | |
"    #%timeit -r1 -n1 a[:,...]\n", | |
"    #%timeit -r1 -n1 a[:,...][ind,...]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1 loops, best of 1: 198 ms per loop\n", | |
"10 loops, best of 3: 180 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"1 loops, best of 1: 177 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"10 loops, best of 3: 177 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Contiguous array" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's repeat the same benchmark with a contiguous (non-chunked) array." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Create the contiguous test file only if it does not exist yet;\n", | |
"# later runs reuse the file already on disk.\n", | |
"if not os.path.exists('test_ind_contiguous'):\n", | |
"    with h5py.File(\"test_ind_contiguous\", \"w\") as f:\n", | |
"        # Same (n, k) shape as the chunked dataset, but no chunks argument.\n", | |
"        a = f.create_dataset('/test', shape=(n, k))\n", | |
"        # Fill the array progressively, in 10 blocks of n//10 rows each.\n", | |
"        for i in range(10):\n", | |
"            print i,  # progress indicator (Python 2 print statement)\n", | |
"            a[i*(n//10):(i+1)*(n//10),...] = np.random.rand((n//10), k)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"with h5py.File(\"test_ind_contiguous\", \"r\") as f:\n", | |
"    a = f['/test']\n", | |
"\n", | |
"    # Fetch each selected row with an integer index: one single run first\n", | |
"    # (-r1 -n1), then %timeit's default repeated measurement.\n", | |
"    %timeit -r1 -n1 [a[i,...] for i in ind]\n", | |
"    %timeit [a[i,...] for i in ind]\n", | |
"    # Same rows, fetched as length-1 slices instead.\n", | |
"    %timeit -r1 -n1 [a[i:i+1,...] for i in ind]\n", | |
"    %timeit [a[i:i+1,...] for i in ind]\n", | |
"    # Disabled: these read the whole dataset before indexing.\n", | |
"    #%timeit -r1 -n1 a[:,...]\n", | |
"    #%timeit -r1 -n1 a[:,...][ind,...]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1 loops, best of 1: 159 ms per loop\n", | |
"10 loops, best of 3: 149 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"1 loops, best of 1: 147 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"10 loops, best of 3: 141 ms per loop" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 9 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment