Skip to content

Instantly share code, notes, and snippets.

@rossant
Last active August 29, 2015 13:56
Show Gist options
  • Save rossant/9210992 to your computer and use it in GitHub Desktop.
Save rossant/9210992 to your computer and use it in GitHub Desktop.
HDF5 benchmarks
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:00ad3431755f65150e810bb53ad257b5f0303a21f98d616849338f1bdde6d657"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"import time\n",
"import h5py\n",
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"n, k, l = 1000000, 50, 128"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def create(filename):\n",
" \"\"\"Create a contiguous array of size (n,k,l).\"\"\"\n",
" with h5py.File(filename, \"w\") as f:\n",
" a = f.create_dataset('/test',dtype=np.int16,\n",
" shape=(n,k,l))\n",
" n_ = n//50\n",
" for i in range(50):\n",
" print i,\n",
" a[i*n_:(i+1)*n_,...] = np.random.randint(size=(n_,k,l),\n",
" low=-32000,\n",
" high=32000)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create the file if necessary."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"filename = 'waveforms.h5' # 12 GB\n",
"if not os.path.exists(filename):\n",
" create(filename)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This function loads an array from disk."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def read(a, out):\n",
" for j, i in enumerate(ind):\n",
" out[j:j+1,...] = a[i:i+1,...]\n",
" return out"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Choose rows randomly."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"size = 1000\n",
"ind = np.random.randint(size=size, low=0, high=n)\n",
"ind = np.unique(ind)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load those rows in memory from HDF5."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"with h5py.File(filename, \"r\") as f:\n",
" a = f['/test']\n",
" out = np.empty((len(ind),k,l), dtype=a.dtype)\n",
" t0 = time.clock()\n",
" read(a, out)\n",
" t1 = time.clock()\n",
"d = t1-t0"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bandwidth = size*k*l*2/(1024*1024.)/d\n",
"print(\"Time: {0:.2f} s\".format(d))\n",
"print(\"Bandwidth: {0:.1f} MB/s\".format(bandwidth))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Time: 5.61 s\n",
"Bandwidth: 2.2 MB/s\n"
]
}
],
"prompt_number": 8
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment