Skip to content

Instantly share code, notes, and snippets.

@richardjgowers
Created July 16, 2022 18:15
Show Gist options
  • Save richardjgowers/3afc8516896594be9d98e01bba761e32 to your computer and use it in GitHub Desktop.
Save richardjgowers/3afc8516896594be9d98e01bba761e32 to your computer and use it in GitHub Desktop.
zarr prototype
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"id": "acdf6cf8",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import zarr\n",
"from zarr.storage import BaseStore\n",
"\n",
"from zarr.storage import init_array, init_group"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "2720da69",
"metadata": {},
"outputs": [],
"source": [
"# Build a small synthetic trajectory and dump it to disk as raw float64 bytes.\n",
"\n",
"nframes = 5\n",
"natoms = 100\n",
"\n",
"# The coordinates are a running counter laid out as (frame, atom, xyz).\n",
"data = np.arange(nframes * natoms * 3, dtype=np.float64).reshape((nframes, natoms, 3))\n",
"\n",
"with open('data.hugo', 'wb') as out:\n",
"    out.write(data.tobytes())"
]
},
{
"cell_type": "code",
"execution_count": 164,
"id": "fb1b1652",
"metadata": {},
"outputs": [],
"source": [
"# read my data\n",
"\n",
"def create_meta_store(arr):\n",
"    \"\"\"Return a dict seeded with zarr metadata derived from *arr*.\n",
"\n",
"    The dict receives a root group plus an uncompressed float64 array\n",
"    named 'cheeky' whose shape is taken from ``arr.shape``.\n",
"    \"\"\"\n",
"    meta = {}\n",
"    init_group(meta)\n",
"\n",
"    # NOTE(review): chunks=10 looks like a placeholder value - confirm.\n",
"    array_spec = dict(\n",
"        path='cheeky',\n",
"        shape=arr.shape,\n",
"        chunks=10,\n",
"        dtype=np.float64,\n",
"        compressor=None,\n",
"    )\n",
"    init_array(meta, **array_spec)\n",
"\n",
"    return meta\n",
"\n",
"\n",
"class HugoStore(BaseStore):\n",
"    \"\"\"Zarr store that serves frames of a raw float64 file as chunks.\n",
"\n",
"    The file at *path* is read eagerly and viewed as an array of shape\n",
"    (nframes, natoms, 3). Frame i is served under the chunk key 'i.0.0',\n",
"    the key zarr requests for an array created with chunks=(1, natoms, 3).\n",
"    Everything written to the store (in practice the zarr metadata\n",
"    documents) is kept in an in-memory dict.\n",
"\n",
"    Length, iteration and membership describe the frame chunks only.\n",
"    Metadata keys can be read through item access but are not reported by\n",
"    'in'; reporting the pre-seeded root group there would presumably make\n",
"    zarr.create() refuse to initialise an array on a fresh store.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, path, natoms):\n",
"        self._path = path\n",
"        # Raw dump of float64 values, viewed as (frame, atom, xyz).\n",
"        self._data = np.fromfile(path, dtype=np.float64).reshape(-1, natoms, 3)\n",
"        # Side table for metadata and any other key written to the store.\n",
"        self._store = create_meta_store(self._data)\n",
"\n",
"    def _frame_index(self, key):\n",
"        \"\"\"Return the frame number encoded by chunk key 'i.0.0', else None.\"\"\"\n",
"        if not isinstance(key, str):\n",
"            return None\n",
"        try:\n",
"            index = int(key.partition('.')[0])\n",
"        except ValueError:\n",
"            return None\n",
"        # Comparing with the canonical spelling rejects signs, padding,\n",
"        # whitespace and non-zero trailing chunk indices in one step.\n",
"        if key != f'{index}.0.0' or index not in range(self._data.shape[0]):\n",
"            return None\n",
"        return index\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the number of frames, i.e. the number of chunk keys.\"\"\"\n",
"        return self._data.shape[0]\n",
"\n",
"    def __iter__(self):\n",
"        \"\"\"Yield one chunk key per frame.\"\"\"\n",
"        # Delegating to self.keys() would recurse forever, because the\n",
"        # mapping's keys view iterates the store itself.\n",
"        for index in range(self._data.shape[0]):\n",
"            yield f'{index}.0.0'\n",
"\n",
"    def __getitem__(self, key):\n",
"        \"\"\"Return a written value, or the frame for a chunk key.\n",
"\n",
"        Raises KeyError for anything else, which is the signal zarr\n",
"        handles for a missing chunk or metadata document.\n",
"        \"\"\"\n",
"        try:\n",
"            return self._store[key]\n",
"        except KeyError:\n",
"            pass\n",
"\n",
"        print(key)  # trace of the keys requested beyond the side table\n",
"        index = self._frame_index(key)\n",
"        if index is None:\n",
"            raise KeyError(key)\n",
"        return self._data[index]\n",
"\n",
"    def __setitem__(self, key, value):\n",
"        \"\"\"Keep written values in memory; the file is never modified.\"\"\"\n",
"        # TODO: the original note suggests raising RuntimeError here instead.\n",
"        self._store[key] = value\n",
"\n",
"    def __delitem__(self, key):\n",
"        raise NotImplementedError\n",
"\n",
"    def __contains__(self, key):\n",
"        \"\"\"Return True only for chunk keys that map to an existing frame.\"\"\"\n",
"        return self._frame_index(key) is not None"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "eac24f9d",
"metadata": {},
"outputs": [],
"source": [
"# Expose the raw file as a zarr array with one chunk per frame.\n",
"# natoms is passed through so the store, shape and chunks cannot disagree.\n",
"arr = zarr.create((nframes, natoms, 3),\n",
"                  store=HugoStore('./data.hugo', natoms=natoms),\n",
"                  chunks=(1, natoms, 3),\n",
"                  compressor=None)"
]
},
{
"cell_type": "code",
"execution_count": 167,
"id": "8e23a454",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.0.0\n",
"2.0.0\n"
]
},
{
"data": {
"text/plain": [
"array([[300., 303., 306., 309., 312., 315., 318., 321., 324., 327., 330.,\n",
" 333., 336., 339., 342., 345., 348., 351., 354., 357., 360., 363.,\n",
" 366., 369., 372., 375., 378., 381., 384., 387., 390., 393., 396.,\n",
" 399., 402., 405., 408., 411., 414., 417., 420., 423., 426., 429.,\n",
" 432., 435., 438., 441., 444., 447., 450., 453., 456., 459., 462.,\n",
" 465., 468., 471., 474., 477., 480., 483., 486., 489., 492., 495.,\n",
" 498., 501., 504., 507., 510., 513., 516., 519., 522., 525., 528.,\n",
" 531., 534., 537., 540., 543., 546., 549., 552., 555., 558., 561.,\n",
" 564., 567., 570., 573., 576., 579., 582., 585., 588., 591., 594.,\n",
" 597.],\n",
" [600., 603., 606., 609., 612., 615., 618., 621., 624., 627., 630.,\n",
" 633., 636., 639., 642., 645., 648., 651., 654., 657., 660., 663.,\n",
" 666., 669., 672., 675., 678., 681., 684., 687., 690., 693., 696.,\n",
" 699., 702., 705., 708., 711., 714., 717., 720., 723., 726., 729.,\n",
" 732., 735., 738., 741., 744., 747., 750., 753., 756., 759., 762.,\n",
" 765., 768., 771., 774., 777., 780., 783., 786., 789., 792., 795.,\n",
" 798., 801., 804., 807., 810., 813., 816., 819., 822., 825., 828.,\n",
" 831., 834., 837., 840., 843., 846., 849., 852., 855., 858., 861.,\n",
" 864., 867., 870., 873., 876., 879., 882., 885., 888., 891., 894.,\n",
" 897.]])"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arr[1:3, :, 0]"
]
},
{
"cell_type": "code",
"execution_count": 162,
"id": "db73d3d2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 100)"
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[1:2, :, 0].shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "962802c9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment