Created
July 16, 2022 18:15
-
-
Save richardjgowers/3afc8516896594be9d98e01bba761e32 to your computer and use it in GitHub Desktop.
zarr prototype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"id": "acdf6cf8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import zarr\n", | |
"from zarr.storage import BaseStore\n", | |
"\n", | |
"from zarr.storage import init_array, init_group" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 163, | |
"id": "2720da69", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build a small synthetic trajectory and dump it to disk as raw float64 bytes.\n", | |
"\n", | |
"nframes = 5\n", | |
"natoms = 100\n", | |
"\n", | |
"# Consecutive integers arranged as (nframes, natoms, 3), converted to float64.\n", | |
"data = (\n", | |
"    np.arange(nframes * natoms * 3)\n", | |
"    .reshape(nframes, natoms, -1)\n", | |
"    .astype(np.float64)\n", | |
")\n", | |
"\n", | |
"# No header: the file is just the array buffer.\n", | |
"with open('data.hugo', 'wb') as f:\n", | |
"    f.write(data.tobytes())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"id": "fb1b1652", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# read my data\n", | |
"\n", | |
"def create_meta_store(arr):\n", | |
"    \"\"\"Return a dict pre-populated with zarr metadata describing ``arr``.\n", | |
"\n", | |
"    The dict receives a root group entry plus an uncompressed float64 array\n", | |
"    entry under the path 'cheeky' whose shape is ``arr.shape``.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    arr : array-like with a ``shape`` attribute\n", | |
"        Only the shape is read; the values are not copied.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    dict\n", | |
"        Mapping filled in by ``init_group`` and ``init_array``.\n", | |
"    \"\"\"\n", | |
"    store = dict()\n", | |
"\n", | |
"    init_group(store)\n", | |
"\n", | |
"    # One chunk per index along the first axis, covering the remaining axes in\n", | |
"    # full. HugoStore.__getitem__ answers a chunk key with a single\n", | |
"    # ``self._data[x]`` slab, so the declared chunk shape has to match that;\n", | |
"    # the former ``chunks=10`` did not.\n", | |
"    init_array(store,\n", | |
"               path='cheeky',\n", | |
"               shape=arr.shape,\n", | |
"               chunks=(1,) + tuple(arr.shape[1:]),\n", | |
"               dtype=np.float64,\n", | |
"               compressor=None)\n", | |
"\n", | |
"    return store\n", | |
"\n", | |
"\n", | |
"class HugoStore(BaseStore):\n", | |
"    \"\"\"Zarr store that serves frames out of a raw float64 file.\n", | |
"\n", | |
"    The file is read in full with ``np.fromfile`` and viewed as\n", | |
"    ``(nframes, natoms, 3)``. Keys present in the internal metadata dict are\n", | |
"    answered from that dict; any other key is parsed as a dotted chunk index\n", | |
"    ``'x.y.z'`` and answered with frame ``x``.\n", | |
"    \"\"\"\n", | |
"\n", | |
"    def __init__(self, path, natoms):\n", | |
"        \"\"\"Load ``path`` and prepare the metadata dict.\n", | |
"\n", | |
"        Parameters\n", | |
"        ----------\n", | |
"        path : str\n", | |
"            File of raw float64 values.\n", | |
"        natoms : int\n", | |
"            Atoms per frame; the data is reshaped to ``(-1, natoms, 3)``.\n", | |
"        \"\"\"\n", | |
"        self._path = path\n", | |
"        self._data = np.fromfile(path, dtype=np.float64).reshape(-1, natoms, 3)\n", | |
"        self._store = create_meta_store(self._data)\n", | |
"\n", | |
"    def __len__(self):\n", | |
"        \"\"\"Return the number of frames (one chunk key per frame).\"\"\"\n", | |
"        return self._data.shape[0]\n", | |
"\n", | |
"    def __iter__(self):\n", | |
"        \"\"\"Yield one chunk key ``'<frame>.0.0'`` per frame.\n", | |
"\n", | |
"        Keys are generated here directly rather than through ``self.keys()``:\n", | |
"        this class defines no ``keys``, and the generic mapping version\n", | |
"        iterates the mapping itself, which would re-enter this method\n", | |
"        without end. Metadata keys are not enumerated, matching ``__len__``.\n", | |
"        \"\"\"\n", | |
"        for frame in range(self._data.shape[0]):\n", | |
"            yield f'{frame}.0.0'\n", | |
"\n", | |
"    def __getitem__(self, key):\n", | |
"        \"\"\"Return a metadata value or the frame addressed by a chunk key.\n", | |
"\n", | |
"        Raises\n", | |
"        ------\n", | |
"        KeyError\n", | |
"            If ``key`` is neither a stored metadata key nor three dotted\n", | |
"            integers whose first component is a valid frame index.\n", | |
"        \"\"\"\n", | |
"        if key in self._store:\n", | |
"            return self._store[key]\n", | |
"\n", | |
"        # Trace every non-metadata key that is requested from the store.\n", | |
"        print(key)\n", | |
"        try:\n", | |
"            x, y, z = map(int, key.split('.'))\n", | |
"        except (AttributeError, ValueError):\n", | |
"            # Not of the form 'int.int.int' (e.g. '.zattrs'): signal a missing\n", | |
"            # key the way a mapping is expected to, not with a parsing error.\n", | |
"            raise KeyError(key) from None\n", | |
"        if not 0 <= x < self._data.shape[0]:\n", | |
"            # Out-of-range frames are missing keys; negative values must not\n", | |
"            # silently wrap around to the end of the data.\n", | |
"            raise KeyError(key)\n", | |
"\n", | |
"        # y and z are parsed but unused: the whole frame is returned.\n", | |
"        return self._data[x]\n", | |
"\n", | |
"    def __setitem__(self, key, value):\n", | |
"        \"\"\"Store ``value`` in the metadata dict; frame data is never written.\"\"\"\n", | |
"        # TODO(author note): possibly raise RuntimeError here instead.\n", | |
"        self._store[key] = value\n", | |
"\n", | |
"    def __delitem__(self, key):\n", | |
"        \"\"\"Deletion is not supported.\"\"\"\n", | |
"        raise NotImplementedError\n", | |
"\n", | |
"    def __contains__(self, key):\n", | |
"        \"\"\"Report whether ``key`` is an integer frame index.\"\"\"\n", | |
"        # NOTE(review): the key is compared against integer frame indices, so\n", | |
"        # string keys such as '.zarray' or '1.0.0' are reported as absent even\n", | |
"        # though __getitem__ can serve them. Left unchanged on purpose:\n", | |
"        # presumably zarr's array-creation guards consult membership, and\n", | |
"        # reporting the pre-seeded metadata as present may make zarr.create\n", | |
"        # refuse to initialise the array -- confirm before tightening this.\n", | |
"        return key in range(self._data.shape[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"id": "eac24f9d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Expose the raw file as a zarr array with one chunk per frame. The atom count\n", | |
"# comes from the same `natoms` variable used for the shape and chunks, so the\n", | |
"# store's reshape cannot drift from the declared array shape.\n", | |
"arr = zarr.create((nframes, natoms, 3),\n", | |
"                  store=HugoStore('./data.hugo', natoms=natoms),\n", | |
"                  chunks=(1, natoms, 3),\n", | |
"                  compressor=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 167, | |
"id": "8e23a454", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1.0.0\n", | |
"2.0.0\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[300., 303., 306., 309., 312., 315., 318., 321., 324., 327., 330.,\n", | |
" 333., 336., 339., 342., 345., 348., 351., 354., 357., 360., 363.,\n", | |
" 366., 369., 372., 375., 378., 381., 384., 387., 390., 393., 396.,\n", | |
" 399., 402., 405., 408., 411., 414., 417., 420., 423., 426., 429.,\n", | |
" 432., 435., 438., 441., 444., 447., 450., 453., 456., 459., 462.,\n", | |
" 465., 468., 471., 474., 477., 480., 483., 486., 489., 492., 495.,\n", | |
" 498., 501., 504., 507., 510., 513., 516., 519., 522., 525., 528.,\n", | |
" 531., 534., 537., 540., 543., 546., 549., 552., 555., 558., 561.,\n", | |
" 564., 567., 570., 573., 576., 579., 582., 585., 588., 591., 594.,\n", | |
" 597.],\n", | |
" [600., 603., 606., 609., 612., 615., 618., 621., 624., 627., 630.,\n", | |
" 633., 636., 639., 642., 645., 648., 651., 654., 657., 660., 663.,\n", | |
" 666., 669., 672., 675., 678., 681., 684., 687., 690., 693., 696.,\n", | |
" 699., 702., 705., 708., 711., 714., 717., 720., 723., 726., 729.,\n", | |
" 732., 735., 738., 741., 744., 747., 750., 753., 756., 759., 762.,\n", | |
" 765., 768., 771., 774., 777., 780., 783., 786., 789., 792., 795.,\n", | |
" 798., 801., 804., 807., 810., 813., 816., 819., 822., 825., 828.,\n", | |
" 831., 834., 837., 840., 843., 846., 849., 852., 855., 858., 861.,\n", | |
" 864., 867., 870., 873., 876., 879., 882., 885., 888., 891., 894.,\n", | |
" 897.]])" | |
] | |
}, | |
"execution_count": 167, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Frames 1 and 2, every atom, first coordinate. HugoStore.__getitem__ prints\n", | |
"# each chunk key it is asked for, which is the text shown above the result.\n", | |
"arr[1:3, :, 0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 162, | |
"id": "db73d3d2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(1, 100)" | |
] | |
}, | |
"execution_count": 162, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Shape of a one-frame slice of the in-memory source array; presumably a\n", | |
"# sanity check against the zarr-backed result -- TODO confirm.\n", | |
"data[1:2, :, 0].shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "962802c9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment