Last active
October 26, 2023 15:30
-
-
Save fjetter/8e7a06b664838b37433f889c6ce63ca5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "e21fe63f-f6a4-47fc-9497-dbb3aa7f4ddc", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from distributed.protocol.serialize import Serialize, serialize" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "8b30db4e-55ae-4700-a73f-ce77be6eee62", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"buffer = [\n", | |
" np.random.random((4, ))\n", | |
"]\n", | |
"# If the array is in a simple list that is being serialized, we just pass its memoryview through\n", | |
"assert type(serialize(buffer[:1])[1][0]) is memoryview\n", | |
"serialize(buffer[:1])[1][0] == buffer[0].data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "7a870a80-1dd1-4a29-8c07-5f2e588cd3fd", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"buffer = [\n", | |
" Serialize(np.random.random((4, )))\n", | |
"]\n", | |
"# If we wrap the array in a Serialize object, the buffer is also there (but the Serialize object adds extra payload at position 0 of the serialize output, so the array's buffer is found at index 1)\n", | |
"serialize(buffer[:1])[1][1] == buffer[0].data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "dac7a9ce-fde1-4485-9538-b3a07a3a51c6", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"buffer = [\n", | |
" (1, np.random.random((4, )))\n", | |
"]\n", | |
"# Now, if we have nested objects, e.g. a tuple in a list, we recurse into the collections.\n", | |
"# However, we only look at the first item and check whether there is a special serializer registered\n", | |
"# for that item. In this case it is just an integer, so we throw the whole tuple into a plain pickle.dumps,\n", | |
"# i.e. the array data is copied into a bytes object\n", | |
"assert type(serialize(buffer[:1])[1][0]) is not memoryview\n", | |
"assert type(serialize(buffer[:1])[1][0]) is bytes\n", | |
"serialize(buffer)[1][0] == buffer[0][1].data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "6cb4c7dd-dc9b-44c0-b637-3121bfdcc899", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"buffer = [\n", | |
" (np.random.random((4, )), 1)\n", | |
"]\n", | |
"# If (and only if) the first item is dask-serializable, however, it will attempt to serialize it that way and we get a memoryview again\n", | |
"assert type(serialize(buffer[:1])[1][0]) is memoryview\n", | |
"serialize(buffer)[1][0] == buffer[0][0].data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "2213f606-7184-40b2-8842-944c30315c7c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class _List(list):\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "289e7a6b-809e-409d-ae9d-8f1b4e0eb65e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"# If we now wrap the entire thing in a custom list object (or any iterable), the array is still detected,\n", | |
"# as long as it is on the first level\n", | |
"buffer = _List([\n", | |
" np.random.random((4, ))\n", | |
"])\n", | |
"serialize(buffer)[1][1] == buffer[0].data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "306709be-7747-4a82-8ad7-a6d7d41a9881", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"False" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"# However, we no longer recurse into the nested collections and this just pickles stuff\n", | |
"buffer = _List([\n", | |
" (np.random.random((4, )), 1)\n", | |
"])\n", | |
"serialize(buffer)[1][0] == buffer[0][0].data" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment