Skip to content

Instantly share code, notes, and snippets.

@fjetter
Last active October 26, 2023 15:30
Show Gist options
  • Save fjetter/8e7a06b664838b37433f889c6ce63ca5 to your computer and use it in GitHub Desktop.
Save fjetter/8e7a06b664838b37433f889c6ce63ca5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e21fe63f-f6a4-47fc-9497-dbb3aa7f4ddc",
"metadata": {},
"outputs": [],
"source": [
"from distributed.protocol.serialize import Serialize, serialize"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8b30db4e-55ae-4700-a73f-ce77be6eee62",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"buffer = [\n",
" np.random.random((4, ))\n",
"]\n",
"# If the array is in a simple list that is being serialized, we're just passing the memoryview\n",
"assert type(serialize(buffer[:1])[1][0]) is memoryview\n",
"serialize(buffer[:1])[1][0] == buffer[0].data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7a870a80-1dd1-4a29-8c07-5f2e588cd3fd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"buffer = [\n",
" Serialize(np.random.random((4, )))\n",
"]\n",
"# If we're using a Serialize object, it's also there (but there is additional payload due to the Serialize object at position 0 of the serialize output, so the array's buffer is at index 1)\n",
"serialize(buffer[:1])[1][1] == buffer[0].data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dac7a9ce-fde1-4485-9538-b3a07a3a51c6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"buffer = [\n",
" (1, np.random.random((4, )))\n",
"]\n",
"# Now, if we have nested objects, i.e. a tuple in a list, we're recursing into the collections.\n",
"# However, we're only looking at the first item and checking whether there is a special serializer registered\n",
"# for that item. In this case it is just an integer, so we're throwing the tuple into a plain pickle.dumps,\n",
"# i.e. it copies the data\n",
"assert type(serialize(buffer[:1])[1][0]) is not memoryview\n",
"assert type(serialize(buffer[:1])[1][0]) is bytes\n",
"serialize(buffer)[1][0] == buffer[0][1].data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "6cb4c7dd-dc9b-44c0-b637-3121bfdcc899",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"buffer = [\n",
" (np.random.random((4, )), 1)\n",
"]\n",
"# IFF, however, the first item is dask-serializable, it will attempt to do so and we again get a memoryview\n",
"assert type(serialize(buffer[:1])[1][0]) is memoryview\n",
"serialize(buffer)[1][0] == buffer[0][0].data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2213f606-7184-40b2-8842-944c30315c7c",
"metadata": {},
"outputs": [],
"source": [
"class _List(list):\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "289e7a6b-809e-409d-ae9d-8f1b4e0eb65e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"# If we now wrap the entire thing in a custom list object or any iterable, the array is still detected\n",
"# as long as it is on the first level\n",
"buffer = _List([\n",
" np.random.random((4, ))\n",
"])\n",
"serialize(buffer)[1][1] == buffer[0].data"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "306709be-7747-4a82-8ad7-a6d7d41a9881",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"# However, we no longer recurse into the nested collections, so the whole object is just pickled\n",
"buffer = _List([\n",
" (np.random.random((4, )), 1)\n",
"])\n",
"serialize(buffer)[1][0] == buffer[0][0].data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment