Created
November 12, 2022 08:09
-
-
Save daniel-falk/2fae689f61345ab109c9810c67ed7ecd to your computer and use it in GitHub Desktop.
deeplake_bug.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyOEgStnfvYmMzgs2r86pnpi", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/daniel-falk/2fae689f61345ab109c9810c67ed7ecd/deeplake_bug.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"TOKEN=\"REPLACE_ME\"" | |
], | |
"metadata": { | |
"id": "BOfKhcX1xlVL" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "MzyKTo-vxbbc" | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install deeplake" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import deeplake\n", | |
"import sys\n", | |
"\n", | |
"print(\"Deeplake: \", deeplake.__version__)\n", | |
"print(\"Python: \", sys.version)" | |
], | |
"metadata": { | |
"id": "RWLOYpkAyZwV" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Fast\n", | |
"Approx 70-130 images per second in notebook.\n", | |
"Approx 110 images per second on my desktop." | |
], | |
"metadata": { | |
"id": "LckqjGyWxxav" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%%time\n", | |
"\n", | |
"ds = deeplake.load('hub://activeloop/imagenet-train', token=TOKEN)\n", | |
"\n", | |
"@deeplake.compute\n", | |
"def calculate_mean(sample_in, sample_out):\n", | |
" sample_in.images.numpy()\n", | |
" return None\n", | |
"\n", | |
"tmp_ds = deeplake.empty(\"mem://tmp_ds\")\n", | |
"tmp_ds.create_tensor(\"dummy\", htype=\"generic\")\n", | |
"calculate_mean().eval(ds, tmp_ds, num_workers=8)" | |
], | |
"metadata": { | |
"id": "rombtM_Hx2HM" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Slow\n", | |
"The only difference is that the output dataset does not have any tensors specified.\n", | |
"\n", | |
"Approx 30 images per second in notebook. Approx 1.5 images per second on my desktop." | |
], | |
"metadata": { | |
"id": "s0ScH2pkxehZ" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"%%time\n", | |
"ds = deeplake.load('hub://activeloop/imagenet-train', token=TOKEN)\n", | |
"\n", | |
"@deeplake.compute\n", | |
"def calculate_mean(sample_in, sample_out):\n", | |
" sample_in.images.numpy()\n", | |
" return None\n", | |
"\n", | |
"tmp_ds = deeplake.empty(\"mem://tmp_ds\")\n", | |
"#tmp_ds.create_tensor(\"dummy\", htype=\"generic\")\n", | |
"calculate_mean().eval(ds, tmp_ds, num_workers=8)" | |
], | |
"metadata": { | |
"id": "cwqmMDHXxiJI" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment