Skip to content

Instantly share code, notes, and snippets.

@daniel-falk
Created September 18, 2022 14:41
Show Gist options
  • Save daniel-falk/c58eae122acf730607aeeddaf1848229 to your computer and use it in GitHub Desktop.
Save daniel-falk/c58eae122acf730607aeeddaf1848229 to your computer and use it in GitHub Desktop.
load_from_hub.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"name": "load_from_hub.ipynb",
"collapsed_sections": [],
"authorship_tag": "ABX9TyMNr4XAl6N2XaH84h2K9M3F",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/daniel-falk/c58eae122acf730607aeeddaf1848229/untitled2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VNZJDhY--SwK",
"outputId": "4b57cc61-c9a6-4ffa-b043-005014718c7a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: hub in /usr/local/lib/python3.7/dist-packages (2.8.4)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from hub) (4.64.1)\n",
"Requirement already satisfied: pyjwt in /usr/local/lib/python3.7/dist-packages (from hub) (2.5.0)\n",
"Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from hub) (7.1.2)\n",
"Requirement already satisfied: pathos in /usr/local/lib/python3.7/dist-packages (from hub) (0.2.9)\n",
"Requirement already satisfied: numcodecs in /usr/local/lib/python3.7/dist-packages (from hub) (0.10.2)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from hub) (7.1.2)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from hub) (1.21.6)\n",
"Requirement already satisfied: boto3 in /usr/local/lib/python3.7/dist-packages (from hub) (1.24.75)\n",
"Requirement already satisfied: humbug>=0.2.6 in /usr/local/lib/python3.7/dist-packages (from hub) (0.2.7)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from humbug>=0.2.6->hub) (2.23.0)\n",
"Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from boto3->hub) (1.0.1)\n",
"Requirement already satisfied: botocore<1.28.0,>=1.27.75 in /usr/local/lib/python3.7/dist-packages (from boto3->hub) (1.27.75)\n",
"Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from boto3->hub) (0.6.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.25.4 in /usr/local/lib/python3.7/dist-packages (from botocore<1.28.0,>=1.27.75->boto3->hub) (1.25.11)\n",
"Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.7/dist-packages (from botocore<1.28.0,>=1.27.75->boto3->hub) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.28.0,>=1.27.75->boto3->hub) (1.15.0)\n",
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from numcodecs->hub) (0.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from numcodecs->hub) (4.1.1)\n",
"Requirement already satisfied: dill>=0.3.5.1 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (0.3.5.1)\n",
"Requirement already satisfied: ppft>=1.7.6.5 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (1.7.6.5)\n",
"Requirement already satisfied: multiprocess>=0.70.13 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (0.70.13)\n",
"Requirement already satisfied: pox>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (0.3.1)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->humbug>=0.2.6->hub) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->humbug>=0.2.6->hub) (2022.6.15)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->humbug>=0.2.6->hub) (3.0.4)\n"
]
}
],
"source": [
"# Pin the hub release this notebook was recorded with (2.8.4 per the pip output\n",
"# of this cell) so a re-run resolves the same API. %pip installs into the\n",
"# environment of the running kernel.\n",
"%pip install hub==2.8.4\n",
"import hub"
]
},
{
"cell_type": "code",
"source": [
"# Open the dataset stored at the Activeloop Hub URI below.\n",
"DATASET_URI = \"hub://activeloop/mnist-test\"\n",
"hub_ds = hub.load(DATASET_URI)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HXuqTZnD-eJa",
"outputId": "f92d6e9e-ae28-42dd-95cf-e3d8ecf6949c"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"hub://activeloop/mnist-test loaded successfully.\n",
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/mnist-test\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import time\n",
"\n",
"# Benchmark: average wall-clock time per sample when iterating the Hub\n",
"# dataset directly. The running average is reported every 10 samples and\n",
"# the last reported value is kept in `hub_time` for the final comparison.\n",
"t0 = time.perf_counter()  # monotonic, high-resolution clock for benchmarking\n",
"for i, sample in enumerate(hub_ds):\n",
"    # In hub 2.x, indexing a sample returns a lazy tensor handle; calling\n",
"    # .numpy() forces the image data to be read so the loop measures real\n",
"    # loading time rather than just creating the handle.\n",
"    sample[\"images\"].numpy()\n",
"\n",
"    if i % 10 == 0:\n",
"        hub_time = (time.perf_counter() - t0) / (i + 1)\n",
"        print(f\"{i}: {hub_time} seconds per sample\")\n",
"\n",
"    # Stop right after the final report at i == 1000 instead of loading one\n",
"    # more sample that is never measured.\n",
"    if i >= 1000:\n",
"        break"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KnxpswXC-wk_",
"outputId": "0aa6ed59-3912-4157-a769-6d1f29cc4bdf"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0: 0.0023403167724609375 seconds per sample\n",
"10: 0.00044900720769708806 seconds per sample\n",
"20: 0.00029596828279041107 seconds per sample\n",
"30: 0.00021849140044181577 seconds per sample\n",
"40: 0.00020253949049042492 seconds per sample\n",
"50: 0.00017305916430903416 seconds per sample\n",
"60: 0.0001536041009621542 seconds per sample\n",
"70: 0.00013904504373039997 seconds per sample\n",
"80: 0.00012873425895785107 seconds per sample\n",
"90: 0.00012005030453860105 seconds per sample\n",
"100: 0.00011329603667306428 seconds per sample\n",
"110: 0.00013678782695048563 seconds per sample\n",
"120: 0.00013012137294800814 seconds per sample\n",
"130: 0.00013159431573998837 seconds per sample\n",
"140: 0.00012965405240971992 seconds per sample\n",
"150: 0.0001244876558417516 seconds per sample\n",
"160: 0.00012098187985627547 seconds per sample\n",
"170: 0.00012138851901941132 seconds per sample\n",
"180: 0.00011879567941908019 seconds per sample\n",
"190: 0.0001190208015641617 seconds per sample\n",
"200: 0.0001408069287959616 seconds per sample\n",
"210: 0.0001390658283685621 seconds per sample\n",
"220: 0.00013739490940560043 seconds per sample\n",
"230: 0.00014363016401018416 seconds per sample\n",
"240: 0.00014326087666745007 seconds per sample\n",
"250: 0.0001424945208180948 seconds per sample\n",
"260: 0.000139262941148546 seconds per sample\n",
"270: 0.0001396596211788839 seconds per sample\n",
"280: 0.00013682681046346752 seconds per sample\n",
"290: 0.00013390193690139402 seconds per sample\n",
"300: 0.00013123715042671888 seconds per sample\n",
"310: 0.00012868163669990958 seconds per sample\n",
"320: 0.00012634030755063826 seconds per sample\n",
"330: 0.00012405401270195075 seconds per sample\n",
"340: 0.00012748472152217743 seconds per sample\n",
"350: 0.00012548085291501124 seconds per sample\n",
"360: 0.00012361111733391676 seconds per sample\n",
"370: 0.00012927942198884455 seconds per sample\n",
"380: 0.00012735369324371257 seconds per sample\n",
"390: 0.00012541731910022629 seconds per sample\n",
"400: 0.00012360487198294546 seconds per sample\n",
"410: 0.00012185277730008981 seconds per sample\n",
"420: 0.00012021393220951325 seconds per sample\n",
"430: 0.00011860798793713067 seconds per sample\n",
"440: 0.00012439665069926084 seconds per sample\n",
"450: 0.00012289869811741053 seconds per sample\n",
"460: 0.00012134936781612239 seconds per sample\n",
"470: 0.00011989721067392143 seconds per sample\n",
"480: 0.00011847817228638457 seconds per sample\n",
"490: 0.0001171261618433562 seconds per sample\n",
"500: 0.00011579386012520857 seconds per sample\n",
"510: 0.00012082269746963291 seconds per sample\n",
"520: 0.00011960695892744009 seconds per sample\n",
"530: 0.00011835439505999146 seconds per sample\n",
"540: 0.00011712918660556983 seconds per sample\n",
"550: 0.00011592162282843339 seconds per sample\n",
"560: 0.0001148523068895527 seconds per sample\n",
"570: 0.00011373109032519018 seconds per sample\n",
"580: 0.00011266693601099431 seconds per sample\n",
"590: 0.00011160894093779742 seconds per sample\n",
"600: 0.00011065121300010237 seconds per sample\n",
"610: 0.00011720009599301702 seconds per sample\n",
"620: 0.00011620352617592435 seconds per sample\n",
"630: 0.00011524798941876734 seconds per sample\n",
"640: 0.00011436206502215167 seconds per sample\n",
"650: 0.00011355353207449026 seconds per sample\n",
"660: 0.00011263564567161941 seconds per sample\n",
"670: 0.00011175826480598279 seconds per sample\n",
"680: 0.00011094411214192708 seconds per sample\n",
"690: 0.00011008175685678308 seconds per sample\n",
"700: 0.0001092780163556805 seconds per sample\n",
"710: 0.00010846335173659183 seconds per sample\n",
"720: 0.000107738082187351 seconds per sample\n",
"730: 0.00010696807736084986 seconds per sample\n",
"740: 0.00010625006538010158 seconds per sample\n",
"750: 0.00010554006350500764 seconds per sample\n",
"760: 0.00010484715485541485 seconds per sample\n",
"770: 0.00010419819605489649 seconds per sample\n",
"780: 0.00010352098071773593 seconds per sample\n",
"790: 0.00010287234213499896 seconds per sample\n",
"800: 0.00010226430666729454 seconds per sample\n",
"810: 0.00010169713623573688 seconds per sample\n",
"820: 0.00010112316396436796 seconds per sample\n",
"830: 0.00010057075215877895 seconds per sample\n",
"840: 0.00010041844688896333 seconds per sample\n",
"850: 9.998283431056523e-05 seconds per sample\n",
"860: 9.946956036398219e-05 seconds per sample\n",
"870: 9.895219594821864e-05 seconds per sample\n",
"880: 9.846200196074574e-05 seconds per sample\n",
"890: 9.796675608214305e-05 seconds per sample\n",
"900: 9.752722347483916e-05 seconds per sample\n",
"910: 9.70596539594732e-05 seconds per sample\n",
"920: 9.652431831815473e-05 seconds per sample\n",
"930: 9.605579806449718e-05 seconds per sample\n",
"940: 9.556227105330204e-05 seconds per sample\n",
"950: 9.511246666171198e-05 seconds per sample\n",
"960: 9.462587791228518e-05 seconds per sample\n",
"970: 9.419301756869629e-05 seconds per sample\n",
"980: 9.373835953489842e-05 seconds per sample\n",
"990: 9.333930992334329e-05 seconds per sample\n",
"1000: 9.466408492325545e-05 seconds per sample\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import time\n",
"\n",
"# Benchmark: average wall-clock time per sample when iterating the dataset\n",
"# through its TensorFlow wrapper. The last reported running average is kept\n",
"# in `tf_time` for the final comparison.\n",
"\n",
"# Build the TensorFlow dataset before starting the clock so that its\n",
"# construction is not counted as per-sample loading time.\n",
"tf_ds = hub_ds.tensorflow()\n",
"\n",
"t0 = time.perf_counter()  # monotonic, high-resolution clock for benchmarking\n",
"for i, sample in enumerate(tf_ds):\n",
"    sample[\"images\"]  # Access the image to make sure it's loaded\n",
"\n",
"    if i % 10 == 0:\n",
"        tf_time = (time.perf_counter() - t0) / (i + 1)\n",
"        print(f\"{i}: {tf_time} seconds per sample\")\n",
"\n",
"    # Stop right after the final report at i == 1000 instead of loading one\n",
"    # more sample that is never measured.\n",
"    if i >= 1000:\n",
"        break"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "brfXuAst_LEx",
"outputId": "30cf21f2-c32d-42e5-acf9-8e09667ecf3a"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0: 2.5499863624572754 seconds per sample\n",
"10: 0.2446353869004683 seconds per sample\n",
"20: 0.1349175430479504 seconds per sample\n",
"30: 0.09664191738251716 seconds per sample\n",
"40: 0.07681980947168862 seconds per sample\n",
"50: 0.06613618252324123 seconds per sample\n",
"60: 0.057545884710843445 seconds per sample\n",
"70: 0.05134633225454411 seconds per sample\n",
"80: 0.04689141850412628 seconds per sample\n",
"90: 0.04354245322091239 seconds per sample\n",
"100: 0.04098711391486744 seconds per sample\n",
"110: 0.038674784136248065 seconds per sample\n",
"120: 0.036734787885807765 seconds per sample\n",
"130: 0.035161020191571184 seconds per sample\n",
"140: 0.03381282894323904 seconds per sample\n",
"150: 0.032525776237841475 seconds per sample\n",
"160: 0.03146453525709069 seconds per sample\n",
"170: 0.030439116104304442 seconds per sample\n",
"180: 0.029594832362391015 seconds per sample\n",
"190: 0.02876018728885351 seconds per sample\n",
"200: 0.028106065531868247 seconds per sample\n",
"210: 0.02754987020628147 seconds per sample\n",
"220: 0.026983977442952844 seconds per sample\n",
"230: 0.02648484861695921 seconds per sample\n",
"240: 0.02606088689748677 seconds per sample\n",
"250: 0.025639268981508048 seconds per sample\n",
"260: 0.026758706432649458 seconds per sample\n",
"270: 0.028794135554690202 seconds per sample\n",
"280: 0.028282693272383613 seconds per sample\n",
"290: 0.027772724833275444 seconds per sample\n",
"300: 0.027268230717047506 seconds per sample\n",
"310: 0.026819964313813727 seconds per sample\n",
"320: 0.026403941098031968 seconds per sample\n",
"330: 0.025994226651609484 seconds per sample\n",
"340: 0.025647930385779776 seconds per sample\n",
"350: 0.025299713482544294 seconds per sample\n",
"360: 0.024995083954195568 seconds per sample\n",
"370: 0.024684292929513112 seconds per sample\n",
"380: 0.024388362103559838 seconds per sample\n",
"390: 0.024118682612543522 seconds per sample\n",
"400: 0.02383773760902614 seconds per sample\n",
"410: 0.023587406116680507 seconds per sample\n",
"420: 0.023334799922843444 seconds per sample\n",
"430: 0.02311845113395815 seconds per sample\n",
"440: 0.022908488639087635 seconds per sample\n",
"450: 0.022694417484055072 seconds per sample\n",
"460: 0.02249204103962201 seconds per sample\n",
"470: 0.0222954299546098 seconds per sample\n",
"480: 0.02211146196060022 seconds per sample\n",
"490: 0.0219388906436151 seconds per sample\n",
"500: 0.02177416635844522 seconds per sample\n",
"510: 0.021608307403594314 seconds per sample\n",
"520: 0.02144949861771772 seconds per sample\n",
"530: 0.021318212067340053 seconds per sample\n",
"540: 0.021276324161099418 seconds per sample\n",
"550: 0.021199111713471733 seconds per sample\n",
"560: 0.021162507793270117 seconds per sample\n",
"570: 0.021061609588863554 seconds per sample\n",
"580: 0.020974042698766605 seconds per sample\n",
"590: 0.020901572684144408 seconds per sample\n",
"600: 0.020755505601498928 seconds per sample\n",
"610: 0.02061978221524952 seconds per sample\n",
"620: 0.020484183721496286 seconds per sample\n",
"630: 0.020357846458060238 seconds per sample\n",
"640: 0.020241918876278978 seconds per sample\n",
"650: 0.020127672204224196 seconds per sample\n",
"660: 0.02002317028940175 seconds per sample\n",
"670: 0.01992295573614038 seconds per sample\n",
"680: 0.019820148199140238 seconds per sample\n",
"690: 0.019718368215602317 seconds per sample\n",
"700: 0.019622194613947846 seconds per sample\n",
"710: 0.01953984882928819 seconds per sample\n",
"720: 0.019442752727027078 seconds per sample\n",
"730: 0.019360974777576534 seconds per sample\n",
"740: 0.01926791716201103 seconds per sample\n",
"750: 0.01918858472262813 seconds per sample\n",
"760: 0.019111050882727965 seconds per sample\n",
"770: 0.020481179815012813 seconds per sample\n",
"780: 0.020399160482819347 seconds per sample\n",
"790: 0.020299239646921267 seconds per sample\n",
"800: 0.020197581709100958 seconds per sample\n",
"810: 0.020100509782313713 seconds per sample\n",
"820: 0.0200064675961865 seconds per sample\n",
"830: 0.01992182238150733 seconds per sample\n",
"840: 0.01983856843002628 seconds per sample\n",
"850: 0.01976132953208986 seconds per sample\n",
"860: 0.019684644249396595 seconds per sample\n",
"870: 0.019605942352767927 seconds per sample\n",
"880: 0.0195262732489561 seconds per sample\n",
"890: 0.019454815037188974 seconds per sample\n",
"900: 0.019378571346253322 seconds per sample\n",
"910: 0.01932675281026361 seconds per sample\n",
"920: 0.019277630100809403 seconds per sample\n",
"930: 0.019214135875763365 seconds per sample\n",
"940: 0.019151381554436353 seconds per sample\n",
"950: 0.01909038872623544 seconds per sample\n",
"960: 0.01911944157127039 seconds per sample\n",
"970: 0.019062647981574924 seconds per sample\n",
"980: 0.01899950997666603 seconds per sample\n",
"990: 0.018944131858894972 seconds per sample\n",
"1000: 0.01888610647393988 seconds per sample\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Ratio of the two per-sample averages measured in the cells above.\n",
"slowdown = tf_time / hub_time\n",
"print(f\"TF dataset is {slowdown} times slower\")"
],
"metadata": {
"id": "K_Kg7llw_W1U",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6e4f6ae6-dc9a-4dbc-be2d-0bbf1cfa5ac9"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"TF dataset is 199.50656565529493 times slower\n"
]
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment