Created
September 18, 2022 14:41
-
-
Save daniel-falk/c58eae122acf730607aeeddaf1848229 to your computer and use it in GitHub Desktop.
load_from_hub.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"name": "load_from_hub.ipynb", | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyMNr4XAl6N2XaH84h2K9M3F", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/daniel-falk/c58eae122acf730607aeeddaf1848229/untitled2.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "VNZJDhY--SwK", | |
"outputId": "4b57cc61-c9a6-4ffa-b043-005014718c7a" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Requirement already satisfied: hub in /usr/local/lib/python3.7/dist-packages (2.8.4)\n", | |
"Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from hub) (4.64.1)\n", | |
"Requirement already satisfied: pyjwt in /usr/local/lib/python3.7/dist-packages (from hub) (2.5.0)\n", | |
"Requirement already satisfied: pillow in /usr/local/lib/python3.7/dist-packages (from hub) (7.1.2)\n", | |
"Requirement already satisfied: pathos in /usr/local/lib/python3.7/dist-packages (from hub) (0.2.9)\n", | |
"Requirement already satisfied: numcodecs in /usr/local/lib/python3.7/dist-packages (from hub) (0.10.2)\n", | |
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from hub) (7.1.2)\n", | |
"Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from hub) (1.21.6)\n", | |
"Requirement already satisfied: boto3 in /usr/local/lib/python3.7/dist-packages (from hub) (1.24.75)\n", | |
"Requirement already satisfied: humbug>=0.2.6 in /usr/local/lib/python3.7/dist-packages (from hub) (0.2.7)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from humbug>=0.2.6->hub) (2.23.0)\n", | |
"Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /usr/local/lib/python3.7/dist-packages (from boto3->hub) (1.0.1)\n", | |
"Requirement already satisfied: botocore<1.28.0,>=1.27.75 in /usr/local/lib/python3.7/dist-packages (from boto3->hub) (1.27.75)\n", | |
"Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from boto3->hub) (0.6.0)\n", | |
"Requirement already satisfied: urllib3<1.27,>=1.25.4 in /usr/local/lib/python3.7/dist-packages (from botocore<1.28.0,>=1.27.75->boto3->hub) (1.25.11)\n", | |
"Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.7/dist-packages (from botocore<1.28.0,>=1.27.75->boto3->hub) (2.8.2)\n", | |
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.28.0,>=1.27.75->boto3->hub) (1.15.0)\n", | |
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.7/dist-packages (from numcodecs->hub) (0.4)\n", | |
"Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from numcodecs->hub) (4.1.1)\n", | |
"Requirement already satisfied: dill>=0.3.5.1 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (0.3.5.1)\n", | |
"Requirement already satisfied: ppft>=1.7.6.5 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (1.7.6.5)\n", | |
"Requirement already satisfied: multiprocess>=0.70.13 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (0.70.13)\n", | |
"Requirement already satisfied: pox>=0.3.1 in /usr/local/lib/python3.7/dist-packages (from pathos->hub) (0.3.1)\n", | |
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->humbug>=0.2.6->hub) (2.10)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->humbug>=0.2.6->hub) (2022.6.15)\n", | |
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->humbug>=0.2.6->hub) (3.0.4)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Pin the release that the recorded outputs of this notebook were produced with,\n", | |
"# so a fresh runtime resolves the same API instead of whatever is newest.\n", | |
"# %pip (rather than !pip) installs into the environment of the running kernel.\n", | |
"%pip install -q hub==2.8.4\n", | |
"import hub" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Dataset to benchmark, addressed by its hub:// path.\n", | |
"DATASET_URI = \"hub://activeloop/mnist-test\"\n", | |
"hub_ds = hub.load(DATASET_URI)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "HXuqTZnD-eJa", | |
"outputId": "f92d6e9e-ae28-42dd-95cf-e3d8ecf6949c" | |
}, | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"hub://activeloop/mnist-test loaded successfully.\n", | |
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/mnist-test\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import time\n", | |
"\n", | |
"# Iterating a Hub dataset yields lazy views: sample[\"images\"] on its own only\n", | |
"# builds a tensor handle, so .numpy() is required to actually fetch and decode\n", | |
"# the image. Without it this loop times indexing, not data loading.\n", | |
"t0 = time.perf_counter()  # monotonic clock, intended for measuring intervals\n", | |
"for i, sample in enumerate(hub_ds):\n", | |
"    sample[\"images\"].numpy()  # Access the image to make sure it's loaded\n", | |
"\n", | |
"    if i % 10 == 0:\n", | |
"        # Running average since t0; hub_time keeps the latest value after the loop.\n", | |
"        hub_time = (time.perf_counter() - t0) / (i + 1)\n", | |
"        print(f\"{i}: {hub_time} seconds per sample\")\n", | |
"\n", | |
"    if i > 1000:\n", | |
"        break" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "KnxpswXC-wk_", | |
"outputId": "0aa6ed59-3912-4157-a769-6d1f29cc4bdf" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"0: 0.0023403167724609375 seconds per sample\n", | |
"10: 0.00044900720769708806 seconds per sample\n", | |
"20: 0.00029596828279041107 seconds per sample\n", | |
"30: 0.00021849140044181577 seconds per sample\n", | |
"40: 0.00020253949049042492 seconds per sample\n", | |
"50: 0.00017305916430903416 seconds per sample\n", | |
"60: 0.0001536041009621542 seconds per sample\n", | |
"70: 0.00013904504373039997 seconds per sample\n", | |
"80: 0.00012873425895785107 seconds per sample\n", | |
"90: 0.00012005030453860105 seconds per sample\n", | |
"100: 0.00011329603667306428 seconds per sample\n", | |
"110: 0.00013678782695048563 seconds per sample\n", | |
"120: 0.00013012137294800814 seconds per sample\n", | |
"130: 0.00013159431573998837 seconds per sample\n", | |
"140: 0.00012965405240971992 seconds per sample\n", | |
"150: 0.0001244876558417516 seconds per sample\n", | |
"160: 0.00012098187985627547 seconds per sample\n", | |
"170: 0.00012138851901941132 seconds per sample\n", | |
"180: 0.00011879567941908019 seconds per sample\n", | |
"190: 0.0001190208015641617 seconds per sample\n", | |
"200: 0.0001408069287959616 seconds per sample\n", | |
"210: 0.0001390658283685621 seconds per sample\n", | |
"220: 0.00013739490940560043 seconds per sample\n", | |
"230: 0.00014363016401018416 seconds per sample\n", | |
"240: 0.00014326087666745007 seconds per sample\n", | |
"250: 0.0001424945208180948 seconds per sample\n", | |
"260: 0.000139262941148546 seconds per sample\n", | |
"270: 0.0001396596211788839 seconds per sample\n", | |
"280: 0.00013682681046346752 seconds per sample\n", | |
"290: 0.00013390193690139402 seconds per sample\n", | |
"300: 0.00013123715042671888 seconds per sample\n", | |
"310: 0.00012868163669990958 seconds per sample\n", | |
"320: 0.00012634030755063826 seconds per sample\n", | |
"330: 0.00012405401270195075 seconds per sample\n", | |
"340: 0.00012748472152217743 seconds per sample\n", | |
"350: 0.00012548085291501124 seconds per sample\n", | |
"360: 0.00012361111733391676 seconds per sample\n", | |
"370: 0.00012927942198884455 seconds per sample\n", | |
"380: 0.00012735369324371257 seconds per sample\n", | |
"390: 0.00012541731910022629 seconds per sample\n", | |
"400: 0.00012360487198294546 seconds per sample\n", | |
"410: 0.00012185277730008981 seconds per sample\n", | |
"420: 0.00012021393220951325 seconds per sample\n", | |
"430: 0.00011860798793713067 seconds per sample\n", | |
"440: 0.00012439665069926084 seconds per sample\n", | |
"450: 0.00012289869811741053 seconds per sample\n", | |
"460: 0.00012134936781612239 seconds per sample\n", | |
"470: 0.00011989721067392143 seconds per sample\n", | |
"480: 0.00011847817228638457 seconds per sample\n", | |
"490: 0.0001171261618433562 seconds per sample\n", | |
"500: 0.00011579386012520857 seconds per sample\n", | |
"510: 0.00012082269746963291 seconds per sample\n", | |
"520: 0.00011960695892744009 seconds per sample\n", | |
"530: 0.00011835439505999146 seconds per sample\n", | |
"540: 0.00011712918660556983 seconds per sample\n", | |
"550: 0.00011592162282843339 seconds per sample\n", | |
"560: 0.0001148523068895527 seconds per sample\n", | |
"570: 0.00011373109032519018 seconds per sample\n", | |
"580: 0.00011266693601099431 seconds per sample\n", | |
"590: 0.00011160894093779742 seconds per sample\n", | |
"600: 0.00011065121300010237 seconds per sample\n", | |
"610: 0.00011720009599301702 seconds per sample\n", | |
"620: 0.00011620352617592435 seconds per sample\n", | |
"630: 0.00011524798941876734 seconds per sample\n", | |
"640: 0.00011436206502215167 seconds per sample\n", | |
"650: 0.00011355353207449026 seconds per sample\n", | |
"660: 0.00011263564567161941 seconds per sample\n", | |
"670: 0.00011175826480598279 seconds per sample\n", | |
"680: 0.00011094411214192708 seconds per sample\n", | |
"690: 0.00011008175685678308 seconds per sample\n", | |
"700: 0.0001092780163556805 seconds per sample\n", | |
"710: 0.00010846335173659183 seconds per sample\n", | |
"720: 0.000107738082187351 seconds per sample\n", | |
"730: 0.00010696807736084986 seconds per sample\n", | |
"740: 0.00010625006538010158 seconds per sample\n", | |
"750: 0.00010554006350500764 seconds per sample\n", | |
"760: 0.00010484715485541485 seconds per sample\n", | |
"770: 0.00010419819605489649 seconds per sample\n", | |
"780: 0.00010352098071773593 seconds per sample\n", | |
"790: 0.00010287234213499896 seconds per sample\n", | |
"800: 0.00010226430666729454 seconds per sample\n", | |
"810: 0.00010169713623573688 seconds per sample\n", | |
"820: 0.00010112316396436796 seconds per sample\n", | |
"830: 0.00010057075215877895 seconds per sample\n", | |
"840: 0.00010041844688896333 seconds per sample\n", | |
"850: 9.998283431056523e-05 seconds per sample\n", | |
"860: 9.946956036398219e-05 seconds per sample\n", | |
"870: 9.895219594821864e-05 seconds per sample\n", | |
"880: 9.846200196074574e-05 seconds per sample\n", | |
"890: 9.796675608214305e-05 seconds per sample\n", | |
"900: 9.752722347483916e-05 seconds per sample\n", | |
"910: 9.70596539594732e-05 seconds per sample\n", | |
"920: 9.652431831815473e-05 seconds per sample\n", | |
"930: 9.605579806449718e-05 seconds per sample\n", | |
"940: 9.556227105330204e-05 seconds per sample\n", | |
"950: 9.511246666171198e-05 seconds per sample\n", | |
"960: 9.462587791228518e-05 seconds per sample\n", | |
"970: 9.419301756869629e-05 seconds per sample\n", | |
"980: 9.373835953489842e-05 seconds per sample\n", | |
"990: 9.333930992334329e-05 seconds per sample\n", | |
"1000: 9.466408492325545e-05 seconds per sample\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import time\n", | |
"\n", | |
"# Build the tf.data wrapper before starting the clock: hub_ds itself was also\n", | |
"# loaded outside any timed region, so only iteration should be measured here.\n", | |
"tf_ds = hub_ds.tensorflow()\n", | |
"\n", | |
"t0 = time.perf_counter()  # monotonic clock, intended for measuring intervals\n", | |
"for i, sample in enumerate(tf_ds):\n", | |
"    sample[\"images\"]  # Access the image to make sure it's loaded\n", | |
"\n", | |
"    if i % 10 == 0:\n", | |
"        # Running average since t0; tf_time keeps the latest value after the loop.\n", | |
"        tf_time = (time.perf_counter() - t0) / (i + 1)\n", | |
"        print(f\"{i}: {tf_time} seconds per sample\")\n", | |
"\n", | |
"    if i > 1000:\n", | |
"        break" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "brfXuAst_LEx", | |
"outputId": "30cf21f2-c32d-42e5-acf9-8e09667ecf3a" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"0: 2.5499863624572754 seconds per sample\n", | |
"10: 0.2446353869004683 seconds per sample\n", | |
"20: 0.1349175430479504 seconds per sample\n", | |
"30: 0.09664191738251716 seconds per sample\n", | |
"40: 0.07681980947168862 seconds per sample\n", | |
"50: 0.06613618252324123 seconds per sample\n", | |
"60: 0.057545884710843445 seconds per sample\n", | |
"70: 0.05134633225454411 seconds per sample\n", | |
"80: 0.04689141850412628 seconds per sample\n", | |
"90: 0.04354245322091239 seconds per sample\n", | |
"100: 0.04098711391486744 seconds per sample\n", | |
"110: 0.038674784136248065 seconds per sample\n", | |
"120: 0.036734787885807765 seconds per sample\n", | |
"130: 0.035161020191571184 seconds per sample\n", | |
"140: 0.03381282894323904 seconds per sample\n", | |
"150: 0.032525776237841475 seconds per sample\n", | |
"160: 0.03146453525709069 seconds per sample\n", | |
"170: 0.030439116104304442 seconds per sample\n", | |
"180: 0.029594832362391015 seconds per sample\n", | |
"190: 0.02876018728885351 seconds per sample\n", | |
"200: 0.028106065531868247 seconds per sample\n", | |
"210: 0.02754987020628147 seconds per sample\n", | |
"220: 0.026983977442952844 seconds per sample\n", | |
"230: 0.02648484861695921 seconds per sample\n", | |
"240: 0.02606088689748677 seconds per sample\n", | |
"250: 0.025639268981508048 seconds per sample\n", | |
"260: 0.026758706432649458 seconds per sample\n", | |
"270: 0.028794135554690202 seconds per sample\n", | |
"280: 0.028282693272383613 seconds per sample\n", | |
"290: 0.027772724833275444 seconds per sample\n", | |
"300: 0.027268230717047506 seconds per sample\n", | |
"310: 0.026819964313813727 seconds per sample\n", | |
"320: 0.026403941098031968 seconds per sample\n", | |
"330: 0.025994226651609484 seconds per sample\n", | |
"340: 0.025647930385779776 seconds per sample\n", | |
"350: 0.025299713482544294 seconds per sample\n", | |
"360: 0.024995083954195568 seconds per sample\n", | |
"370: 0.024684292929513112 seconds per sample\n", | |
"380: 0.024388362103559838 seconds per sample\n", | |
"390: 0.024118682612543522 seconds per sample\n", | |
"400: 0.02383773760902614 seconds per sample\n", | |
"410: 0.023587406116680507 seconds per sample\n", | |
"420: 0.023334799922843444 seconds per sample\n", | |
"430: 0.02311845113395815 seconds per sample\n", | |
"440: 0.022908488639087635 seconds per sample\n", | |
"450: 0.022694417484055072 seconds per sample\n", | |
"460: 0.02249204103962201 seconds per sample\n", | |
"470: 0.0222954299546098 seconds per sample\n", | |
"480: 0.02211146196060022 seconds per sample\n", | |
"490: 0.0219388906436151 seconds per sample\n", | |
"500: 0.02177416635844522 seconds per sample\n", | |
"510: 0.021608307403594314 seconds per sample\n", | |
"520: 0.02144949861771772 seconds per sample\n", | |
"530: 0.021318212067340053 seconds per sample\n", | |
"540: 0.021276324161099418 seconds per sample\n", | |
"550: 0.021199111713471733 seconds per sample\n", | |
"560: 0.021162507793270117 seconds per sample\n", | |
"570: 0.021061609588863554 seconds per sample\n", | |
"580: 0.020974042698766605 seconds per sample\n", | |
"590: 0.020901572684144408 seconds per sample\n", | |
"600: 0.020755505601498928 seconds per sample\n", | |
"610: 0.02061978221524952 seconds per sample\n", | |
"620: 0.020484183721496286 seconds per sample\n", | |
"630: 0.020357846458060238 seconds per sample\n", | |
"640: 0.020241918876278978 seconds per sample\n", | |
"650: 0.020127672204224196 seconds per sample\n", | |
"660: 0.02002317028940175 seconds per sample\n", | |
"670: 0.01992295573614038 seconds per sample\n", | |
"680: 0.019820148199140238 seconds per sample\n", | |
"690: 0.019718368215602317 seconds per sample\n", | |
"700: 0.019622194613947846 seconds per sample\n", | |
"710: 0.01953984882928819 seconds per sample\n", | |
"720: 0.019442752727027078 seconds per sample\n", | |
"730: 0.019360974777576534 seconds per sample\n", | |
"740: 0.01926791716201103 seconds per sample\n", | |
"750: 0.01918858472262813 seconds per sample\n", | |
"760: 0.019111050882727965 seconds per sample\n", | |
"770: 0.020481179815012813 seconds per sample\n", | |
"780: 0.020399160482819347 seconds per sample\n", | |
"790: 0.020299239646921267 seconds per sample\n", | |
"800: 0.020197581709100958 seconds per sample\n", | |
"810: 0.020100509782313713 seconds per sample\n", | |
"820: 0.0200064675961865 seconds per sample\n", | |
"830: 0.01992182238150733 seconds per sample\n", | |
"840: 0.01983856843002628 seconds per sample\n", | |
"850: 0.01976132953208986 seconds per sample\n", | |
"860: 0.019684644249396595 seconds per sample\n", | |
"870: 0.019605942352767927 seconds per sample\n", | |
"880: 0.0195262732489561 seconds per sample\n", | |
"890: 0.019454815037188974 seconds per sample\n", | |
"900: 0.019378571346253322 seconds per sample\n", | |
"910: 0.01932675281026361 seconds per sample\n", | |
"920: 0.019277630100809403 seconds per sample\n", | |
"930: 0.019214135875763365 seconds per sample\n", | |
"940: 0.019151381554436353 seconds per sample\n", | |
"950: 0.01909038872623544 seconds per sample\n", | |
"960: 0.01911944157127039 seconds per sample\n", | |
"970: 0.019062647981574924 seconds per sample\n", | |
"980: 0.01899950997666603 seconds per sample\n", | |
"990: 0.018944131858894972 seconds per sample\n", | |
"1000: 0.01888610647393988 seconds per sample\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Ratio of the two per-sample averages left behind by the timing loops.\n", | |
"slowdown = tf_time / hub_time\n", | |
"print(f\"TF dataset is {slowdown} times slower\")" | |
], | |
"metadata": { | |
"id": "K_Kg7llw_W1U", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "6e4f6ae6-dc9a-4dbc-be2d-0bbf1cfa5ac9" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"TF dataset is 199.50656565529493 times slower\n" | |
] | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment