Created
October 5, 2020 17:26
-
-
Save roaramburu/a7d147d116180c8d93e533ec4bb4f1cd to your computer and use it in GitHub Desktop.
Blazing Notebooks 4 GPU Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table style=\"border: 2px solid white;\">\n", | |
"<tr>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3 style=\"text-align: left;\">Client</h3>\n", | |
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n", | |
" <li><b>Scheduler: </b>tcp://34.201.57.69:8786</li>\n", | |
" <li><b>Dashboard: </b><a href='http://34.201.57.69:8787/status' target='_blank'>http://34.201.57.69:8787/status</a></li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3 style=\"text-align: left;\">Cluster</h3>\n", | |
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n", | |
" <li><b>Workers: </b>4</li>\n", | |
" <li><b>Cores: </b>16</li>\n", | |
" <li><b>Memory: </b>65.93 GB</li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Client: 'tcp://172.31.50.204:8786' processes=4 threads=16, memory=65.93 GB>" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"\n", | |
"client = Client('34.201.57.69:8786')\n", | |
"client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from blazingsql import BlazingContext" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"BlazingContext ready\n" | |
] | |
} | |
], | |
"source": [ | |
"bc = BlazingContext(dask_client = client)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(True,\n", | |
" '',\n", | |
" OrderedDict([('type', 's3'),\n", | |
" ('bucket_name', 'blazingsql-colab'),\n", | |
" ('access_key_id', ''),\n", | |
" ('secret_key', ''),\n", | |
" ('session_token', ''),\n", | |
" ('encryption_type', <S3EncryptionType.NONE: 1>),\n", | |
" ('kms_key_amazon_resource_name', ''),\n", | |
" ('endpoint_override', ''),\n", | |
" ('region', '')]))" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# register AWS S3 bucket\n", | |
"bc.s3('bsql_data', bucket_name='blazingsql-colab')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.14 s, sys: 560 ms, total: 1.7 s\n", | |
"Wall time: 6.01 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.create_table('lineitem', 's3://bsql_data/tpch_sf100/lineitem/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.64 s, sys: 72.4 ms, total: 1.71 s\n", | |
"Wall time: 2.61 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count(*)</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>600037902</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count(*)\n", | |
"0 600037902" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.sql('select count(*) from lineitem').head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.28 s, sys: 65.3 ms, total: 1.34 s\n", | |
"Wall time: 13.7 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>l_returnflag</th>\n", | |
" <th>l_linestatus</th>\n", | |
" <th>sum_qty</th>\n", | |
" <th>sum_base_price</th>\n", | |
" <th>sum_disc_price</th>\n", | |
" <th>sum_charge</th>\n", | |
" <th>avg_qty</th>\n", | |
" <th>avg_price</th>\n", | |
" <th>avg_disc</th>\n", | |
" <th>count_order</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>F</td>\n", | |
" <td>3.770265e+09</td>\n", | |
" <td>5.654431e+12</td>\n", | |
" <td>5.372649e+12</td>\n", | |
" <td>5.586929e+12</td>\n", | |
" <td>25.466522</td>\n", | |
" <td>38193.261719</td>\n", | |
" <td>0.050031</td>\n", | |
" <td>148047881</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price \\\n", | |
"0 A F 3.770265e+09 5.654431e+12 5.372649e+12 \n", | |
"\n", | |
" sum_charge avg_qty avg_price avg_disc count_order \n", | |
"0 5.586929e+12 25.466522 38193.261719 0.050031 148047881 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Q1\n", | |
"query =\"\"\"\n", | |
" select\n", | |
" l_returnflag,\n", | |
" l_linestatus,\n", | |
" sum(l_quantity) as sum_qty,\n", | |
" sum(l_extendedprice) as sum_base_price,\n", | |
" sum(l_extendedprice*(1-l_discount)) as sum_disc_price,\n", | |
" sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,\n", | |
" avg(l_quantity) as avg_qty,\n", | |
" avg(l_extendedprice) as avg_price,\n", | |
" avg(l_discount) as avg_disc,\n", | |
" count(*) as count_order\n", | |
" from\n", | |
" lineitem\n", | |
" where\n", | |
" l_shipdate <= date '1998-12-01' - interval '90' day\n", | |
" group by\n", | |
" l_returnflag,\n", | |
" l_linestatus\n", | |
" order by\n", | |
" l_returnflag,\n", | |
" l_linestatus\n", | |
"\"\"\"\n", | |
"\n", | |
"bc.sql(query).head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Rapids Stable", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment