Skip to content

Instantly share code, notes, and snippets.

@roaramburu
Created October 5, 2020 17:26
Show Gist options
  • Save roaramburu/a7d147d116180c8d93e533ec4bb4f1cd to your computer and use it in GitHub Desktop.
Save roaramburu/a7d147d116180c8d93e533ec4bb4f1cd to your computer and use it in GitHub Desktop.
Blazing Notebooks 4 GPU Demo
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://34.201.57.69:8786</li>\n",
" <li><b>Dashboard: </b><a href='http://34.201.57.69:8787/status' target='_blank'>http://34.201.57.69:8787/status</a></li>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>4</li>\n",
" <li><b>Cores: </b>16</li>\n",
" <li><b>Memory: </b>65.93 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://172.31.50.204:8786' processes=4 threads=16, memory=65.93 GB>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dask.distributed import Client\n",
"\n",
"client = Client('34.201.57.69:8786')\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from blazingsql import BlazingContext"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BlazingContext ready\n"
]
}
],
"source": [
"bc = BlazingContext(dask_client = client)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(True,\n",
" '',\n",
" OrderedDict([('type', 's3'),\n",
" ('bucket_name', 'blazingsql-colab'),\n",
" ('access_key_id', ''),\n",
" ('secret_key', ''),\n",
" ('session_token', ''),\n",
" ('encryption_type', <S3EncryptionType.NONE: 1>),\n",
" ('kms_key_amazon_resource_name', ''),\n",
" ('endpoint_override', ''),\n",
" ('region', '')]))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Register the 'blazingsql-colab' AWS S3 bucket under the name 'bsql_data'\n",
"bc.s3('bsql_data', bucket_name='blazingsql-colab')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.14 s, sys: 560 ms, total: 1.7 s\n",
"Wall time: 6.01 s\n"
]
}
],
"source": [
"%%time\n",
"bc.create_table('lineitem', 's3://bsql_data/tpch_sf100/lineitem/')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.64 s, sys: 72.4 ms, total: 1.71 s\n",
"Wall time: 2.61 s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count(*)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>600037902</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count(*)\n",
"0 600037902"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"bc.sql('select count(*) from lineitem').head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.28 s, sys: 65.3 ms, total: 1.34 s\n",
"Wall time: 13.7 s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>l_returnflag</th>\n",
" <th>l_linestatus</th>\n",
" <th>sum_qty</th>\n",
" <th>sum_base_price</th>\n",
" <th>sum_disc_price</th>\n",
" <th>sum_charge</th>\n",
" <th>avg_qty</th>\n",
" <th>avg_price</th>\n",
" <th>avg_disc</th>\n",
" <th>count_order</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>F</td>\n",
" <td>3.770265e+09</td>\n",
" <td>5.654431e+12</td>\n",
" <td>5.372649e+12</td>\n",
" <td>5.586929e+12</td>\n",
" <td>25.466522</td>\n",
" <td>38193.261719</td>\n",
" <td>0.050031</td>\n",
" <td>148047881</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price \\\n",
"0 A F 3.770265e+09 5.654431e+12 5.372649e+12 \n",
"\n",
" sum_charge avg_qty avg_price avg_disc count_order \n",
"0 5.586929e+12 25.466522 38193.261719 0.050031 148047881 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# TPC-H Q1: pricing summary report over lineitem\n",
"query =\"\"\"\n",
" select\n",
" l_returnflag,\n",
" l_linestatus,\n",
" sum(l_quantity) as sum_qty,\n",
" sum(l_extendedprice) as sum_base_price,\n",
" sum(l_extendedprice*(1-l_discount)) as sum_disc_price,\n",
" sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,\n",
" avg(l_quantity) as avg_qty,\n",
" avg(l_extendedprice) as avg_price,\n",
" avg(l_discount) as avg_disc,\n",
" count(*) as count_order\n",
" from\n",
" lineitem\n",
" where\n",
" l_shipdate <= date '1998-12-01' - interval '90' day\n",
" group by\n",
" l_returnflag,\n",
" l_linestatus\n",
" order by\n",
" l_returnflag,\n",
" l_linestatus\n",
"\"\"\"\n",
"\n",
"bc.sql(query).head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Rapids Stable",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment