Created
September 10, 2020 20:52
-
-
Save roaramburu/ee16f86953b7011ad0ee0f31684a4b48 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table style=\"border: 2px solid white;\">\n", | |
"<tr>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3 style=\"text-align: left;\">Client</h3>\n", | |
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n", | |
" <li><b>Scheduler: </b>tcp://54.234.176.69:8786</li>\n", | |
" <li><b>Dashboard: </b><a href='http://54.234.176.69:8787/status' target='_blank'>http://54.234.176.69:8787/status</a></li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"<td style=\"vertical-align: top; border: 0px solid white\">\n", | |
"<h3 style=\"text-align: left;\">Cluster</h3>\n", | |
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n", | |
" <li><b>Workers: </b>32</li>\n", | |
" <li><b>Cores: </b>128</li>\n", | |
" <li><b>Memory: </b>527.41 GB</li>\n", | |
"</ul>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<Client: 'tcp://172.31.25.125:8786' processes=32 threads=128, memory=527.41 GB>" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"\n", | |
"client = Client('54.234.176.69:8786')\n", | |
"client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from blazingsql import BlazingContext" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"BlazingContext ready\n" | |
] | |
} | |
], | |
"source": [ | |
"bc = BlazingContext(dask_client = client)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(True,\n", | |
" '',\n", | |
" OrderedDict([('type', 's3'),\n", | |
" ('bucket_name', 'blazingsql-colab'),\n", | |
" ('access_key_id', ''),\n", | |
" ('secret_key', ''),\n", | |
" ('session_token', ''),\n", | |
" ('encryption_type', <S3EncryptionType.NONE: 1>),\n", | |
" ('kms_key_amazon_resource_name', ''),\n", | |
" ('endpoint_override', ''),\n", | |
" ('region', '')]))" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# register AWS S3 bucket\n", | |
"bc.s3('bsql_data', bucket_name='blazingsql-colab')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.18 s, sys: 507 ms, total: 1.69 s\n", | |
"Wall time: 13.9 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.create_table('lineitem', 's3://bsql_data/tpch_sf1000/lineitem/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.41 s, sys: 33.2 ms, total: 1.44 s\n", | |
"Wall time: 3.39 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count(*)</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5999989709</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count(*)\n", | |
"0 5999989709" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.sql('select count(*) from lineitem').head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 1.24 s, sys: 39.5 ms, total: 1.28 s\n", | |
"Wall time: 17.3 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>l_returnflag</th>\n", | |
" <th>l_linestatus</th>\n", | |
" <th>sum_qty</th>\n", | |
" <th>sum_base_price</th>\n", | |
" <th>sum_disc_price</th>\n", | |
" <th>sum_charge</th>\n", | |
" <th>avg_qty</th>\n", | |
" <th>avg_price</th>\n", | |
" <th>avg_disc</th>\n", | |
" <th>count_order</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>F</td>\n", | |
" <td>3.775711e+10</td>\n", | |
" <td>5.661653e+13</td>\n", | |
" <td>5.378567e+13</td>\n", | |
" <td>5.593707e+13</td>\n", | |
" <td>25.499822</td>\n", | |
" <td>38236.804903</td>\n", | |
" <td>0.050001</td>\n", | |
" <td>1480681438</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price \\\n", | |
"0 A F 3.775711e+10 5.661653e+13 5.378567e+13 \n", | |
"\n", | |
" sum_charge avg_qty avg_price avg_disc count_order \n", | |
"0 5.593707e+13 25.499822 38236.804903 0.050001 1480681438 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Q1\n", | |
"query =\"\"\"\n", | |
" select\n", | |
" l_returnflag,\n", | |
" l_linestatus,\n", | |
" sum(l_quantity) as sum_qty,\n", | |
" sum(l_extendedprice) as sum_base_price,\n", | |
" sum(l_extendedprice*(1-l_discount)) as sum_disc_price,\n", | |
" sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,\n", | |
" avg(l_quantity) as avg_qty,\n", | |
" avg(l_extendedprice) as avg_price,\n", | |
" avg(l_discount) as avg_disc,\n", | |
" count(*) as count_order\n", | |
" from\n", | |
" lineitem\n", | |
" where\n", | |
" l_shipdate <= date '1998-12-01' - interval '90' day\n", | |
" group by\n", | |
" l_returnflag,\n", | |
" l_linestatus\n", | |
" order by\n", | |
" l_returnflag,\n", | |
" l_linestatus\n", | |
"\"\"\"\n", | |
"\n", | |
"bc.sql(query).head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 693 ms, sys: 34.8 ms, total: 728 ms\n", | |
"Wall time: 15.3 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.create_table('orders', 's3://bsql_data/tpch_sf1000/orders/')\n", | |
"\n", | |
"bc.create_table('customer', 's3://bsql_data/tpch_sf1000/customer/')\n", | |
"\n", | |
"bc.create_table('part', 's3://bsql_data/tpch_sf1000/part/')\n", | |
"\n", | |
"bc.create_table('partsupp', 's3://bsql_data/tpch_sf1000/partsupp/')\n", | |
"\n", | |
"bc.create_table('supplier', 's3://bsql_data/tpch_sf1000/supplier/')\n", | |
"\n", | |
"bc.create_table('nation', 's3://bsql_data/tpch_sf1/nation/')\n", | |
"\n", | |
"bc.create_table('region', 's3://bsql_data/tpch_sf1/region/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 905 ms, sys: 13.9 ms, total: 919 ms\n", | |
"Wall time: 15.6 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>o_orderpriority</th>\n", | |
" <th>order_count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1-URGENT</td>\n", | |
" <td>275099515</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2-HIGH</td>\n", | |
" <td>275096174</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" o_orderpriority order_count\n", | |
"0 1-URGENT 275099515\n", | |
"1 2-HIGH 275096174" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Q4\n", | |
"query = \"\"\" \n", | |
" select\n", | |
" o_orderpriority, \n", | |
" count(*) as order_count\n", | |
" from\n", | |
" orders\n", | |
" where\n", | |
" exists (\n", | |
" select\n", | |
" *\n", | |
" from\n", | |
" lineitem\n", | |
" where\n", | |
" l_orderkey = o_orderkey\n", | |
" and l_commitdate < l_receiptdate\n", | |
" )\n", | |
" group by\n", | |
" o_orderpriority\n", | |
" order by\n", | |
" o_orderpriority\n", | |
" \"\"\"\n", | |
"\n", | |
"\n", | |
"bc.sql(query).head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Rapids Stable", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment