Skip to content

Instantly share code, notes, and snippets.

@roaramburu
Created September 10, 2020 20:52
Show Gist options
  • Save roaramburu/ee16f86953b7011ad0ee0f31684a4b48 to your computer and use it in GitHub Desktop.
Save roaramburu/ee16f86953b7011ad0ee0f31684a4b48 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table style=\"border: 2px solid white;\">\n",
"<tr>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Client</h3>\n",
"<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
" <li><b>Scheduler: </b>tcp://54.234.176.69:8786</li>\n",
" <li><b>Dashboard: </b><a href='http://54.234.176.69:8787/status' target='_blank'>http://54.234.176.69:8787/status</a></li>\n",
"</ul>\n",
"</td>\n",
"<td style=\"vertical-align: top; border: 0px solid white\">\n",
"<h3 style=\"text-align: left;\">Cluster</h3>\n",
"<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
" <li><b>Workers: </b>32</li>\n",
" <li><b>Cores: </b>128</li>\n",
" <li><b>Memory: </b>527.41 GB</li>\n",
"</ul>\n",
"</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<Client: 'tcp://172.31.25.125:8786' processes=32 threads=128, memory=527.41 GB>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dask.distributed import Client\n",
"\n",
"client = Client('54.234.176.69:8786')\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from blazingsql import BlazingContext"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BlazingContext ready\n"
]
}
],
"source": [
"bc = BlazingContext(dask_client = client)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(True,\n",
" '',\n",
" OrderedDict([('type', 's3'),\n",
" ('bucket_name', 'blazingsql-colab'),\n",
" ('access_key_id', ''),\n",
" ('secret_key', ''),\n",
" ('session_token', ''),\n",
" ('encryption_type', <S3EncryptionType.NONE: 1>),\n",
" ('kms_key_amazon_resource_name', ''),\n",
" ('endpoint_override', ''),\n",
" ('region', '')]))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# register AWS S3 bucket\n",
"bc.s3('bsql_data', bucket_name='blazingsql-colab')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.18 s, sys: 507 ms, total: 1.69 s\n",
"Wall time: 13.9 s\n"
]
}
],
"source": [
"%%time\n",
"bc.create_table('lineitem', 's3://bsql_data/tpch_sf1000/lineitem/')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.41 s, sys: 33.2 ms, total: 1.44 s\n",
"Wall time: 3.39 s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count(*)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5999989709</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count(*)\n",
"0 5999989709"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"bc.sql('select count(*) from lineitem').head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1.24 s, sys: 39.5 ms, total: 1.28 s\n",
"Wall time: 17.3 s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>l_returnflag</th>\n",
" <th>l_linestatus</th>\n",
" <th>sum_qty</th>\n",
" <th>sum_base_price</th>\n",
" <th>sum_disc_price</th>\n",
" <th>sum_charge</th>\n",
" <th>avg_qty</th>\n",
" <th>avg_price</th>\n",
" <th>avg_disc</th>\n",
" <th>count_order</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>F</td>\n",
" <td>3.775711e+10</td>\n",
" <td>5.661653e+13</td>\n",
" <td>5.378567e+13</td>\n",
" <td>5.593707e+13</td>\n",
" <td>25.499822</td>\n",
" <td>38236.804903</td>\n",
" <td>0.050001</td>\n",
" <td>1480681438</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price \\\n",
"0 A F 3.775711e+10 5.661653e+13 5.378567e+13 \n",
"\n",
" sum_charge avg_qty avg_price avg_disc count_order \n",
"0 5.593707e+13 25.499822 38236.804903 0.050001 1480681438 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# Q1\n",
"query =\"\"\"\n",
" select\n",
" l_returnflag,\n",
" l_linestatus,\n",
" sum(l_quantity) as sum_qty,\n",
" sum(l_extendedprice) as sum_base_price,\n",
" sum(l_extendedprice*(1-l_discount)) as sum_disc_price,\n",
" sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,\n",
" avg(l_quantity) as avg_qty,\n",
" avg(l_extendedprice) as avg_price,\n",
" avg(l_discount) as avg_disc,\n",
" count(*) as count_order\n",
" from\n",
" lineitem\n",
" where\n",
" l_shipdate <= date '1998-12-01' - interval '90' day\n",
" group by\n",
" l_returnflag,\n",
" l_linestatus\n",
" order by\n",
" l_returnflag,\n",
" l_linestatus\n",
"\"\"\"\n",
"\n",
"bc.sql(query).head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 693 ms, sys: 34.8 ms, total: 728 ms\n",
"Wall time: 15.3 s\n"
]
}
],
"source": [
"%%time\n",
"bc.create_table('orders', 's3://bsql_data/tpch_sf1000/orders/')\n",
"\n",
"bc.create_table('customer', 's3://bsql_data/tpch_sf1000/customer/')\n",
"\n",
"bc.create_table('part', 's3://bsql_data/tpch_sf1000/part/')\n",
"\n",
"bc.create_table('partsupp', 's3://bsql_data/tpch_sf1000/partsupp/')\n",
"\n",
"bc.create_table('supplier', 's3://bsql_data/tpch_sf1000/supplier/')\n",
"\n",
"bc.create_table('nation', 's3://bsql_data/tpch_sf1/nation/')\n",
"\n",
"bc.create_table('region', 's3://bsql_data/tpch_sf1/region/')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 905 ms, sys: 13.9 ms, total: 919 ms\n",
"Wall time: 15.6 s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>o_orderpriority</th>\n",
" <th>order_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1-URGENT</td>\n",
" <td>275099515</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2-HIGH</td>\n",
" <td>275096174</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" o_orderpriority order_count\n",
"0 1-URGENT 275099515\n",
"1 2-HIGH 275096174"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# Q4\n",
"query = \"\"\" \n",
" select\n",
" o_orderpriority, \n",
" count(*) as order_count\n",
" from\n",
" orders\n",
" where\n",
" exists (\n",
" select\n",
" *\n",
" from\n",
" lineitem\n",
" where\n",
" l_orderkey = o_orderkey\n",
" and l_commitdate < l_receiptdate\n",
" )\n",
" group by\n",
" o_orderpriority\n",
" order by\n",
" o_orderpriority\n",
" \"\"\"\n",
"\n",
"\n",
"bc.sql(query).head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Rapids Stable",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment