Last active
October 10, 2020 06:03
-
-
Save roaramburu/244eec9c8f0dfdb0c5335940b57fb7a4 to your computer and use it in GitHub Desktop.
An example notebook running a 1TB query on a single GPU using https://app.blazingsql.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# TPC-H SF1K (1000GB) Demo\n", | |
"\n", | |
"### Create BlazingSQL Context" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"BlazingContext ready\n" | |
] | |
} | |
], | |
"source": [ | |
"from blazingsql import BlazingContext\n", | |
"bc = BlazingContext()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Register S3 Bucket and Create Table on BlazingSQL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(True,\n", | |
" '',\n", | |
" OrderedDict([('type', 's3'),\n", | |
" ('bucket_name', 'blazingsql-colab'),\n", | |
" ('access_key_id', ''),\n", | |
" ('secret_key', ''),\n", | |
" ('session_token', ''),\n", | |
" ('encryption_type', <S3EncryptionType.NONE: 1>),\n", | |
" ('kms_key_amazon_resource_name', ''),\n", | |
" ('endpoint_override', ''),\n", | |
" ('region', '')]))" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# register AWS S3 bucket\n", | |
"bc.s3('bsql_data', bucket_name='blazingsql-colab')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 3.52 s, sys: 2.69 s, total: 6.22 s\n", | |
"Wall time: 29.8 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.create_table('lineitem', 's3://bsql_data/tpch_sf1000/lineitem/')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Check Total Row Count" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 57.7 s, sys: 38.4 s, total: 1min 36s\n", | |
"Wall time: 1min 29s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>count(*)</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5999989709</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count(*)\n", | |
"0 5999989709" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"bc.sql('select count(*) from lineitem')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Run a ~1000GB Query" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 7min 39s, sys: 4min 57s, total: 12min 36s\n", | |
"Wall time: 8min 22s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>l_returnflag</th>\n", | |
" <th>l_linestatus</th>\n", | |
" <th>sum_qty</th>\n", | |
" <th>sum_base_price</th>\n", | |
" <th>sum_disc_price</th>\n", | |
" <th>sum_charge</th>\n", | |
" <th>avg_qty</th>\n", | |
" <th>avg_price</th>\n", | |
" <th>avg_disc</th>\n", | |
" <th>count_order</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>A</td>\n", | |
" <td>F</td>\n", | |
" <td>3.775711e+10</td>\n", | |
" <td>5.661653e+13</td>\n", | |
" <td>5.378567e+13</td>\n", | |
" <td>5.593707e+13</td>\n", | |
" <td>25.499822</td>\n", | |
" <td>38236.804903</td>\n", | |
" <td>0.050001</td>\n", | |
" <td>1480681438</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>N</td>\n", | |
" <td>F</td>\n", | |
" <td>9.858022e+08</td>\n", | |
" <td>1.478169e+12</td>\n", | |
" <td>1.404256e+12</td>\n", | |
" <td>1.460444e+12</td>\n", | |
" <td>25.501164</td>\n", | |
" <td>38237.936091</td>\n", | |
" <td>0.050008</td>\n", | |
" <td>38657144</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>7.434706e+10</td>\n", | |
" <td>1.114836e+14</td>\n", | |
" <td>1.059095e+14</td>\n", | |
" <td>1.101459e+14</td>\n", | |
" <td>25.500019</td>\n", | |
" <td>38237.348018</td>\n", | |
" <td>0.050000</td>\n", | |
" <td>2915568939</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>R</td>\n", | |
" <td>F</td>\n", | |
" <td>3.775737e+10</td>\n", | |
" <td>5.661702e+13</td>\n", | |
" <td>5.378616e+13</td>\n", | |
" <td>5.593760e+13</td>\n", | |
" <td>25.500100</td>\n", | |
" <td>38237.295578</td>\n", | |
" <td>0.050000</td>\n", | |
" <td>1480675200</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price \\\n", | |
"0 A F 3.775711e+10 5.661653e+13 5.378567e+13 \n", | |
"1 N F 9.858022e+08 1.478169e+12 1.404256e+12 \n", | |
"2 N O 7.434706e+10 1.114836e+14 1.059095e+14 \n", | |
"3 R F 3.775737e+10 5.661702e+13 5.378616e+13 \n", | |
"\n", | |
" sum_charge avg_qty avg_price avg_disc count_order \n", | |
"0 5.593707e+13 25.499822 38236.804903 0.050001 1480681438 \n", | |
"1 1.460444e+12 25.501164 38237.936091 0.050008 38657144 \n", | |
"2 1.101459e+14 25.500019 38237.348018 0.050000 2915568939 \n", | |
"3 5.593760e+13 25.500100 38237.295578 0.050000 1480675200 " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# Q1\n", | |
"query =\"\"\"\n", | |
" select\n", | |
" l_returnflag,\n", | |
" l_linestatus,\n", | |
" sum(l_quantity) as sum_qty,\n", | |
" sum(l_extendedprice) as sum_base_price,\n", | |
" sum(l_extendedprice*(1-l_discount)) as sum_disc_price,\n", | |
" sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,\n", | |
" avg(l_quantity) as avg_qty,\n", | |
" avg(l_extendedprice) as avg_price,\n", | |
" avg(l_discount) as avg_disc,\n", | |
" count(*) as count_order\n", | |
" from\n", | |
" lineitem\n", | |
" where\n", | |
" l_shipdate <= date '1998-12-01' - interval '90' day\n", | |
" group by\n", | |
" l_returnflag,\n", | |
" l_linestatus\n", | |
" order by\n", | |
" l_returnflag,\n", | |
" l_linestatus\n", | |
"\"\"\"\n", | |
"\n", | |
"bc.sql(query)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "RAPIDS Nightly", | |
"language": "python", | |
"name": "rapids-nightly" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment