Skip to content

Instantly share code, notes, and snippets.

@roaramburu
Last active October 10, 2020 06:03
Show Gist options
  • Save roaramburu/244eec9c8f0dfdb0c5335940b57fb7a4 to your computer and use it in GitHub Desktop.
Save roaramburu/244eec9c8f0dfdb0c5335940b57fb7a4 to your computer and use it in GitHub Desktop.
An example notebook running a 1TB query on a single GPU using https://app.blazingsql.com
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# TPC-H SF1K (1000GB) Demo\n",
"\n",
"### Create BlazingSQL Context"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BlazingContext ready\n"
]
}
],
"source": [
"from blazingsql import BlazingContext\n",
"bc = BlazingContext()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Register S3 Bucket and Create Table on BlazingSQL"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(True,\n",
" '',\n",
" OrderedDict([('type', 's3'),\n",
" ('bucket_name', 'blazingsql-colab'),\n",
" ('access_key_id', ''),\n",
" ('secret_key', ''),\n",
" ('session_token', ''),\n",
" ('encryption_type', <S3EncryptionType.NONE: 1>),\n",
" ('kms_key_amazon_resource_name', ''),\n",
" ('endpoint_override', ''),\n",
" ('region', '')]))"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# register AWS S3 bucket\n",
"bc.s3('bsql_data', bucket_name='blazingsql-colab')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 3.52 s, sys: 2.69 s, total: 6.22 s\n",
"Wall time: 29.8 s\n"
]
}
],
"source": [
"%%time\n",
"bc.create_table('lineitem', 's3://bsql_data/tpch_sf1000/lineitem/')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check Total Row Count"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 57.7 s, sys: 38.4 s, total: 1min 36s\n",
"Wall time: 1min 29s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count(*)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5999989709</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count(*)\n",
"0 5999989709"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"bc.sql('select count(*) from lineitem')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run a ~1000GB Query"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 7min 39s, sys: 4min 57s, total: 12min 36s\n",
"Wall time: 8min 22s\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>l_returnflag</th>\n",
" <th>l_linestatus</th>\n",
" <th>sum_qty</th>\n",
" <th>sum_base_price</th>\n",
" <th>sum_disc_price</th>\n",
" <th>sum_charge</th>\n",
" <th>avg_qty</th>\n",
" <th>avg_price</th>\n",
" <th>avg_disc</th>\n",
" <th>count_order</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>A</td>\n",
" <td>F</td>\n",
" <td>3.775711e+10</td>\n",
" <td>5.661653e+13</td>\n",
" <td>5.378567e+13</td>\n",
" <td>5.593707e+13</td>\n",
" <td>25.499822</td>\n",
" <td>38236.804903</td>\n",
" <td>0.050001</td>\n",
" <td>1480681438</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>N</td>\n",
" <td>F</td>\n",
" <td>9.858022e+08</td>\n",
" <td>1.478169e+12</td>\n",
" <td>1.404256e+12</td>\n",
" <td>1.460444e+12</td>\n",
" <td>25.501164</td>\n",
" <td>38237.936091</td>\n",
" <td>0.050008</td>\n",
" <td>38657144</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>N</td>\n",
" <td>O</td>\n",
" <td>7.434706e+10</td>\n",
" <td>1.114836e+14</td>\n",
" <td>1.059095e+14</td>\n",
" <td>1.101459e+14</td>\n",
" <td>25.500019</td>\n",
" <td>38237.348018</td>\n",
" <td>0.050000</td>\n",
" <td>2915568939</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>R</td>\n",
" <td>F</td>\n",
" <td>3.775737e+10</td>\n",
" <td>5.661702e+13</td>\n",
" <td>5.378616e+13</td>\n",
" <td>5.593760e+13</td>\n",
" <td>25.500100</td>\n",
" <td>38237.295578</td>\n",
" <td>0.050000</td>\n",
" <td>1480675200</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" l_returnflag l_linestatus sum_qty sum_base_price sum_disc_price \\\n",
"0 A F 3.775711e+10 5.661653e+13 5.378567e+13 \n",
"1 N F 9.858022e+08 1.478169e+12 1.404256e+12 \n",
"2 N O 7.434706e+10 1.114836e+14 1.059095e+14 \n",
"3 R F 3.775737e+10 5.661702e+13 5.378616e+13 \n",
"\n",
" sum_charge avg_qty avg_price avg_disc count_order \n",
"0 5.593707e+13 25.499822 38236.804903 0.050001 1480681438 \n",
"1 1.460444e+12 25.501164 38237.936091 0.050008 38657144 \n",
"2 1.101459e+14 25.500019 38237.348018 0.050000 2915568939 \n",
"3 5.593760e+13 25.500100 38237.295578 0.050000 1480675200 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"# Q1\n",
"query =\"\"\"\n",
" select\n",
" l_returnflag,\n",
" l_linestatus,\n",
" sum(l_quantity) as sum_qty,\n",
" sum(l_extendedprice) as sum_base_price,\n",
" sum(l_extendedprice*(1-l_discount)) as sum_disc_price,\n",
" sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,\n",
" avg(l_quantity) as avg_qty,\n",
" avg(l_extendedprice) as avg_price,\n",
" avg(l_discount) as avg_disc,\n",
" count(*) as count_order\n",
" from\n",
" lineitem\n",
" where\n",
" l_shipdate <= date '1998-12-01' - interval '90' day\n",
" group by\n",
" l_returnflag,\n",
" l_linestatus\n",
" order by\n",
" l_returnflag,\n",
" l_linestatus\n",
"\"\"\"\n",
"\n",
"bc.sql(query)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "RAPIDS Nightly",
"language": "python",
"name": "rapids-nightly"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment