Created
January 14, 2020 22:27
-
-
Save roaramburu/3931f565a5dd0a09692512b0a5c37e1a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Imports and BlazingContext" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"BlazingContext ready\n" | |
] | |
} | |
], | |
"source": [ | |
"import os\n", | |
"\n", | |
"import cudf\n", | |
"from blazingsql import BlazingContext\n", | |
"bc = BlazingContext()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Download Data and Create Table" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"--2020-01-14 22:15:30-- https://blazingsql-colab.s3.amazonaws.com/tpch_sf1/lineitem/0_0_0.parquet\n", | |
"Resolving blazingsql-colab.s3.amazonaws.com (blazingsql-colab.s3.amazonaws.com)... 52.216.114.155\n", | |
"Connecting to blazingsql-colab.s3.amazonaws.com (blazingsql-colab.s3.amazonaws.com)|52.216.114.155|:443... connected.\n", | |
"HTTP request sent, awaiting response... 200 OK\n", | |
"Length: 176921950 (169M) [application/x-www-form-urlencoded]\n", | |
"Saving to: ‘0_0_0.parquet’\n", | |
"\n", | |
"0_0_0.parquet 100%[===================>] 168.73M 83.3MB/s in 2.0s \n", | |
"\n", | |
"2020-01-14 22:15:37 (83.3 MB/s) - ‘0_0_0.parquet’ saved [176921950/176921950]\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"# Fetch the TPC-H SF1 lineitem Parquet file into the working directory.\n", | |
"# -nc (no-clobber) skips the download when the file is already present, so\n", | |
"# re-running the notebook neither re-fetches it nor creates a '.1' duplicate.\n", | |
"!wget -nc https://blazingsql-colab.s3.amazonaws.com/tpch_sf1/lineitem/0_0_0.parquet" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<pyblazing.apiv2.context.BlazingTable at 0x7fb80ec856d8>" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Register the downloaded Parquet file as the SQL table `lineitem`.\n", | |
"# The absolute path is derived from the working directory (where wget saved\n", | |
"# the file) instead of a user-specific home directory.\n", | |
"bc.create_table('lineitem', os.path.abspath('0_0_0.parquet'))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Write/Read ORC" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 2.01 s, sys: 798 ms, total: 2.81 s\n", | |
"Wall time: 2.35 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"#BSQL Write ORC\n", | |
"bc.sql('select * from lineitem').to_orc('lineitem.orc', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<pyblazing.apiv2.context.BlazingTable at 0x7fb80ec97898>" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Register the ORC file written by the previous cell as the SQL table `lineitem_orc`.\n", | |
"# The absolute path is derived from the working directory (where to_orc wrote\n", | |
"# 'lineitem.orc') instead of a user-specific home directory.\n", | |
"bc.create_table('lineitem_orc', os.path.abspath('lineitem.orc'))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### ORC Read Benchmark" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 708 ms, sys: 327 ms, total: 1.04 s\n", | |
"Wall time: 1.01 s\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>l_orderkey</th>\n", | |
" <th>l_partkey</th>\n", | |
" <th>l_suppkey</th>\n", | |
" <th>l_linenumber</th>\n", | |
" <th>l_quantity</th>\n", | |
" <th>l_extendedprice</th>\n", | |
" <th>l_discount</th>\n", | |
" <th>l_tax</th>\n", | |
" <th>l_returnflag</th>\n", | |
" <th>l_linestatus</th>\n", | |
" <th>l_shipdate</th>\n", | |
" <th>l_commitdate</th>\n", | |
" <th>l_receiptdate</th>\n", | |
" <th>l_shipinstruct</th>\n", | |
" <th>l_shipmode</th>\n", | |
" <th>l_comment</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>155190</td>\n", | |
" <td>7706</td>\n", | |
" <td>1</td>\n", | |
" <td>17.0</td>\n", | |
" <td>21168.230469</td>\n", | |
" <td>0.04</td>\n", | |
" <td>0.02</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-03-13</td>\n", | |
" <td>1996-02-12</td>\n", | |
" <td>1996-03-22</td>\n", | |
" <td>DELIVER IN PERSON</td>\n", | |
" <td>TRUCK</td>\n", | |
" <td>egular courts above the</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>67310</td>\n", | |
" <td>7311</td>\n", | |
" <td>2</td>\n", | |
" <td>36.0</td>\n", | |
" <td>45983.160156</td>\n", | |
" <td>0.09</td>\n", | |
" <td>0.06</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-04-12</td>\n", | |
" <td>1996-02-28</td>\n", | |
" <td>1996-04-20</td>\n", | |
" <td>TAKE BACK RETURN</td>\n", | |
" <td>MAIL</td>\n", | |
" <td>ly final dependencies: slyly bold</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>63700</td>\n", | |
" <td>3701</td>\n", | |
" <td>3</td>\n", | |
" <td>8.0</td>\n", | |
" <td>13309.599609</td>\n", | |
" <td>0.10</td>\n", | |
" <td>0.02</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-01-29</td>\n", | |
" <td>1996-03-05</td>\n", | |
" <td>1996-01-31</td>\n", | |
" <td>TAKE BACK RETURN</td>\n", | |
" <td>REG AIR</td>\n", | |
" <td>riously. regular, express dep</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>2132</td>\n", | |
" <td>4633</td>\n", | |
" <td>4</td>\n", | |
" <td>28.0</td>\n", | |
" <td>28955.640625</td>\n", | |
" <td>0.09</td>\n", | |
" <td>0.06</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-04-21</td>\n", | |
" <td>1996-03-30</td>\n", | |
" <td>1996-05-16</td>\n", | |
" <td>NONE</td>\n", | |
" <td>AIR</td>\n", | |
" <td>lites. fluffily even de</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>24027</td>\n", | |
" <td>1534</td>\n", | |
" <td>5</td>\n", | |
" <td>24.0</td>\n", | |
" <td>22824.480469</td>\n", | |
" <td>0.10</td>\n", | |
" <td>0.04</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-03-30</td>\n", | |
" <td>1996-03-14</td>\n", | |
" <td>1996-04-01</td>\n", | |
" <td>NONE</td>\n", | |
" <td>FOB</td>\n", | |
" <td>pending foxes. slyly re</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" l_orderkey l_partkey l_suppkey l_linenumber l_quantity \\\n", | |
"0 1 155190 7706 1 17.0 \n", | |
"1 1 67310 7311 2 36.0 \n", | |
"2 1 63700 3701 3 8.0 \n", | |
"3 1 2132 4633 4 28.0 \n", | |
"4 1 24027 1534 5 24.0 \n", | |
"\n", | |
" l_extendedprice l_discount l_tax l_returnflag l_linestatus l_shipdate \\\n", | |
"0 21168.230469 0.04 0.02 N O 1996-03-13 \n", | |
"1 45983.160156 0.09 0.06 N O 1996-04-12 \n", | |
"2 13309.599609 0.10 0.02 N O 1996-01-29 \n", | |
"3 28955.640625 0.09 0.06 N O 1996-04-21 \n", | |
"4 22824.480469 0.10 0.04 N O 1996-03-30 \n", | |
"\n", | |
" l_commitdate l_receiptdate l_shipinstruct l_shipmode \\\n", | |
"0 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK \n", | |
"1 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL \n", | |
"2 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR \n", | |
"3 1996-03-30 1996-05-16 NONE AIR \n", | |
"4 1996-03-14 1996-04-01 NONE FOB \n", | |
"\n", | |
" l_comment \n", | |
"0 egular courts above the \n", | |
"1 ly final dependencies: slyly bold \n", | |
"2 riously. regular, express dep \n", | |
"3 lites. fluffily even de \n", | |
"4 pending foxes. slyly re " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# BSQL Read ORC: query the ORC-backed table `lineitem_orc`; the Parquet-backed\n", | |
"# `lineitem` table would not exercise the ORC reader being benchmarked here.\n", | |
"data = bc.sql('select * from lineitem_orc')\n", | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 181 ms, sys: 35.1 ms, total: 217 ms\n", | |
"Wall time: 215 ms\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>l_orderkey</th>\n", | |
" <th>l_partkey</th>\n", | |
" <th>l_suppkey</th>\n", | |
" <th>l_linenumber</th>\n", | |
" <th>l_quantity</th>\n", | |
" <th>l_extendedprice</th>\n", | |
" <th>l_discount</th>\n", | |
" <th>l_tax</th>\n", | |
" <th>l_returnflag</th>\n", | |
" <th>l_linestatus</th>\n", | |
" <th>l_shipdate</th>\n", | |
" <th>l_commitdate</th>\n", | |
" <th>l_receiptdate</th>\n", | |
" <th>l_shipinstruct</th>\n", | |
" <th>l_shipmode</th>\n", | |
" <th>l_comment</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>155190</td>\n", | |
" <td>7706</td>\n", | |
" <td>1</td>\n", | |
" <td>17.0</td>\n", | |
" <td>21168.230469</td>\n", | |
" <td>0.04</td>\n", | |
" <td>0.02</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-03-13</td>\n", | |
" <td>1996-02-12</td>\n", | |
" <td>1996-03-22</td>\n", | |
" <td>DELIVER IN PERSON</td>\n", | |
" <td>TRUCK</td>\n", | |
" <td>egular courts above the</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>67310</td>\n", | |
" <td>7311</td>\n", | |
" <td>2</td>\n", | |
" <td>36.0</td>\n", | |
" <td>45983.160156</td>\n", | |
" <td>0.09</td>\n", | |
" <td>0.06</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-04-12</td>\n", | |
" <td>1996-02-28</td>\n", | |
" <td>1996-04-20</td>\n", | |
" <td>TAKE BACK RETURN</td>\n", | |
" <td>MAIL</td>\n", | |
" <td>ly final dependencies: slyly bold</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>63700</td>\n", | |
" <td>3701</td>\n", | |
" <td>3</td>\n", | |
" <td>8.0</td>\n", | |
" <td>13309.599609</td>\n", | |
" <td>0.10</td>\n", | |
" <td>0.02</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-01-29</td>\n", | |
" <td>1996-03-05</td>\n", | |
" <td>1996-01-31</td>\n", | |
" <td>TAKE BACK RETURN</td>\n", | |
" <td>REG AIR</td>\n", | |
" <td>riously. regular, express dep</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>2132</td>\n", | |
" <td>4633</td>\n", | |
" <td>4</td>\n", | |
" <td>28.0</td>\n", | |
" <td>28955.640625</td>\n", | |
" <td>0.09</td>\n", | |
" <td>0.06</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-04-21</td>\n", | |
" <td>1996-03-30</td>\n", | |
" <td>1996-05-16</td>\n", | |
" <td>NONE</td>\n", | |
" <td>AIR</td>\n", | |
" <td>lites. fluffily even de</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1</td>\n", | |
" <td>24027</td>\n", | |
" <td>1534</td>\n", | |
" <td>5</td>\n", | |
" <td>24.0</td>\n", | |
" <td>22824.480469</td>\n", | |
" <td>0.10</td>\n", | |
" <td>0.04</td>\n", | |
" <td>N</td>\n", | |
" <td>O</td>\n", | |
" <td>1996-03-30</td>\n", | |
" <td>1996-03-14</td>\n", | |
" <td>1996-04-01</td>\n", | |
" <td>NONE</td>\n", | |
" <td>FOB</td>\n", | |
" <td>pending foxes. slyly re</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" l_orderkey l_partkey l_suppkey l_linenumber l_quantity \\\n", | |
"0 1 155190 7706 1 17.0 \n", | |
"1 1 67310 7311 2 36.0 \n", | |
"2 1 63700 3701 3 8.0 \n", | |
"3 1 2132 4633 4 28.0 \n", | |
"4 1 24027 1534 5 24.0 \n", | |
"\n", | |
" l_extendedprice l_discount l_tax l_returnflag l_linestatus l_shipdate \\\n", | |
"0 21168.230469 0.04 0.02 N O 1996-03-13 \n", | |
"1 45983.160156 0.09 0.06 N O 1996-04-12 \n", | |
"2 13309.599609 0.10 0.02 N O 1996-01-29 \n", | |
"3 28955.640625 0.09 0.06 N O 1996-04-21 \n", | |
"4 22824.480469 0.10 0.04 N O 1996-03-30 \n", | |
"\n", | |
" l_commitdate l_receiptdate l_shipinstruct l_shipmode \\\n", | |
"0 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK \n", | |
"1 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL \n", | |
"2 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR \n", | |
"3 1996-03-30 1996-05-16 NONE AIR \n", | |
"4 1996-03-14 1996-04-01 NONE FOB \n", | |
"\n", | |
" l_comment \n", | |
"0 egular courts above the \n", | |
"1 ly final dependencies: slyly bold \n", | |
"2 riously. regular, express dep \n", | |
"3 lites. fluffily even de \n", | |
"4 pending foxes. slyly re " | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"#cuDF Read ORC\n", | |
"data = cudf.read_orc('lineitem.orc')\n", | |
"data.head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.7" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment