Skip to content

Instantly share code, notes, and snippets.

@daxiongshu
Last active July 13, 2019 01:05
Show Gist options
  • Save daxiongshu/b47b2fda363ce55943afa3d3224e4958 to your computer and use it in GitHub Desktop.
Save daxiongshu/b47b2fda363ce55943afa3d3224e4958 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import cudf as gd\n",
"import pandas as pd\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(200000, 202)\n",
"CPU times: user 252 ms, sys: 252 ms, total: 504 ms\n",
"Wall time: 509 ms\n"
]
}
],
"source": [
"%%time\n",
"# Read train.csv with cuDF (imported as gd) and time the whole cell.\n",
"PATH = '../input'\n",
"\n",
"# Column names and dtypes are passed explicitly: two int32 columns\n",
"# followed by 200 float32 feature columns named var_0 .. var_199.\n",
"cols = ['ID_code', 'target']\n",
"cols.extend('var_%d' % i for i in range(200))\n",
"dtypes = ['int32'] * 2 + ['float32'] * 200\n",
"\n",
"# skiprows=1 drops the file's first row (presumably the header line),\n",
"# since the names are supplied above.\n",
"train_gd = gd.read_csv(\n",
"    '%s/train.csv' % PATH,\n",
"    names=cols,\n",
"    dtype=dtypes,\n",
"    skiprows=1,\n",
")\n",
"print(train_gd.shape)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(200000, 202)\n",
"CPU times: user 5.08 s, sys: 404 ms, total: 5.48 s\n",
"Wall time: 5.48 s\n"
]
}
],
"source": [
"%%time\n",
"# Read the same CSV with pandas so its timing can be compared\n",
"# against the cuDF read in the previous cell.\n",
"train_pd = pd.read_csv(\n",
"    '%s/train.csv' % PATH\n",
")\n",
"print(train_pd.shape)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID_code</th>\n",
" <th>target</th>\n",
" <th>var_0</th>\n",
" <th>var_1</th>\n",
" <th>var_2</th>\n",
" <th>var_3</th>\n",
" <th>var_4</th>\n",
" <th>var_5</th>\n",
" <th>var_6</th>\n",
" <th>var_7</th>\n",
" <th>...</th>\n",
" <th>var_190</th>\n",
" <th>var_191</th>\n",
" <th>var_192</th>\n",
" <th>var_193</th>\n",
" <th>var_194</th>\n",
" <th>var_195</th>\n",
" <th>var_196</th>\n",
" <th>var_197</th>\n",
" <th>var_198</th>\n",
" <th>var_199</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>75153670</td>\n",
" <td>0</td>\n",
" <td>8.925500</td>\n",
" <td>-6.7863</td>\n",
" <td>11.908100</td>\n",
" <td>5.0930</td>\n",
" <td>11.460700</td>\n",
" <td>-9.2834</td>\n",
" <td>5.1187</td>\n",
" <td>18.626602</td>\n",
" <td>...</td>\n",
" <td>4.4354</td>\n",
" <td>3.964200</td>\n",
" <td>3.1364</td>\n",
" <td>1.691000</td>\n",
" <td>18.522701</td>\n",
" <td>-2.3978</td>\n",
" <td>7.8784</td>\n",
" <td>8.5635</td>\n",
" <td>12.780300</td>\n",
" <td>-1.091400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>75153671</td>\n",
" <td>0</td>\n",
" <td>11.500600</td>\n",
" <td>-4.1473</td>\n",
" <td>13.858801</td>\n",
" <td>5.3890</td>\n",
" <td>12.362201</td>\n",
" <td>7.0433</td>\n",
" <td>5.6208</td>\n",
" <td>16.533800</td>\n",
" <td>...</td>\n",
" <td>7.6421</td>\n",
" <td>7.721400</td>\n",
" <td>2.5837</td>\n",
" <td>10.951600</td>\n",
" <td>15.430499</td>\n",
" <td>2.0339</td>\n",
" <td>8.1267</td>\n",
" <td>8.7889</td>\n",
" <td>18.355999</td>\n",
" <td>1.951800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>75153672</td>\n",
" <td>0</td>\n",
" <td>8.609301</td>\n",
" <td>-2.7457</td>\n",
" <td>12.080500</td>\n",
" <td>7.8928</td>\n",
" <td>10.582500</td>\n",
" <td>-9.0837</td>\n",
" <td>6.9427</td>\n",
" <td>14.615500</td>\n",
" <td>...</td>\n",
" <td>2.9057</td>\n",
" <td>9.790500</td>\n",
" <td>1.6704</td>\n",
" <td>1.685800</td>\n",
" <td>21.604200</td>\n",
" <td>3.1417</td>\n",
" <td>-6.5213</td>\n",
" <td>8.2675</td>\n",
" <td>14.722200</td>\n",
" <td>0.396500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>75153673</td>\n",
" <td>0</td>\n",
" <td>11.060400</td>\n",
" <td>-2.1518</td>\n",
" <td>8.952200</td>\n",
" <td>7.1957</td>\n",
" <td>12.584599</td>\n",
" <td>-1.8361</td>\n",
" <td>5.8428</td>\n",
" <td>14.925000</td>\n",
" <td>...</td>\n",
" <td>4.4666</td>\n",
" <td>4.743299</td>\n",
" <td>0.7178</td>\n",
" <td>1.421400</td>\n",
" <td>23.034700</td>\n",
" <td>-1.2706</td>\n",
" <td>-2.9275</td>\n",
" <td>10.2922</td>\n",
" <td>17.969700</td>\n",
" <td>-8.999599</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>75153674</td>\n",
" <td>0</td>\n",
" <td>9.836900</td>\n",
" <td>-1.4834</td>\n",
" <td>12.874599</td>\n",
" <td>6.6375</td>\n",
" <td>12.277200</td>\n",
" <td>2.4486</td>\n",
" <td>5.9405</td>\n",
" <td>19.251400</td>\n",
" <td>...</td>\n",
" <td>-1.4905</td>\n",
" <td>9.521400</td>\n",
" <td>-0.1508</td>\n",
" <td>9.194201</td>\n",
" <td>13.287600</td>\n",
" <td>-1.5121</td>\n",
" <td>3.9267</td>\n",
" <td>9.5031</td>\n",
" <td>17.997400</td>\n",
" <td>-8.810400</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 202 columns</p>\n",
"</div>"
],
"text/plain": [
" ID_code target var_0 var_1 var_2 var_3 var_4 var_5 \\\n",
"0 75153670 0 8.925500 -6.7863 11.908100 5.0930 11.460700 -9.2834 \n",
"1 75153671 0 11.500600 -4.1473 13.858801 5.3890 12.362201 7.0433 \n",
"2 75153672 0 8.609301 -2.7457 12.080500 7.8928 10.582500 -9.0837 \n",
"3 75153673 0 11.060400 -2.1518 8.952200 7.1957 12.584599 -1.8361 \n",
"4 75153674 0 9.836900 -1.4834 12.874599 6.6375 12.277200 2.4486 \n",
"\n",
" var_6 var_7 ... var_190 var_191 var_192 var_193 var_194 \\\n",
"0 5.1187 18.626602 ... 4.4354 3.964200 3.1364 1.691000 18.522701 \n",
"1 5.6208 16.533800 ... 7.6421 7.721400 2.5837 10.951600 15.430499 \n",
"2 6.9427 14.615500 ... 2.9057 9.790500 1.6704 1.685800 21.604200 \n",
"3 5.8428 14.925000 ... 4.4666 4.743299 0.7178 1.421400 23.034700 \n",
"4 5.9405 19.251400 ... -1.4905 9.521400 -0.1508 9.194201 13.287600 \n",
"\n",
" var_195 var_196 var_197 var_198 var_199 \n",
"0 -2.3978 7.8784 8.5635 12.780300 -1.091400 \n",
"1 2.0339 8.1267 8.7889 18.355999 1.951800 \n",
"2 3.1417 -6.5213 8.2675 14.722200 0.396500 \n",
"3 -1.2706 -2.9275 10.2922 17.969700 -8.999599 \n",
"4 -1.5121 3.9267 9.5031 17.997400 -8.810400 \n",
"\n",
"[5 rows x 202 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the cuDF frame, converted to pandas for display.\n",
"train_gd.head().to_pandas()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment