Created
February 14, 2017 07:39
-
-
Save akameco/a5f70cdb34de4ad176104145be65739e to your computer and use it in GitHub Desktop.
pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"df_sample = pd.DataFrame([\n", | |
" [\"day1\", \"day2\", \"day1\", \"day2\", \"day1\", \"day2\"],\n", | |
" [\"A\", \"B\", \"A\", \"B\", \"C\", \"C\"],\n", | |
" [100, 150, 200, 150, 100, 50],\n", | |
" [120, 160, 100, 180, 110, 80]\n", | |
" ]).T" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"RangeIndex(start=0, stop=4, step=1)" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"RangeIndex(start=0, stop=6, step=1)" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.index " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df_sample.columns = [\"day_no\",\"class\",\"point1\",\"point2\"]\n", | |
"df_sample.index = [11,12,13,14,15,16] # インデックス名を上書きする" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"13 day1 A 200 100\n", | |
"14 day2 B 150 180\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.rename(columns={'score1': 'point1'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"6" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(df_sample)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(6, 4)" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Int64Index: 6 entries, 11 to 16\n", | |
"Data columns (total 4 columns):\n", | |
"day_no 6 non-null object\n", | |
"class 6 non-null object\n", | |
"point1 6 non-null object\n", | |
"point2 6 non-null object\n", | |
"dtypes: object(4)\n", | |
"memory usage: 240.0+ bytes\n" | |
] | |
} | |
], | |
"source": [ | |
"df_sample.info()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>unique</th>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>4</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>top</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>150</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>freq</th>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"count 6 6 6 6\n", | |
"unique 2 3 4 6\n", | |
"top day1 C 150 110\n", | |
"freq 3 2 2 1" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.describe()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"13 day1 A 200 100\n", | |
"14 day2 B 150 180\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.tail(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"11 day1\n", | |
"12 day2\n", | |
"13 day1\n", | |
"14 day2\n", | |
"15 day1\n", | |
"16 day2\n", | |
"Name: day_no, dtype: object" | |
] | |
}, | |
"execution_count": 43, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample[\"day_no\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>point1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>150</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>200</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>150</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>50</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no point1\n", | |
"11 day1 100\n", | |
"12 day2 150\n", | |
"13 day1 200\n", | |
"14 day2 150\n", | |
"15 day1 100\n", | |
"16 day2 50" | |
] | |
}, | |
"execution_count": 46, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample[[\"day_no\", \"point1\"]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"13 day1 A 200 100\n", | |
"14 day2 B 150 180\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 60, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.rename(columns={'score1': 'point1'})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>point1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>150</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>200</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>150</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>50</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" point1\n", | |
"11 100\n", | |
"12 150\n", | |
"13 200\n", | |
"14 150\n", | |
"15 100\n", | |
"16 50" | |
] | |
}, | |
"execution_count": 66, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"score_select = pd.Series(df_sample.columns).str.contains(\"point1\")\n", | |
"df_sample.ix[:, np.array(score_select)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"13 day1 A 200 100" | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample[df_sample.day_no == 'day1']\n", | |
"series_bool = [True, False, True, False, False, False]\n", | |
"df_sample[series_bool]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 81, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"15 day1 C 100 110" | |
] | |
}, | |
"execution_count": 81, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_sample.query('point1 == 100') " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 83, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = df_sample" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"13 day1 A 200 100\n", | |
"14 day2 B 150 180\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 85, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 97, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"13 day1 A 200 100\n", | |
"14 day2 B 150 180\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 97, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.query('day_no == \"day1\"')\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"16 day2 C 50 80\n", | |
"11 day1 A 100 120\n", | |
"15 day1 C 100 110\n", | |
"12 day2 B 150 160\n", | |
"14 day2 B 150 180\n", | |
"13 day1 A 200 100" | |
] | |
}, | |
"execution_count": 99, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.sort_values(\"point1\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 101, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"16 day2 C 50 80\n", | |
"15 day1 C 100 110\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"14 day2 B 150 180\n", | |
"13 day1 A 200 100" | |
] | |
}, | |
"execution_count": 101, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.sort_values([\"point1\", \"point2\"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"13 day1 A 200 100\n", | |
"12 day2 B 150 160\n", | |
"14 day2 B 150 180\n", | |
"11 day1 A 100 120\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80" | |
] | |
}, | |
"execution_count": 103, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.sort_values(\"point1\", ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2\n", | |
"11 day1 A 100 120\n", | |
"12 day2 B 150 160\n", | |
"13 day1 A 200 100\n", | |
"14 day2 B 150 180\n", | |
"15 day1 C 100 110\n", | |
"16 day2 C 50 80\n", | |
"17 day1 A 100 180" | |
] | |
}, | |
"execution_count": 114, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df_add = pd.DataFrame([[\"day1\", \"A\",100,180]])\n", | |
"df_add.columns = [\"day_no\", \"class\", \"point1\", \"point2\"]\n", | |
"df_add.index = [17]\n", | |
"\n", | |
"pd.concat([df, df_add], axis=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 120, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df_cal = pd.DataFrame([[120, 160, 100, 180, 110, 80]]).T\n", | |
"df_cal.columns = [\"score3\"]\n", | |
"df_cal.index = [11, 12, 13, 14, 15, 16]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>class</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" <th>score3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>100</td>\n", | |
" <td>120</td>\n", | |
" <td>120</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>160</td>\n", | |
" <td>160</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>day1</td>\n", | |
" <td>A</td>\n", | |
" <td>200</td>\n", | |
" <td>100</td>\n", | |
" <td>100</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>day2</td>\n", | |
" <td>B</td>\n", | |
" <td>150</td>\n", | |
" <td>180</td>\n", | |
" <td>180</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>day1</td>\n", | |
" <td>C</td>\n", | |
" <td>100</td>\n", | |
" <td>110</td>\n", | |
" <td>110</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>day2</td>\n", | |
" <td>C</td>\n", | |
" <td>50</td>\n", | |
" <td>80</td>\n", | |
" <td>80</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no class point1 point2 score3\n", | |
"11 day1 A 100 120 120\n", | |
"12 day2 B 150 160 160\n", | |
"13 day1 A 200 100 100\n", | |
"14 day2 B 150 180 180\n", | |
"15 day1 C 100 110 110\n", | |
"16 day2 C 50 80 80" | |
] | |
}, | |
"execution_count": 127, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.concat([df, df_cal], axis=1)\n", | |
"\n", | |
"# df_addition_col = pd.DataFrame([[120,160,100,180,110,80]]).T\n", | |
"# df_addition_col.columns =[\"score3\"]\n", | |
"# df_addition_col.index = [11,12,13,21,22,23]\n", | |
"# df_addition_col\n", | |
"# pd.concat([df,df_addition_col],axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 139, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"day_no\n", | |
"day1 400\n", | |
"day2 350\n", | |
"Name: point1, dtype: int64" | |
] | |
}, | |
"execution_count": 139, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df[\"point1\"].sum(axis=0)\n", | |
"df[[\"point1\", \"point2\"]].sum(axis=1)\n", | |
"df.pivot_table('point1', aggfunc='sum', fill_value=0, columns=\"day_no\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 144, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>day_no</th>\n", | |
" <th>point1</th>\n", | |
" <th>point2</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>day1</td>\n", | |
" <td>400</td>\n", | |
" <td>330</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>day2</td>\n", | |
" <td>350</td>\n", | |
" <td>420</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" day_no point1 point2\n", | |
"0 day1 400 330\n", | |
"1 day2 350 420" | |
] | |
}, | |
"execution_count": 144, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"grouped = df.groupby(\"day_no\", as_index=False)\n", | |
"grouped[[\"point1\", \"point2\"]].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 145, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv(\"outpu.csv\")" | |
] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [conda env:tensorflow]", | |
"language": "python", | |
"name": "conda-env-tensorflow-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment