Skip to content

Instantly share code, notes, and snippets.

@akameco
Created February 14, 2017 07:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akameco/a5f70cdb34de4ad176104145be65739e to your computer and use it in GitHub Desktop.
Save akameco/a5f70cdb34de4ad176104145be65739e to your computer and use it in GitHub Desktop.
pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df_sample = pd.DataFrame([\n",
" [\"day1\", \"day2\", \"day1\", \"day2\", \"day1\", \"day2\"],\n",
" [\"A\", \"B\", \"A\", \"B\", \"C\", \"C\"],\n",
" [100, 150, 200, 150, 100, 50],\n",
" [120, 160, 100, 180, 110, 80]\n",
" ]).T"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=4, step=1)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.columns"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"RangeIndex(start=0, stop=6, step=1)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.index "
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df_sample.columns = [\"day_no\",\"class\",\"point1\",\"point2\"]\n",
"df_sample.index = [11,12,13,14,15,16] # インデックス名を上書きする"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"13 day1 A 200 100\n",
"14 day2 B 150 180\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.rename(columns={'score1': 'point1'})"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_sample)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(6, 4)"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.shape"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 6 entries, 11 to 16\n",
"Data columns (total 4 columns):\n",
"day_no 6 non-null object\n",
"class 6 non-null object\n",
"point1 6 non-null object\n",
"point2 6 non-null object\n",
"dtypes: object(4)\n",
"memory usage: 240.0+ bytes\n"
]
}
],
"source": [
"df_sample.info()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>150</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"count 6 6 6 6\n",
"unique 2 3 4 6\n",
"top day1 C 150 110\n",
"freq 3 2 2 1"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.describe()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"13 day1 A 200 100\n",
"14 day2 B 150 180\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"16 day2 C 50 80"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.tail(1)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"11 day1\n",
"12 day2\n",
"13 day1\n",
"14 day2\n",
"15 day1\n",
"16 day2\n",
"Name: day_no, dtype: object"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample[\"day_no\"]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>point1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no point1\n",
"11 day1 100\n",
"12 day2 150\n",
"13 day1 200\n",
"14 day2 150\n",
"15 day1 100\n",
"16 day2 50"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample[[\"day_no\", \"point1\"]]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"13 day1 A 200 100\n",
"14 day2 B 150 180\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.rename(columns={'score1': 'point1'})"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>point1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" point1\n",
"11 100\n",
"12 150\n",
"13 200\n",
"14 150\n",
"15 100\n",
"16 50"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"score_select = pd.Series(df_sample.columns).str.contains(\"point1\")\n",
"df_sample.ix[:, np.array(score_select)]"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"13 day1 A 200 100"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample[df_sample.day_no == 'day1']\n",
"series_bool = [True, False, True, False, False, False]\n",
"df_sample[series_bool]"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"15 day1 C 100 110"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_sample.query('point1 == 100') "
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = df_sample"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"13 day1 A 200 100\n",
"14 day2 B 150 180\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"13 day1 A 200 100\n",
"14 day2 B 150 180\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.query('day_no == \"day1\"')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"16 day2 C 50 80\n",
"11 day1 A 100 120\n",
"15 day1 C 100 110\n",
"12 day2 B 150 160\n",
"14 day2 B 150 180\n",
"13 day1 A 200 100"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(\"point1\")"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"16 day2 C 50 80\n",
"15 day1 C 100 110\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"14 day2 B 150 180\n",
"13 day1 A 200 100"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values([\"point1\", \"point2\"])"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"13 day1 A 200 100\n",
"12 day2 B 150 160\n",
"14 day2 B 150 180\n",
"11 day1 A 100 120\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sort_values(\"point1\", ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>180</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2\n",
"11 day1 A 100 120\n",
"12 day2 B 150 160\n",
"13 day1 A 200 100\n",
"14 day2 B 150 180\n",
"15 day1 C 100 110\n",
"16 day2 C 50 80\n",
"17 day1 A 100 180"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_add = pd.DataFrame([[\"day1\", \"A\",100,180]])\n",
"df_add.columns = [\"day_no\", \"class\", \"point1\", \"point2\"]\n",
"df_add.index = [17]\n",
"\n",
"pd.concat([df, df_add], axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df_cal = pd.DataFrame([[120, 160, 100, 180, 110, 80]]).T\n",
"df_cal.columns = [\"score3\"]\n",
"df_cal.index = [11, 12, 13, 14, 15, 16]"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>class</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" <th>score3</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>100</td>\n",
" <td>120</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>day1</td>\n",
" <td>A</td>\n",
" <td>200</td>\n",
" <td>100</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>day2</td>\n",
" <td>B</td>\n",
" <td>150</td>\n",
" <td>180</td>\n",
" <td>180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>day1</td>\n",
" <td>C</td>\n",
" <td>100</td>\n",
" <td>110</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>day2</td>\n",
" <td>C</td>\n",
" <td>50</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no class point1 point2 score3\n",
"11 day1 A 100 120 120\n",
"12 day2 B 150 160 160\n",
"13 day1 A 200 100 100\n",
"14 day2 B 150 180 180\n",
"15 day1 C 100 110 110\n",
"16 day2 C 50 80 80"
]
},
"execution_count": 127,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([df, df_cal], axis=1)\n",
"\n",
"# df_addition_col = pd.DataFrame([[120,160,100,180,110,80]]).T\n",
"# df_addition_col.columns =[\"score3\"]\n",
"# df_addition_col.index = [11,12,13,21,22,23]\n",
"# df_addition_col\n",
"# pd.concat([df,df_addition_col],axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"day_no\n",
"day1 400\n",
"day2 350\n",
"Name: point1, dtype: int64"
]
},
"execution_count": 139,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"point1\"].sum(axis=0)\n",
"df[[\"point1\", \"point2\"]].sum(axis=1)\n",
"df.pivot_table('point1', aggfunc='sum', fill_value=0, columns=\"day_no\")"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>day_no</th>\n",
" <th>point1</th>\n",
" <th>point2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>day1</td>\n",
" <td>400</td>\n",
" <td>330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>day2</td>\n",
" <td>350</td>\n",
" <td>420</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" day_no point1 point2\n",
"0 day1 400 330\n",
"1 day2 350 420"
]
},
"execution_count": 144,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grouped = df.groupby(\"day_no\", as_index=False)\n",
"grouped[[\"point1\", \"point2\"]].sum()"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.to_csv(\"outpu.csv\")"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda env:tensorflow]",
"language": "python",
"name": "conda-env-tensorflow-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment