Last active
August 29, 2015 13:57
-
-
Save rdhyee/9621595 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:e138f744a702941414fb2fffe3cfd420bb75352e2fbf7496bc9571d64a688ff8" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"[Q]: (https://piazza.com/class/hqo4ux2fcns3ds?cid=10)**Why do the axis specifications behave differently for different functions?**\n", | |
"\n", | |
">Hello,\n", | |
" \n", | |
"> An example:\n", | |
" \n", | |
"```Python \n", | |
"df1 = DataFrame({'a': range(0,10), 'b': range(10,20)})\n", | |
" \n", | |
"df1.apply(lambda x: sum(x), axis=1) # This would sum across columns.\n", | |
"df1.div(df1.sum().astype(float), axis=1) # This would normalize across the rows.\n", | |
"df1.div(df1.sum(1).astype(float), axis=0) # This would normalize across the columns.\n", | |
"```\n", | |
" \n", | |
"> Thanks!\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from pandas import DataFrame\n", | |
"\n", | |
"df1 = DataFrame({'a': range(0,10), 'b': range(10,20)})\n", | |
"df1" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0</td>\n", | |
" <td> 10</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 1</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 2</td>\n", | |
" <td> 12</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 3</td>\n", | |
" <td> 13</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 4</td>\n", | |
" <td> 14</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 5</td>\n", | |
" <td> 15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 6</td>\n", | |
" <td> 16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td> 7</td>\n", | |
" <td> 17</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td> 8</td>\n", | |
" <td> 18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td> 9</td>\n", | |
" <td> 19</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows \u00d7 2 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 1, | |
"text": [ | |
" a b\n", | |
"0 0 10\n", | |
"1 1 11\n", | |
"2 2 12\n", | |
"3 3 13\n", | |
"4 4 14\n", | |
"5 5 15\n", | |
"6 6 16\n", | |
"7 7 17\n", | |
"8 8 18\n", | |
"9 9 19\n", | |
"\n", | |
"[10 rows x 2 columns]" | |
] | |
} | |
], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.apply(lambda x: sum(x), axis=1) # This would sum across columns." | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"0 10\n", | |
"1 12\n", | |
"2 14\n", | |
"3 16\n", | |
"4 18\n", | |
"5 20\n", | |
"6 22\n", | |
"7 24\n", | |
"8 26\n", | |
"9 28\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Useful to see what\n", | |
"\n", | |
" df1.sum().astype(float)\n", | |
" \n", | |
"does...specifically that it has index with 2 elements -- `a`, `b`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.sum.html\n", | |
"df1.sum().astype(float)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
"a 45\n", | |
"b 145\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.sum(1).astype(float)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"0 10\n", | |
"1 12\n", | |
"2 14\n", | |
"3 16\n", | |
"4 18\n", | |
"5 20\n", | |
"6 22\n", | |
"7 24\n", | |
"8 26\n", | |
"9 28\n", | |
"dtype: float64" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.sum(1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"0 10\n", | |
"1 12\n", | |
"2 14\n", | |
"3 16\n", | |
"4 18\n", | |
"5 20\n", | |
"6 22\n", | |
"7 24\n", | |
"8 26\n", | |
"9 28\n", | |
"dtype: int64" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.div(df1.sum().astype(float), axis=1) # This would normalize across the rows." | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.068966</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 0.022222</td>\n", | |
" <td> 0.075862</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 0.044444</td>\n", | |
" <td> 0.082759</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.066667</td>\n", | |
" <td> 0.089655</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 0.088889</td>\n", | |
" <td> 0.096552</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 0.111111</td>\n", | |
" <td> 0.103448</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 0.133333</td>\n", | |
" <td> 0.110345</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td> 0.155556</td>\n", | |
" <td> 0.117241</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td> 0.177778</td>\n", | |
" <td> 0.124138</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td> 0.200000</td>\n", | |
" <td> 0.131034</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows \u00d7 2 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
" a b\n", | |
"0 0.000000 0.068966\n", | |
"1 0.022222 0.075862\n", | |
"2 0.044444 0.082759\n", | |
"3 0.066667 0.089655\n", | |
"4 0.088889 0.096552\n", | |
"5 0.111111 0.103448\n", | |
"6 0.133333 0.110345\n", | |
"7 0.155556 0.117241\n", | |
"8 0.177778 0.124138\n", | |
"9 0.200000 0.131034\n", | |
"\n", | |
"[10 rows x 2 columns]" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# another way to see how df1.div(df1.sum().astype(float), axis=1) works\n", | |
"# axis = 1 means row by row\n", | |
"\n", | |
"for i in range(len(df1)):\n", | |
" print i\n", | |
" print df1.ix[i] / df1.sum().astype(float)\n", | |
" print" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"0\n", | |
"a 0.000000\n", | |
"b 0.068966\n", | |
"dtype: float64\n", | |
"\n", | |
"1\n", | |
"a 0.022222\n", | |
"b 0.075862\n", | |
"dtype: float64\n", | |
"\n", | |
"2\n", | |
"a 0.044444\n", | |
"b 0.082759\n", | |
"dtype: float64\n", | |
"\n", | |
"3\n", | |
"a 0.066667\n", | |
"b 0.089655\n", | |
"dtype: float64\n", | |
"\n", | |
"4\n", | |
"a 0.088889\n", | |
"b 0.096552\n", | |
"dtype: float64\n", | |
"\n", | |
"5\n", | |
"a 0.111111\n", | |
"b 0.103448\n", | |
"dtype: float64\n", | |
"\n", | |
"6\n", | |
"a 0.133333\n", | |
"b 0.110345\n", | |
"dtype: float64\n", | |
"\n", | |
"7\n", | |
"a 0.155556\n", | |
"b 0.117241\n", | |
"dtype: float64\n", | |
"\n", | |
"8\n", | |
"a 0.177778\n", | |
"b 0.124138\n", | |
"dtype: float64\n", | |
"\n", | |
"9\n", | |
"a 0.200000\n", | |
"b 0.131034\n", | |
"dtype: float64\n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df1.div(df1.sum(1).astype(float), axis=0) # This would normalize across the columns." | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>a</th>\n", | |
" <th>b</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 0.083333</td>\n", | |
" <td> 0.916667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 0.142857</td>\n", | |
" <td> 0.857143</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 0.187500</td>\n", | |
" <td> 0.812500</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 0.222222</td>\n", | |
" <td> 0.777778</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 0.250000</td>\n", | |
" <td> 0.750000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 0.272727</td>\n", | |
" <td> 0.727273</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td> 0.291667</td>\n", | |
" <td> 0.708333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td> 0.307692</td>\n", | |
" <td> 0.692308</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td> 0.321429</td>\n", | |
" <td> 0.678571</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>10 rows \u00d7 2 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": [ | |
" a b\n", | |
"0 0.000000 1.000000\n", | |
"1 0.083333 0.916667\n", | |
"2 0.142857 0.857143\n", | |
"3 0.187500 0.812500\n", | |
"4 0.222222 0.777778\n", | |
"5 0.250000 0.750000\n", | |
"6 0.272727 0.727273\n", | |
"7 0.291667 0.708333\n", | |
"8 0.307692 0.692308\n", | |
"9 0.321429 0.678571\n", | |
"\n", | |
"[10 rows x 2 columns]" | |
] | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# another way to see how df1.div(df1.sum(1).astype(float), axis=0) works\n", | |
"# axis=0 means column by column\n", | |
"\n", | |
"for col in df1:\n", | |
" print col\n", | |
" print df1[col]/df1.sum(1).astype(float)\n", | |
" print" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"a\n", | |
"0 0.000000\n", | |
"1 0.083333\n", | |
"2 0.142857\n", | |
"3 0.187500\n", | |
"4 0.222222\n", | |
"5 0.250000\n", | |
"6 0.272727\n", | |
"7 0.291667\n", | |
"8 0.307692\n", | |
"9 0.321429\n", | |
"dtype: float64\n", | |
"\n", | |
"b\n", | |
"0 1.000000\n", | |
"1 0.916667\n", | |
"2 0.857143\n", | |
"3 0.812500\n", | |
"4 0.777778\n", | |
"5 0.750000\n", | |
"6 0.727273\n", | |
"7 0.708333\n", | |
"8 0.692308\n", | |
"9 0.678571\n", | |
"dtype: float64\n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 9 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment