Created
June 26, 2014 17:21
-
-
Save aflaxman/436cde71f85b93638959 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:df0cfc88b93dede75c92963878f891c8f096e2628f7b5c8c6837c2fadc00cfc0" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"!date" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Thu Jun 26 10:11:57 PDT 2014\r\n" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = pd.DataFrame({'A': [0,0,0,0,1,1],\n", | |
" 'B': [1,2,3,4,5,6],\n", | |
" 'C': [8,9,10,11,12,13]})" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.describe()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td> 6.000000</td>\n", | |
" <td> 6.000000</td>\n", | |
" <td> 6.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 0.333333</td>\n", | |
" <td> 3.500000</td>\n", | |
" <td> 10.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 0.516398</td>\n", | |
" <td> 1.870829</td>\n", | |
" <td> 1.870829</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 8.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 2.250000</td>\n", | |
" <td> 9.250000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 3.500000</td>\n", | |
" <td> 10.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 0.750000</td>\n", | |
" <td> 4.750000</td>\n", | |
" <td> 11.750000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 6.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
" A B C\n", | |
"count 6.000000 6.000000 6.000000\n", | |
"mean 0.333333 3.500000 10.500000\n", | |
"std 0.516398 1.870829 1.870829\n", | |
"min 0.000000 1.000000 8.000000\n", | |
"25% 0.000000 2.250000 9.250000\n", | |
"50% 0.000000 3.500000 10.500000\n", | |
"75% 0.750000 4.750000 11.750000\n", | |
"max 1.000000 6.000000 13.000000" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.groupby('A').describe()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\">0</th>\n", | |
" <th>count</th>\n", | |
" <td> 4.000000</td>\n", | |
" <td> 4.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 2.500000</td>\n", | |
" <td> 9.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 1.290994</td>\n", | |
" <td> 1.290994</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 8.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 1.750000</td>\n", | |
" <td> 8.750000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 2.500000</td>\n", | |
" <td> 9.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 3.250000</td>\n", | |
" <td> 10.250000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 4.000000</td>\n", | |
" <td> 11.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\">1</th>\n", | |
" <th>count</th>\n", | |
" <td> 2.000000</td>\n", | |
" <td> 2.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 5.500000</td>\n", | |
" <td> 12.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 0.707107</td>\n", | |
" <td> 0.707107</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 5.000000</td>\n", | |
" <td> 12.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 5.250000</td>\n", | |
" <td> 12.250000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 5.500000</td>\n", | |
" <td> 12.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 5.750000</td>\n", | |
" <td> 12.750000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 6.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
" B C\n", | |
"A \n", | |
"0 count 4.000000 4.000000\n", | |
" mean 2.500000 9.500000\n", | |
" std 1.290994 1.290994\n", | |
" min 1.000000 8.000000\n", | |
" 25% 1.750000 8.750000\n", | |
" 50% 2.500000 9.500000\n", | |
" 75% 3.250000 10.250000\n", | |
" max 4.000000 11.000000\n", | |
"1 count 2.000000 2.000000\n", | |
" mean 5.500000 12.500000\n", | |
" std 0.707107 0.707107\n", | |
" min 5.000000 12.000000\n", | |
" 25% 5.250000 12.250000\n", | |
" 50% 5.500000 12.500000\n", | |
" 75% 5.750000 12.750000\n", | |
" max 6.000000 13.000000" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.groupby('A').describe().unstack()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th colspan=\"8\" halign=\"left\">B</th>\n", | |
" <th colspan=\"8\" halign=\"left\">C</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>mean</th>\n", | |
" <th>std</th>\n", | |
" <th>min</th>\n", | |
" <th>25%</th>\n", | |
" <th>50%</th>\n", | |
" <th>75%</th>\n", | |
" <th>max</th>\n", | |
" <th>count</th>\n", | |
" <th>mean</th>\n", | |
" <th>std</th>\n", | |
" <th>min</th>\n", | |
" <th>25%</th>\n", | |
" <th>50%</th>\n", | |
" <th>75%</th>\n", | |
" <th>max</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 4</td>\n", | |
" <td> 2.5</td>\n", | |
" <td> 1.290994</td>\n", | |
" <td> 1</td>\n", | |
" <td> 1.75</td>\n", | |
" <td> 2.5</td>\n", | |
" <td> 3.25</td>\n", | |
" <td> 4</td>\n", | |
" <td> 4</td>\n", | |
" <td> 9.5</td>\n", | |
" <td> 1.290994</td>\n", | |
" <td> 8</td>\n", | |
" <td> 8.75</td>\n", | |
" <td> 9.5</td>\n", | |
" <td> 10.25</td>\n", | |
" <td> 11</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 2</td>\n", | |
" <td> 5.5</td>\n", | |
" <td> 0.707107</td>\n", | |
" <td> 5</td>\n", | |
" <td> 5.25</td>\n", | |
" <td> 5.5</td>\n", | |
" <td> 5.75</td>\n", | |
" <td> 6</td>\n", | |
" <td> 2</td>\n", | |
" <td> 12.5</td>\n", | |
" <td> 0.707107</td>\n", | |
" <td> 12</td>\n", | |
" <td> 12.25</td>\n", | |
" <td> 12.5</td>\n", | |
" <td> 12.75</td>\n", | |
" <td> 13</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": [ | |
" B C \\\n", | |
" count mean std min 25% 50% 75% max count mean std \n", | |
"A \n", | |
"0 4 2.5 1.290994 1 1.75 2.5 3.25 4 4 9.5 1.290994 \n", | |
"1 2 5.5 0.707107 5 5.25 5.5 5.75 6 2 12.5 0.707107 \n", | |
"\n", | |
" \n", | |
" min 25% 50% 75% max \n", | |
"A \n", | |
"0 8 8.75 9.5 10.25 11 \n", | |
"1 12 12.25 12.5 12.75 13 " | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df.groupby('A').describe().unstack()\\\n", | |
" .loc[:,(slice(None),['count','mean']),]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th colspan=\"2\" halign=\"left\">B</th>\n", | |
" <th colspan=\"2\" halign=\"left\">C</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" <th>mean</th>\n", | |
" <th>count</th>\n", | |
" <th>mean</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 4</td>\n", | |
" <td> 2.5</td>\n", | |
" <td> 4</td>\n", | |
" <td> 9.5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 2</td>\n", | |
" <td> 5.5</td>\n", | |
" <td> 2</td>\n", | |
" <td> 12.5</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 12, | |
"text": [ | |
" B C \n", | |
" count mean count mean\n", | |
"A \n", | |
"0 4 2.5 4 9.5\n", | |
"1 2 5.5 2 12.5" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment