Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aflaxman/436cde71f85b93638959 to your computer and use it in GitHub Desktop.
Save aflaxman/436cde71f85b93638959 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:df0cfc88b93dede75c92963878f891c8f096e2628f7b5c8c6837c2fadc00cfc0"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"!date"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Thu Jun 26 10:11:57 PDT 2014\r\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.DataFrame({'A': [0,0,0,0,1,1],\n",
" 'B': [1,2,3,4,5,6],\n",
" 'C': [8,9,10,11,12,13]})"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.describe()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td> 6.000000</td>\n",
" <td> 6.000000</td>\n",
" <td> 6.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 0.333333</td>\n",
" <td> 3.500000</td>\n",
" <td> 10.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 0.516398</td>\n",
" <td> 1.870829</td>\n",
" <td> 1.870829</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 0.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 8.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 0.000000</td>\n",
" <td> 2.250000</td>\n",
" <td> 9.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 0.000000</td>\n",
" <td> 3.500000</td>\n",
" <td> 10.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 0.750000</td>\n",
" <td> 4.750000</td>\n",
" <td> 11.750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 1.000000</td>\n",
" <td> 6.000000</td>\n",
" <td> 13.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
" A B C\n",
"count 6.000000 6.000000 6.000000\n",
"mean 0.333333 3.500000 10.500000\n",
"std 0.516398 1.870829 1.870829\n",
"min 0.000000 1.000000 8.000000\n",
"25% 0.000000 2.250000 9.250000\n",
"50% 0.000000 3.500000 10.500000\n",
"75% 0.750000 4.750000 11.750000\n",
"max 1.000000 6.000000 13.000000"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.groupby('A').describe()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">0</th>\n",
" <th>count</th>\n",
" <td> 4.000000</td>\n",
" <td> 4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 2.500000</td>\n",
" <td> 9.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 1.290994</td>\n",
" <td> 1.290994</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 1.000000</td>\n",
" <td> 8.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 1.750000</td>\n",
" <td> 8.750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 2.500000</td>\n",
" <td> 9.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 3.250000</td>\n",
" <td> 10.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 4.000000</td>\n",
" <td> 11.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\">1</th>\n",
" <th>count</th>\n",
" <td> 2.000000</td>\n",
" <td> 2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 5.500000</td>\n",
" <td> 12.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 0.707107</td>\n",
" <td> 0.707107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 5.000000</td>\n",
" <td> 12.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 5.250000</td>\n",
" <td> 12.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 5.500000</td>\n",
" <td> 12.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 5.750000</td>\n",
" <td> 12.750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 6.000000</td>\n",
" <td> 13.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
" B C\n",
"A \n",
"0 count 4.000000 4.000000\n",
" mean 2.500000 9.500000\n",
" std 1.290994 1.290994\n",
" min 1.000000 8.000000\n",
" 25% 1.750000 8.750000\n",
" 50% 2.500000 9.500000\n",
" 75% 3.250000 10.250000\n",
" max 4.000000 11.000000\n",
"1 count 2.000000 2.000000\n",
" mean 5.500000 12.500000\n",
" std 0.707107 0.707107\n",
" min 5.000000 12.000000\n",
" 25% 5.250000 12.250000\n",
" 50% 5.500000 12.500000\n",
" 75% 5.750000 12.750000\n",
" max 6.000000 13.000000"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.groupby('A').describe().unstack()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">B</th>\n",
" <th colspan=\"8\" halign=\"left\">C</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 4</td>\n",
" <td> 2.5</td>\n",
" <td> 1.290994</td>\n",
" <td> 1</td>\n",
" <td> 1.75</td>\n",
" <td> 2.5</td>\n",
" <td> 3.25</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 9.5</td>\n",
" <td> 1.290994</td>\n",
" <td> 8</td>\n",
" <td> 8.75</td>\n",
" <td> 9.5</td>\n",
" <td> 10.25</td>\n",
" <td> 11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2</td>\n",
" <td> 5.5</td>\n",
" <td> 0.707107</td>\n",
" <td> 5</td>\n",
" <td> 5.25</td>\n",
" <td> 5.5</td>\n",
" <td> 5.75</td>\n",
" <td> 6</td>\n",
" <td> 2</td>\n",
" <td> 12.5</td>\n",
" <td> 0.707107</td>\n",
" <td> 12</td>\n",
" <td> 12.25</td>\n",
" <td> 12.5</td>\n",
" <td> 12.75</td>\n",
" <td> 13</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
" B C \\\n",
" count mean std min 25% 50% 75% max count mean std \n",
"A \n",
"0 4 2.5 1.290994 1 1.75 2.5 3.25 4 4 9.5 1.290994 \n",
"1 2 5.5 0.707107 5 5.25 5.5 5.75 6 2 12.5 0.707107 \n",
"\n",
" \n",
" min 25% 50% 75% max \n",
"A \n",
"0 8 8.75 9.5 10.25 11 \n",
"1 12 12.25 12.5 12.75 13 "
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.groupby('A').describe().unstack()\\\n",
" .loc[:,(slice(None),['count','mean'])]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"2\" halign=\"left\">B</th>\n",
" <th colspan=\"2\" halign=\"left\">C</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 4</td>\n",
" <td> 2.5</td>\n",
" <td> 4</td>\n",
" <td> 9.5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 2</td>\n",
" <td> 5.5</td>\n",
" <td> 2</td>\n",
" <td> 12.5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
" B C \n",
" count mean count mean\n",
"A \n",
"0 4 2.5 4 9.5\n",
"1 2 5.5 2 12.5"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment