Last active
October 1, 2015 18:36
-
-
Save decisionstats/4142e98375445c5e4174 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:1fbc7d13b08868781650d566f6ad0c1b1f1d475a8a58c561db89ddf159f3fda7" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd #importing packages\n", | |
"import os as os\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#pd.describe_option() #describe options for customizing" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#pd.get_option(\"display.memory_usage\") #reads the current value of an option (pd.set_option is what changes it)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"os.getcwd() #current working directory\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
"'/home/ajay'" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"os.chdir('/home/ajay/Desktop')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"os.getcwd()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"'/home/ajay/Desktop'" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"a=os.getcwd()\n", | |
"os.listdir(a)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"['adult.data']" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"names2=[\"age\",\"workclass\",\"fnlwgt\",\"education\",\"education-num\",\"marital-status\",\"occupation\",\"relationship\",\"race\",\"sex\",\"capital-gain\",\"capital-loss\",\"hours-per-week\",\"native-country\",\"income\"]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"len(names2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"15" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult=pd.read_csv(\"adult.data\",header=None)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"len(adult)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 11, | |
"text": [ | |
"32562" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult.columns" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 12, | |
"text": [ | |
"Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], dtype='int64')" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult.info()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"Int64Index: 32562 entries, 0 to 32561\n", | |
"Data columns (total 15 columns):\n", | |
"0 32561 non-null float64\n", | |
"1 32561 non-null object\n", | |
"2 32561 non-null float64\n", | |
"3 32561 non-null object\n", | |
"4 32561 non-null float64\n", | |
"5 32561 non-null object\n", | |
"6 32561 non-null object\n", | |
"7 32561 non-null object\n", | |
"8 32561 non-null object\n", | |
"9 32561 non-null object\n", | |
"10 32561 non-null float64\n", | |
"11 32561 non-null float64\n", | |
"12 32561 non-null float64\n", | |
"13 32561 non-null object\n", | |
"14 32561 non-null object\n", | |
"dtypes: float64(6), object(9)" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult.head(8)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>10</th>\n", | |
" <th>11</th>\n", | |
" <th>12</th>\n", | |
" <th>13</th>\n", | |
" <th>14</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td> 39</td>\n", | |
" <td> State-gov</td>\n", | |
" <td> 77516</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Adm-clerical</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 2174</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td> 50</td>\n", | |
" <td> Self-emp-not-inc</td>\n", | |
" <td> 83311</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 13</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td> 38</td>\n", | |
" <td> Private</td>\n", | |
" <td> 215646</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Divorced</td>\n", | |
" <td> Handlers-cleaners</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td> 53</td>\n", | |
" <td> Private</td>\n", | |
" <td> 234721</td>\n", | |
" <td> 11th</td>\n", | |
" <td> 7</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Handlers-cleaners</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Black</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td> 28</td>\n", | |
" <td> Private</td>\n", | |
" <td> 338409</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Prof-specialty</td>\n", | |
" <td> Wife</td>\n", | |
" <td> Black</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> Cuba</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td> 37</td>\n", | |
" <td> Private</td>\n", | |
" <td> 284582</td>\n", | |
" <td> Masters</td>\n", | |
" <td> 14</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Wife</td>\n", | |
" <td> White</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td> 49</td>\n", | |
" <td> Private</td>\n", | |
" <td> 160187</td>\n", | |
" <td> 9th</td>\n", | |
" <td> 5</td>\n", | |
" <td> Married-spouse-absent</td>\n", | |
" <td> Other-service</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> Black</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 16</td>\n", | |
" <td> Jamaica</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td> 52</td>\n", | |
" <td> Self-emp-not-inc</td>\n", | |
" <td> 209642</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 45</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
" 0 1 2 3 4 5 \\\n", | |
"0 39 State-gov 77516 Bachelors 13 Never-married \n", | |
"1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse \n", | |
"2 38 Private 215646 HS-grad 9 Divorced \n", | |
"3 53 Private 234721 11th 7 Married-civ-spouse \n", | |
"4 28 Private 338409 Bachelors 13 Married-civ-spouse \n", | |
"5 37 Private 284582 Masters 14 Married-civ-spouse \n", | |
"6 49 Private 160187 9th 5 Married-spouse-absent \n", | |
"7 52 Self-emp-not-inc 209642 HS-grad 9 Married-civ-spouse \n", | |
"\n", | |
" 6 7 8 9 10 11 12 \\\n", | |
"0 Adm-clerical Not-in-family White Male 2174 0 40 \n", | |
"1 Exec-managerial Husband White Male 0 0 13 \n", | |
"2 Handlers-cleaners Not-in-family White Male 0 0 40 \n", | |
"3 Handlers-cleaners Husband Black Male 0 0 40 \n", | |
"4 Prof-specialty Wife Black Female 0 0 40 \n", | |
"5 Exec-managerial Wife White Female 0 0 40 \n", | |
"6 Other-service Not-in-family Black Female 0 0 16 \n", | |
"7 Exec-managerial Husband White Male 0 0 45 \n", | |
"\n", | |
" 13 14 \n", | |
"0 United-States <=50K \n", | |
"1 United-States <=50K \n", | |
"2 United-States <=50K \n", | |
"3 United-States <=50K \n", | |
"4 Cuba <=50K \n", | |
"5 United-States <=50K \n", | |
"6 Jamaica <=50K \n", | |
"7 United-States >50K " | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult.columns= names2" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult.head(30)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>workclass</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" <th>income</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0 </th>\n", | |
" <td> 39</td>\n", | |
" <td> State-gov</td>\n", | |
" <td> 77516</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Adm-clerical</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 2174</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1 </th>\n", | |
" <td> 50</td>\n", | |
" <td> Self-emp-not-inc</td>\n", | |
" <td> 83311</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 13</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2 </th>\n", | |
" <td> 38</td>\n", | |
" <td> Private</td>\n", | |
" <td> 215646</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Divorced</td>\n", | |
" <td> Handlers-cleaners</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3 </th>\n", | |
" <td> 53</td>\n", | |
" <td> Private</td>\n", | |
" <td> 234721</td>\n", | |
" <td> 11th</td>\n", | |
" <td> 7</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Handlers-cleaners</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Black</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4 </th>\n", | |
" <td> 28</td>\n", | |
" <td> Private</td>\n", | |
" <td> 338409</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Prof-specialty</td>\n", | |
" <td> Wife</td>\n", | |
" <td> Black</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> Cuba</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5 </th>\n", | |
" <td> 37</td>\n", | |
" <td> Private</td>\n", | |
" <td> 284582</td>\n", | |
" <td> Masters</td>\n", | |
" <td> 14</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Wife</td>\n", | |
" <td> White</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6 </th>\n", | |
" <td> 49</td>\n", | |
" <td> Private</td>\n", | |
" <td> 160187</td>\n", | |
" <td> 9th</td>\n", | |
" <td> 5</td>\n", | |
" <td> Married-spouse-absent</td>\n", | |
" <td> Other-service</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> Black</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 16</td>\n", | |
" <td> Jamaica</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7 </th>\n", | |
" <td> 52</td>\n", | |
" <td> Self-emp-not-inc</td>\n", | |
" <td> 209642</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 45</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8 </th>\n", | |
" <td> 31</td>\n", | |
" <td> Private</td>\n", | |
" <td> 45781</td>\n", | |
" <td> Masters</td>\n", | |
" <td> 14</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Prof-specialty</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> White</td>\n", | |
" <td> Female</td>\n", | |
" <td> 14084</td>\n", | |
" <td> 0</td>\n", | |
" <td> 50</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9 </th>\n", | |
" <td> 42</td>\n", | |
" <td> Private</td>\n", | |
" <td> 159449</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 5178</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td> 37</td>\n", | |
" <td> Private</td>\n", | |
" <td> 280464</td>\n", | |
" <td> Some-college</td>\n", | |
" <td> 10</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Black</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 80</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td> 30</td>\n", | |
" <td> State-gov</td>\n", | |
" <td> 141297</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Prof-specialty</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Asian-Pac-Islander</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> India</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td> 23</td>\n", | |
" <td> Private</td>\n", | |
" <td> 122272</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Adm-clerical</td>\n", | |
" <td> Own-child</td>\n", | |
" <td> White</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 30</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td> 32</td>\n", | |
" <td> Private</td>\n", | |
" <td> 205019</td>\n", | |
" <td> Assoc-acdm</td>\n", | |
" <td> 12</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Sales</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> Black</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 50</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td> 40</td>\n", | |
" <td> Private</td>\n", | |
" <td> 121772</td>\n", | |
" <td> Assoc-voc</td>\n", | |
" <td> 11</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Craft-repair</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Asian-Pac-Islander</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> ?</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td> 34</td>\n", | |
" <td> Private</td>\n", | |
" <td> 245487</td>\n", | |
" <td> 7th-8th</td>\n", | |
" <td> 4</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Transport-moving</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Amer-Indian-Eskimo</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 45</td>\n", | |
" <td> Mexico</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td> 25</td>\n", | |
" <td> Self-emp-not-inc</td>\n", | |
" <td> 176756</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Farming-fishing</td>\n", | |
" <td> Own-child</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 35</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td> 32</td>\n", | |
" <td> Private</td>\n", | |
" <td> 186824</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Machine-op-inspct</td>\n", | |
" <td> Unmarried</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td> 38</td>\n", | |
" <td> Private</td>\n", | |
" <td> 28887</td>\n", | |
" <td> 11th</td>\n", | |
" <td> 7</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Sales</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 50</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td> 43</td>\n", | |
" <td> Self-emp-not-inc</td>\n", | |
" <td> 292175</td>\n", | |
" <td> Masters</td>\n", | |
" <td> 14</td>\n", | |
" <td> Divorced</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Unmarried</td>\n", | |
" <td> White</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 45</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td> 40</td>\n", | |
" <td> Private</td>\n", | |
" <td> 193524</td>\n", | |
" <td> Doctorate</td>\n", | |
" <td> 16</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Prof-specialty</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 60</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td> 54</td>\n", | |
" <td> Private</td>\n", | |
" <td> 302146</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Separated</td>\n", | |
" <td> Other-service</td>\n", | |
" <td> Unmarried</td>\n", | |
" <td> Black</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 20</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td> 35</td>\n", | |
" <td> Federal-gov</td>\n", | |
" <td> 76845</td>\n", | |
" <td> 9th</td>\n", | |
" <td> 5</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Farming-fishing</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Black</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td> 43</td>\n", | |
" <td> Private</td>\n", | |
" <td> 117037</td>\n", | |
" <td> 11th</td>\n", | |
" <td> 7</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Transport-moving</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 2042</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24</th>\n", | |
" <td> 59</td>\n", | |
" <td> Private</td>\n", | |
" <td> 109015</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Divorced</td>\n", | |
" <td> Tech-support</td>\n", | |
" <td> Unmarried</td>\n", | |
" <td> White</td>\n", | |
" <td> Female</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25</th>\n", | |
" <td> 56</td>\n", | |
" <td> Local-gov</td>\n", | |
" <td> 216851</td>\n", | |
" <td> Bachelors</td>\n", | |
" <td> 13</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Tech-support</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>26</th>\n", | |
" <td> 19</td>\n", | |
" <td> Private</td>\n", | |
" <td> 168294</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Never-married</td>\n", | |
" <td> Craft-repair</td>\n", | |
" <td> Own-child</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>27</th>\n", | |
" <td> 54</td>\n", | |
" <td> ?</td>\n", | |
" <td> 180211</td>\n", | |
" <td> Some-college</td>\n", | |
" <td> 10</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> ?</td>\n", | |
" <td> Husband</td>\n", | |
" <td> Asian-Pac-Islander</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 60</td>\n", | |
" <td> South</td>\n", | |
" <td> >50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>28</th>\n", | |
" <td> 39</td>\n", | |
" <td> Private</td>\n", | |
" <td> 367260</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Divorced</td>\n", | |
" <td> Exec-managerial</td>\n", | |
" <td> Not-in-family</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 80</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>29</th>\n", | |
" <td> 49</td>\n", | |
" <td> Private</td>\n", | |
" <td> 193366</td>\n", | |
" <td> HS-grad</td>\n", | |
" <td> 9</td>\n", | |
" <td> Married-civ-spouse</td>\n", | |
" <td> Craft-repair</td>\n", | |
" <td> Husband</td>\n", | |
" <td> White</td>\n", | |
" <td> Male</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 40</td>\n", | |
" <td> United-States</td>\n", | |
" <td> <=50K</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 16, | |
"text": [ | |
" age workclass fnlwgt education education-num \\\n", | |
"0 39 State-gov 77516 Bachelors 13 \n", | |
"1 50 Self-emp-not-inc 83311 Bachelors 13 \n", | |
"2 38 Private 215646 HS-grad 9 \n", | |
"3 53 Private 234721 11th 7 \n", | |
"4 28 Private 338409 Bachelors 13 \n", | |
"5 37 Private 284582 Masters 14 \n", | |
"6 49 Private 160187 9th 5 \n", | |
"7 52 Self-emp-not-inc 209642 HS-grad 9 \n", | |
"8 31 Private 45781 Masters 14 \n", | |
"9 42 Private 159449 Bachelors 13 \n", | |
"10 37 Private 280464 Some-college 10 \n", | |
"11 30 State-gov 141297 Bachelors 13 \n", | |
"12 23 Private 122272 Bachelors 13 \n", | |
"13 32 Private 205019 Assoc-acdm 12 \n", | |
"14 40 Private 121772 Assoc-voc 11 \n", | |
"15 34 Private 245487 7th-8th 4 \n", | |
"16 25 Self-emp-not-inc 176756 HS-grad 9 \n", | |
"17 32 Private 186824 HS-grad 9 \n", | |
"18 38 Private 28887 11th 7 \n", | |
"19 43 Self-emp-not-inc 292175 Masters 14 \n", | |
"20 40 Private 193524 Doctorate 16 \n", | |
"21 54 Private 302146 HS-grad 9 \n", | |
"22 35 Federal-gov 76845 9th 5 \n", | |
"23 43 Private 117037 11th 7 \n", | |
"24 59 Private 109015 HS-grad 9 \n", | |
"25 56 Local-gov 216851 Bachelors 13 \n", | |
"26 19 Private 168294 HS-grad 9 \n", | |
"27 54 ? 180211 Some-college 10 \n", | |
"28 39 Private 367260 HS-grad 9 \n", | |
"29 49 Private 193366 HS-grad 9 \n", | |
"\n", | |
" marital-status occupation relationship \\\n", | |
"0 Never-married Adm-clerical Not-in-family \n", | |
"1 Married-civ-spouse Exec-managerial Husband \n", | |
"2 Divorced Handlers-cleaners Not-in-family \n", | |
"3 Married-civ-spouse Handlers-cleaners Husband \n", | |
"4 Married-civ-spouse Prof-specialty Wife \n", | |
"5 Married-civ-spouse Exec-managerial Wife \n", | |
"6 Married-spouse-absent Other-service Not-in-family \n", | |
"7 Married-civ-spouse Exec-managerial Husband \n", | |
"8 Never-married Prof-specialty Not-in-family \n", | |
"9 Married-civ-spouse Exec-managerial Husband \n", | |
"10 Married-civ-spouse Exec-managerial Husband \n", | |
"11 Married-civ-spouse Prof-specialty Husband \n", | |
"12 Never-married Adm-clerical Own-child \n", | |
"13 Never-married Sales Not-in-family \n", | |
"14 Married-civ-spouse Craft-repair Husband \n", | |
"15 Married-civ-spouse Transport-moving Husband \n", | |
"16 Never-married Farming-fishing Own-child \n", | |
"17 Never-married Machine-op-inspct Unmarried \n", | |
"18 Married-civ-spouse Sales Husband \n", | |
"19 Divorced Exec-managerial Unmarried \n", | |
"20 Married-civ-spouse Prof-specialty Husband \n", | |
"21 Separated Other-service Unmarried \n", | |
"22 Married-civ-spouse Farming-fishing Husband \n", | |
"23 Married-civ-spouse Transport-moving Husband \n", | |
"24 Divorced Tech-support Unmarried \n", | |
"25 Married-civ-spouse Tech-support Husband \n", | |
"26 Never-married Craft-repair Own-child \n", | |
"27 Married-civ-spouse ? Husband \n", | |
"28 Divorced Exec-managerial Not-in-family \n", | |
"29 Married-civ-spouse Craft-repair Husband \n", | |
"\n", | |
" race sex capital-gain capital-loss hours-per-week \\\n", | |
"0 White Male 2174 0 40 \n", | |
"1 White Male 0 0 13 \n", | |
"2 White Male 0 0 40 \n", | |
"3 Black Male 0 0 40 \n", | |
"4 Black Female 0 0 40 \n", | |
"5 White Female 0 0 40 \n", | |
"6 Black Female 0 0 16 \n", | |
"7 White Male 0 0 45 \n", | |
"8 White Female 14084 0 50 \n", | |
"9 White Male 5178 0 40 \n", | |
"10 Black Male 0 0 80 \n", | |
"11 Asian-Pac-Islander Male 0 0 40 \n", | |
"12 White Female 0 0 30 \n", | |
"13 Black Male 0 0 50 \n", | |
"14 Asian-Pac-Islander Male 0 0 40 \n", | |
"15 Amer-Indian-Eskimo Male 0 0 45 \n", | |
"16 White Male 0 0 35 \n", | |
"17 White Male 0 0 40 \n", | |
"18 White Male 0 0 50 \n", | |
"19 White Female 0 0 45 \n", | |
"20 White Male 0 0 60 \n", | |
"21 Black Female 0 0 20 \n", | |
"22 Black Male 0 0 40 \n", | |
"23 White Male 0 2042 40 \n", | |
"24 White Female 0 0 40 \n", | |
"25 White Male 0 0 40 \n", | |
"26 White Male 0 0 40 \n", | |
"27 Asian-Pac-Islander Male 0 0 60 \n", | |
"28 White Male 0 0 80 \n", | |
"29 White Male 0 0 40 \n", | |
"\n", | |
" native-country income \n", | |
"0 United-States <=50K \n", | |
"1 United-States <=50K \n", | |
"2 United-States <=50K \n", | |
"3 United-States <=50K \n", | |
"4 Cuba <=50K \n", | |
"5 United-States <=50K \n", | |
"6 Jamaica <=50K \n", | |
"7 United-States >50K \n", | |
"8 United-States >50K \n", | |
"9 United-States >50K \n", | |
"10 United-States >50K \n", | |
"11 India >50K \n", | |
"12 United-States <=50K \n", | |
"13 United-States <=50K \n", | |
"14 ? >50K \n", | |
"15 Mexico <=50K \n", | |
"16 United-States <=50K \n", | |
"17 United-States <=50K \n", | |
"18 United-States <=50K \n", | |
"19 United-States >50K \n", | |
"20 United-States >50K \n", | |
"21 United-States <=50K \n", | |
"22 United-States <=50K \n", | |
"23 United-States <=50K \n", | |
"24 United-States <=50K \n", | |
"25 United-States >50K \n", | |
"26 United-States <=50K \n", | |
"27 South >50K \n", | |
"28 United-States <=50K \n", | |
"29 United-States <=50K " | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"adult.describe() #numerical summaries" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education-num</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td> 32561.000000</td>\n", | |
" <td> 32561.000000</td>\n", | |
" <td> 32561.000000</td>\n", | |
" <td> 32561.000000</td>\n", | |
" <td> 32561.000000</td>\n", | |
" <td> 32561.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 38.581647</td>\n", | |
" <td> 189778.366512</td>\n", | |
" <td> 10.080679</td>\n", | |
" <td> 1077.648844</td>\n", | |
" <td> 87.303830</td>\n", | |
" <td> 40.437456</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 13.640433</td>\n", | |
" <td> 105549.977697</td>\n", | |
" <td> 2.572720</td>\n", | |
" <td> 7385.292085</td>\n", | |
" <td> 402.960219</td>\n", | |
" <td> 12.347429</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 12285.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 28.000000</td>\n", | |
" <td> 117827.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 37.000000</td>\n", | |
" <td> 178356.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 48.000000</td>\n", | |
" <td> 237051.000000</td>\n", | |
" <td> 12.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 45.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 90.000000</td>\n", | |
" <td> 1484705.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 4356.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 17, | |
"text": [ | |
" age fnlwgt education-num capital-gain \\\n", | |
"count 32561.000000 32561.000000 32561.000000 32561.000000 \n", | |
"mean 38.581647 189778.366512 10.080679 1077.648844 \n", | |
"std 13.640433 105549.977697 2.572720 7385.292085 \n", | |
"min 17.000000 12285.000000 1.000000 0.000000 \n", | |
"25% 28.000000 117827.000000 9.000000 0.000000 \n", | |
"50% 37.000000 178356.000000 10.000000 0.000000 \n", | |
"75% 48.000000 237051.000000 12.000000 0.000000 \n", | |
"max 90.000000 1484705.000000 16.000000 99999.000000 \n", | |
"\n", | |
" capital-loss hours-per-week \n", | |
"count 32561.000000 32561.000000 \n", | |
"mean 87.303830 40.437456 \n", | |
"std 402.960219 12.347429 \n", | |
"min 0.000000 1.000000 \n", | |
"25% 0.000000 40.000000 \n", | |
"50% 0.000000 40.000000 \n", | |
"75% 0.000000 45.000000 \n", | |
"max 4356.000000 99.000000 " | |
] | |
} | |
], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"workclass=adult.groupby(\"workclass\")" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"len(workclass)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 19, | |
"text": [ | |
"9" | |
] | |
} | |
], | |
"prompt_number": 19 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"workclass.sum()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education-num</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>workclass</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> ?</th>\n", | |
" <td> 75203</td>\n", | |
" <td> 346115997</td>\n", | |
" <td> 17002</td>\n", | |
" <td> 1114077</td>\n", | |
" <td> 111556</td>\n", | |
" <td> 58604</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Federal-gov</th>\n", | |
" <td> 40887</td>\n", | |
" <td> 177812394</td>\n", | |
" <td> 10535</td>\n", | |
" <td> 799903</td>\n", | |
" <td> 107778</td>\n", | |
" <td> 39724</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Local-gov</th>\n", | |
" <td> 87385</td>\n", | |
" <td> 394822919</td>\n", | |
" <td> 23111</td>\n", | |
" <td> 1842264</td>\n", | |
" <td> 229925</td>\n", | |
" <td> 85777</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Never-worked</th>\n", | |
" <td> 144</td>\n", | |
" <td> 1581927</td>\n", | |
" <td> 52</td>\n", | |
" <td> 0</td>\n", | |
" <td> 0</td>\n", | |
" <td> 199</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <td> 835158</td>\n", | |
" <td> 4374974348</td>\n", | |
" <td> 224230</td>\n", | |
" <td> 20181687</td>\n", | |
" <td> 1815878</td>\n", | |
" <td> 913902</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Self-emp-inc</th>\n", | |
" <td> 51355</td>\n", | |
" <td> 196395180</td>\n", | |
" <td> 12429</td>\n", | |
" <td> 5441274</td>\n", | |
" <td> 173135</td>\n", | |
" <td> 54481</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Self-emp-not-inc</th>\n", | |
" <td> 114268</td>\n", | |
" <td> 446221558</td>\n", | |
" <td> 25985</td>\n", | |
" <td> 4792483</td>\n", | |
" <td> 296361</td>\n", | |
" <td> 112876</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> State-gov</th>\n", | |
" <td> 51188</td>\n", | |
" <td> 239009324</td>\n", | |
" <td> 14766</td>\n", | |
" <td> 910806</td>\n", | |
" <td> 108067</td>\n", | |
" <td> 50663</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Without-pay</th>\n", | |
" <td> 669</td>\n", | |
" <td> 2439745</td>\n", | |
" <td> 127</td>\n", | |
" <td> 6830</td>\n", | |
" <td> 0</td>\n", | |
" <td> 458</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 20, | |
"text": [ | |
" age fnlwgt education-num capital-gain \\\n", | |
"workclass \n", | |
" ? 75203 346115997 17002 1114077 \n", | |
" Federal-gov 40887 177812394 10535 799903 \n", | |
" Local-gov 87385 394822919 23111 1842264 \n", | |
" Never-worked 144 1581927 52 0 \n", | |
" Private 835158 4374974348 224230 20181687 \n", | |
" Self-emp-inc 51355 196395180 12429 5441274 \n", | |
" Self-emp-not-inc 114268 446221558 25985 4792483 \n", | |
" State-gov 51188 239009324 14766 910806 \n", | |
" Without-pay 669 2439745 127 6830 \n", | |
"\n", | |
" capital-loss hours-per-week \n", | |
"workclass \n", | |
" ? 111556 58604 \n", | |
" Federal-gov 107778 39724 \n", | |
" Local-gov 229925 85777 \n", | |
" Never-worked 0 199 \n", | |
" Private 1815878 913902 \n", | |
" Self-emp-inc 173135 54481 \n", | |
" Self-emp-not-inc 296361 112876 \n", | |
" State-gov 108067 50663 \n", | |
" Without-pay 0 458 " | |
] | |
} | |
], | |
"prompt_number": 20 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"workclass.count()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education</th>\n", | |
" <th>education-num</th>\n", | |
" <th>marital-status</th>\n", | |
" <th>occupation</th>\n", | |
" <th>relationship</th>\n", | |
" <th>race</th>\n", | |
" <th>sex</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" <th>native-country</th>\n", | |
" <th>income</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>workclass</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> ?</th>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" <td> 1836</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Federal-gov</th>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" <td> 960</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Local-gov</th>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" <td> 2093</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Never-worked</th>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" <td> 7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Private</th>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" <td> 22696</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Self-emp-inc</th>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" <td> 1116</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Self-emp-not-inc</th>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" <td> 2541</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> State-gov</th>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" <td> 1298</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Without-pay</th>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" <td> 14</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 21, | |
"text": [ | |
" age fnlwgt education education-num marital-status \\\n", | |
"workclass \n", | |
" ? 1836 1836 1836 1836 1836 \n", | |
" Federal-gov 960 960 960 960 960 \n", | |
" Local-gov 2093 2093 2093 2093 2093 \n", | |
" Never-worked 7 7 7 7 7 \n", | |
" Private 22696 22696 22696 22696 22696 \n", | |
" Self-emp-inc 1116 1116 1116 1116 1116 \n", | |
" Self-emp-not-inc 2541 2541 2541 2541 2541 \n", | |
" State-gov 1298 1298 1298 1298 1298 \n", | |
" Without-pay 14 14 14 14 14 \n", | |
"\n", | |
" occupation relationship race sex capital-gain \\\n", | |
"workclass \n", | |
" ? 1836 1836 1836 1836 1836 \n", | |
" Federal-gov 960 960 960 960 960 \n", | |
" Local-gov 2093 2093 2093 2093 2093 \n", | |
" Never-worked 7 7 7 7 7 \n", | |
" Private 22696 22696 22696 22696 22696 \n", | |
" Self-emp-inc 1116 1116 1116 1116 1116 \n", | |
" Self-emp-not-inc 2541 2541 2541 2541 2541 \n", | |
" State-gov 1298 1298 1298 1298 1298 \n", | |
" Without-pay 14 14 14 14 14 \n", | |
"\n", | |
" capital-loss hours-per-week native-country income \n", | |
"workclass \n", | |
" ? 1836 1836 1836 1836 \n", | |
" Federal-gov 960 960 960 960 \n", | |
" Local-gov 2093 2093 2093 2093 \n", | |
" Never-worked 7 7 7 7 \n", | |
" Private 22696 22696 22696 22696 \n", | |
" Self-emp-inc 1116 1116 1116 1116 \n", | |
" Self-emp-not-inc 2541 2541 2541 2541 \n", | |
" State-gov 1298 1298 1298 1298 \n", | |
" Without-pay 14 14 14 14 " | |
] | |
} | |
], | |
"prompt_number": 21 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"workclass.describe()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>education-num</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>hours-per-week</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>workclass</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\"> ?</th>\n", | |
" <th>count</th>\n", | |
" <td> 1836.000000</td>\n", | |
" <td> 1836.000000</td>\n", | |
" <td> 1836.000000</td>\n", | |
" <td> 1836.000000</td>\n", | |
" <td> 1836.000000</td>\n", | |
" <td> 1836.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 40.960240</td>\n", | |
" <td> 606.795752</td>\n", | |
" <td> 60.760349</td>\n", | |
" <td> 9.260349</td>\n", | |
" <td> 188516.338235</td>\n", | |
" <td> 31.919390</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 20.334587</td>\n", | |
" <td> 5147.323872</td>\n", | |
" <td> 354.685264</td>\n", | |
" <td> 2.601986</td>\n", | |
" <td> 107089.902252</td>\n", | |
" <td> 14.909903</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 12285.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 21.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 117771.250000</td>\n", | |
" <td> 20.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 35.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 175617.000000</td>\n", | |
" <td> 36.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 61.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" <td> 234568.500000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 90.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 4356.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 981628.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\"> Federal-gov</th>\n", | |
" <th>count</th>\n", | |
" <td> 960.000000</td>\n", | |
" <td> 960.000000</td>\n", | |
" <td> 960.000000</td>\n", | |
" <td> 960.000000</td>\n", | |
" <td> 960.000000</td>\n", | |
" <td> 960.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 42.590625</td>\n", | |
" <td> 833.232292</td>\n", | |
" <td> 112.268750</td>\n", | |
" <td> 10.973958</td>\n", | |
" <td> 185221.243750</td>\n", | |
" <td> 41.379167</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 11.509171</td>\n", | |
" <td> 4101.966767</td>\n", | |
" <td> 453.504623</td>\n", | |
" <td> 2.113650</td>\n", | |
" <td> 117502.359524</td>\n", | |
" <td> 8.838605</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 3.000000</td>\n", | |
" <td> 19914.000000</td>\n", | |
" <td> 4.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 34.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 97781.250000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 43.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" <td> 175771.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 51.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" <td> 243960.250000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 90.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 3683.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 930948.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\"> Local-gov</th>\n", | |
" <th>count</th>\n", | |
" <td> 2093.000000</td>\n", | |
" <td> 2093.000000</td>\n", | |
" <td> 2093.000000</td>\n", | |
" <td> 2093.000000</td>\n", | |
" <td> 2093.000000</td>\n", | |
" <td> 2093.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 41.751075</td>\n", | |
" <td> 880.202580</td>\n", | |
" <td> 109.854276</td>\n", | |
" <td> 11.042045</td>\n", | |
" <td> 188639.712852</td>\n", | |
" <td> 40.982800</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 12.272856</td>\n", | |
" <td> 5775.043442</td>\n", | |
" <td> 439.513203</td>\n", | |
" <td> 2.552536</td>\n", | |
" <td> 100254.775314</td>\n", | |
" <td> 10.771559</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 14878.000000</td>\n", | |
" <td> 2.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 32.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 121124.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 41.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 11.000000</td>\n", | |
" <td> 179580.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 50.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" <td> 236487.000000</td>\n", | |
" <td> 44.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 90.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 2444.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 1125613.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"6\" valign=\"top\"> Never-worked</th>\n", | |
" <th>count</th>\n", | |
" <td> 7.000000</td>\n", | |
" <td> 7.000000</td>\n", | |
" <td> 7.000000</td>\n", | |
" <td> 7.000000</td>\n", | |
" <td> 7.000000</td>\n", | |
" <td> 7.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 20.571429</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 7.428571</td>\n", | |
" <td> 225989.571429</td>\n", | |
" <td> 28.428571</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 4.613644</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 2.299068</td>\n", | |
" <td> 108135.748347</td>\n", | |
" <td> 15.186147</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 4.000000</td>\n", | |
" <td> 153663.000000</td>\n", | |
" <td> 4.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 18.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 6.000000</td>\n", | |
" <td> 166902.000000</td>\n", | |
" <td> 20.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 18.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 7.000000</td>\n", | |
" <td> 188535.000000</td>\n", | |
" <td> 35.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"6\" valign=\"top\"> Self-emp-inc</th>\n", | |
" <th>std</th>\n", | |
" <td> 12.553194</td>\n", | |
" <td> 17976.548086</td>\n", | |
" <td> 549.488497</td>\n", | |
" <td> 2.603210</td>\n", | |
" <td> 96436.282913</td>\n", | |
" <td> 13.900417</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 2.000000</td>\n", | |
" <td> 21626.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 37.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 113539.750000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 45.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" <td> 165667.000000</td>\n", | |
" <td> 50.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 54.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" <td> 213722.750000</td>\n", | |
" <td> 60.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 84.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 2559.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 1097453.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\"> Self-emp-not-inc</th>\n", | |
" <th>count</th>\n", | |
" <td> 2541.000000</td>\n", | |
" <td> 2541.000000</td>\n", | |
" <td> 2541.000000</td>\n", | |
" <td> 2541.000000</td>\n", | |
" <td> 2541.000000</td>\n", | |
" <td> 2541.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 44.969697</td>\n", | |
" <td> 1886.061787</td>\n", | |
" <td> 116.631641</td>\n", | |
" <td> 10.226289</td>\n", | |
" <td> 175608.641480</td>\n", | |
" <td> 44.421881</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 13.338162</td>\n", | |
" <td> 10986.233506</td>\n", | |
" <td> 467.611687</td>\n", | |
" <td> 2.768132</td>\n", | |
" <td> 100735.757730</td>\n", | |
" <td> 16.674958</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 2.000000</td>\n", | |
" <td> 20098.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 35.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 104973.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 44.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" <td> 168109.000000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 54.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" <td> 227298.000000</td>\n", | |
" <td> 50.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 90.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 2824.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 795830.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\"> State-gov</th>\n", | |
" <th>count</th>\n", | |
" <td> 1298.000000</td>\n", | |
" <td> 1298.000000</td>\n", | |
" <td> 1298.000000</td>\n", | |
" <td> 1298.000000</td>\n", | |
" <td> 1298.000000</td>\n", | |
" <td> 1298.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 39.436055</td>\n", | |
" <td> 701.699538</td>\n", | |
" <td> 83.256549</td>\n", | |
" <td> 11.375963</td>\n", | |
" <td> 184136.613251</td>\n", | |
" <td> 39.031587</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 12.431065</td>\n", | |
" <td> 3777.749185</td>\n", | |
" <td> 394.469789</td>\n", | |
" <td> 2.538604</td>\n", | |
" <td> 111512.980926</td>\n", | |
" <td> 11.697014</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 17.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 19395.000000</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 30.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 108903.750000</td>\n", | |
" <td> 38.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 39.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" <td> 169402.500000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 48.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 13.000000</td>\n", | |
" <td> 238532.750000</td>\n", | |
" <td> 40.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 81.000000</td>\n", | |
" <td> 99999.000000</td>\n", | |
" <td> 3683.000000</td>\n", | |
" <td> 16.000000</td>\n", | |
" <td> 1033222.000000</td>\n", | |
" <td> 99.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th rowspan=\"8\" valign=\"top\"> Without-pay</th>\n", | |
" <th>count</th>\n", | |
" <td> 14.000000</td>\n", | |
" <td> 14.000000</td>\n", | |
" <td> 14.000000</td>\n", | |
" <td> 14.000000</td>\n", | |
" <td> 14.000000</td>\n", | |
" <td> 14.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td> 47.785714</td>\n", | |
" <td> 487.857143</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.071429</td>\n", | |
" <td> 174267.500000</td>\n", | |
" <td> 32.714286</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td> 21.075610</td>\n", | |
" <td> 1300.780467</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 1.685426</td>\n", | |
" <td> 85536.385921</td>\n", | |
" <td> 17.357900</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td> 19.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 4.000000</td>\n", | |
" <td> 27012.000000</td>\n", | |
" <td> 10.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td> 23.750000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 138446.500000</td>\n", | |
" <td> 20.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td> 57.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.000000</td>\n", | |
" <td> 171531.500000</td>\n", | |
" <td> 27.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td> 65.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 9.750000</td>\n", | |
" <td> 209006.500000</td>\n", | |
" <td> 47.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td> 72.000000</td>\n", | |
" <td> 4416.000000</td>\n", | |
" <td> 0.000000</td>\n", | |
" <td> 12.000000</td>\n", | |
" <td> 344858.000000</td>\n", | |
" <td> 65.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>72 rows \u00d7 6 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 22, | |
"text": [ | |
" age capital-gain capital-loss \\\n", | |
"workclass \n", | |
" ? count 1836.000000 1836.000000 1836.000000 \n", | |
" mean 40.960240 606.795752 60.760349 \n", | |
" std 20.334587 5147.323872 354.685264 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 21.000000 0.000000 0.000000 \n", | |
" 50% 35.000000 0.000000 0.000000 \n", | |
" 75% 61.000000 0.000000 0.000000 \n", | |
" max 90.000000 99999.000000 4356.000000 \n", | |
" Federal-gov count 960.000000 960.000000 960.000000 \n", | |
" mean 42.590625 833.232292 112.268750 \n", | |
" std 11.509171 4101.966767 453.504623 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 34.000000 0.000000 0.000000 \n", | |
" 50% 43.000000 0.000000 0.000000 \n", | |
" 75% 51.000000 0.000000 0.000000 \n", | |
" max 90.000000 99999.000000 3683.000000 \n", | |
" Local-gov count 2093.000000 2093.000000 2093.000000 \n", | |
" mean 41.751075 880.202580 109.854276 \n", | |
" std 12.272856 5775.043442 439.513203 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 32.000000 0.000000 0.000000 \n", | |
" 50% 41.000000 0.000000 0.000000 \n", | |
" 75% 50.000000 0.000000 0.000000 \n", | |
" max 90.000000 99999.000000 2444.000000 \n", | |
" Never-worked count 7.000000 7.000000 7.000000 \n", | |
" mean 20.571429 0.000000 0.000000 \n", | |
" std 4.613644 0.000000 0.000000 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 18.000000 0.000000 0.000000 \n", | |
" 50% 18.000000 0.000000 0.000000 \n", | |
"... ... ... ... \n", | |
" Self-emp-inc std 12.553194 17976.548086 549.488497 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 37.000000 0.000000 0.000000 \n", | |
" 50% 45.000000 0.000000 0.000000 \n", | |
" 75% 54.000000 0.000000 0.000000 \n", | |
" max 84.000000 99999.000000 2559.000000 \n", | |
" Self-emp-not-inc count 2541.000000 2541.000000 2541.000000 \n", | |
" mean 44.969697 1886.061787 116.631641 \n", | |
" std 13.338162 10986.233506 467.611687 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 35.000000 0.000000 0.000000 \n", | |
" 50% 44.000000 0.000000 0.000000 \n", | |
" 75% 54.000000 0.000000 0.000000 \n", | |
" max 90.000000 99999.000000 2824.000000 \n", | |
" State-gov count 1298.000000 1298.000000 1298.000000 \n", | |
" mean 39.436055 701.699538 83.256549 \n", | |
" std 12.431065 3777.749185 394.469789 \n", | |
" min 17.000000 0.000000 0.000000 \n", | |
" 25% 30.000000 0.000000 0.000000 \n", | |
" 50% 39.000000 0.000000 0.000000 \n", | |
" 75% 48.000000 0.000000 0.000000 \n", | |
" max 81.000000 99999.000000 3683.000000 \n", | |
" Without-pay count 14.000000 14.000000 14.000000 \n", | |
" mean 47.785714 487.857143 0.000000 \n", | |
" std 21.075610 1300.780467 0.000000 \n", | |
" min 19.000000 0.000000 0.000000 \n", | |
" 25% 23.750000 0.000000 0.000000 \n", | |
" 50% 57.000000 0.000000 0.000000 \n", | |
" 75% 65.000000 0.000000 0.000000 \n", | |
" max 72.000000 4416.000000 0.000000 \n", | |
"\n", | |
" education-num fnlwgt hours-per-week \n", | |
"workclass \n", | |
" ? count 1836.000000 1836.000000 1836.000000 \n", | |
" mean 9.260349 188516.338235 31.919390 \n", | |
" std 2.601986 107089.902252 14.909903 \n", | |
" min 1.000000 12285.000000 1.000000 \n", | |
" 25% 9.000000 117771.250000 20.000000 \n", | |
" 50% 9.000000 175617.000000 36.000000 \n", | |
" 75% 10.000000 234568.500000 40.000000 \n", | |
" max 16.000000 981628.000000 99.000000 \n", | |
" Federal-gov count 960.000000 960.000000 960.000000 \n", | |
" mean 10.973958 185221.243750 41.379167 \n", | |
" std 2.113650 117502.359524 8.838605 \n", | |
" min 3.000000 19914.000000 4.000000 \n", | |
" 25% 9.000000 97781.250000 40.000000 \n", | |
" 50% 10.000000 175771.000000 40.000000 \n", | |
" 75% 13.000000 243960.250000 40.000000 \n", | |
" max 16.000000 930948.000000 99.000000 \n", | |
" Local-gov count 2093.000000 2093.000000 2093.000000 \n", | |
" mean 11.042045 188639.712852 40.982800 \n", | |
" std 2.552536 100254.775314 10.771559 \n", | |
" min 1.000000 14878.000000 2.000000 \n", | |
" 25% 9.000000 121124.000000 40.000000 \n", | |
" 50% 11.000000 179580.000000 40.000000 \n", | |
" 75% 13.000000 236487.000000 44.000000 \n", | |
" max 16.000000 1125613.000000 99.000000 \n", | |
" Never-worked count 7.000000 7.000000 7.000000 \n", | |
" mean 7.428571 225989.571429 28.428571 \n", | |
" std 2.299068 108135.748347 15.186147 \n", | |
" min 4.000000 153663.000000 4.000000 \n", | |
" 25% 6.000000 166902.000000 20.000000 \n", | |
" 50% 7.000000 188535.000000 35.000000 \n", | |
"... ... ... ... \n", | |
" Self-emp-inc std 2.603210 96436.282913 13.900417 \n", | |
" min 2.000000 21626.000000 1.000000 \n", | |
" 25% 9.000000 113539.750000 40.000000 \n", | |
" 50% 10.000000 165667.000000 50.000000 \n", | |
" 75% 13.000000 213722.750000 60.000000 \n", | |
" max 16.000000 1097453.000000 99.000000 \n", | |
" Self-emp-not-inc count 2541.000000 2541.000000 2541.000000 \n", | |
" mean 10.226289 175608.641480 44.421881 \n", | |
" std 2.768132 100735.757730 16.674958 \n", | |
" min 2.000000 20098.000000 1.000000 \n", | |
" 25% 9.000000 104973.000000 40.000000 \n", | |
" 50% 10.000000 168109.000000 40.000000 \n", | |
" 75% 13.000000 227298.000000 50.000000 \n", | |
" max 16.000000 795830.000000 99.000000 \n", | |
" State-gov count 1298.000000 1298.000000 1298.000000 \n", | |
" mean 11.375963 184136.613251 39.031587 \n", | |
" std 2.538604 111512.980926 11.697014 \n", | |
" min 1.000000 19395.000000 1.000000 \n", | |
" 25% 9.000000 108903.750000 38.000000 \n", | |
" 50% 10.000000 169402.500000 40.000000 \n", | |
" 75% 13.000000 238532.750000 40.000000 \n", | |
" max 16.000000 1033222.000000 99.000000 \n", | |
" Without-pay count 14.000000 14.000000 14.000000 \n", | |
" mean 9.071429 174267.500000 32.714286 \n", | |
" std 1.685426 85536.385921 17.357900 \n", | |
" min 4.000000 27012.000000 10.000000 \n", | |
" 25% 9.000000 138446.500000 20.000000 \n", | |
" 50% 9.000000 171531.500000 27.500000 \n", | |
" 75% 9.750000 209006.500000 47.500000 \n", | |
" max 12.000000 344858.000000 65.000000 \n", | |
"\n", | |
"[72 rows x 6 columns]" | |
] | |
} | |
], | |
"prompt_number": 22 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Group rows by race, keeping only the numeric columns: on pandas >= 2.0 the\n", | |
"# string columns (workclass, sex, income, ...) make mean() raise and sum() concatenate text.\n", | |
"numeric_cols = adult.select_dtypes(include=['number']).columns.tolist()\n", | |
"race = adult.groupby(\"race\")[numeric_cols]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 23 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Column totals within each race group\n", | |
"race.agg('sum')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education-num</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>race</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> Amer-Indian-Eskimo</th>\n", | |
" <td> 11561</td>\n", | |
" <td> 37578487</td>\n", | |
" <td> 2896</td>\n", | |
" <td> 194458</td>\n", | |
" <td> 10629</td>\n", | |
" <td> 12455</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Asian-Pac-Islander</th>\n", | |
" <td> 39219</td>\n", | |
" <td> 166178293</td>\n", | |
" <td> 11388</td>\n", | |
" <td> 1536014</td>\n", | |
" <td> 101014</td>\n", | |
" <td> 41692</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Black</th>\n", | |
" <td> 117987</td>\n", | |
" <td> 712313000</td>\n", | |
" <td> 29635</td>\n", | |
" <td> 1905454</td>\n", | |
" <td> 188643</td>\n", | |
" <td> 120033</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Other</th>\n", | |
" <td> 9067</td>\n", | |
" <td> 53420656</td>\n", | |
" <td> 2396</td>\n", | |
" <td> 253293</td>\n", | |
" <td> 16550</td>\n", | |
" <td> 10696</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> White</th>\n", | |
" <td> 1078423</td>\n", | |
" <td> 5209882956</td>\n", | |
" <td> 281922</td>\n", | |
" <td> 31200105</td>\n", | |
" <td> 2525864</td>\n", | |
" <td> 1131808</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 24, | |
"text": [ | |
" age fnlwgt education-num capital-gain \\\n", | |
"race \n", | |
" Amer-Indian-Eskimo 11561 37578487 2896 194458 \n", | |
" Asian-Pac-Islander 39219 166178293 11388 1536014 \n", | |
" Black 117987 712313000 29635 1905454 \n", | |
" Other 9067 53420656 2396 253293 \n", | |
" White 1078423 5209882956 281922 31200105 \n", | |
"\n", | |
" capital-loss hours-per-week \n", | |
"race \n", | |
" Amer-Indian-Eskimo 10629 12455 \n", | |
" Asian-Pac-Islander 101014 41692 \n", | |
" Black 188643 120033 \n", | |
" Other 16550 10696 \n", | |
" White 2525864 1131808 " | |
] | |
} | |
], | |
"prompt_number": 24 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Column averages within each race group\n", | |
"race.agg('mean')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education-num</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>race</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> Amer-Indian-Eskimo</th>\n", | |
" <td> 37.173633</td>\n", | |
" <td> 120831.147910</td>\n", | |
" <td> 9.311897</td>\n", | |
" <td> 625.266881</td>\n", | |
" <td> 34.176849</td>\n", | |
" <td> 40.048232</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Asian-Pac-Islander</th>\n", | |
" <td> 37.746872</td>\n", | |
" <td> 159940.609240</td>\n", | |
" <td> 10.960539</td>\n", | |
" <td> 1478.358037</td>\n", | |
" <td> 97.222329</td>\n", | |
" <td> 40.127045</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Black</th>\n", | |
" <td> 37.767926</td>\n", | |
" <td> 228013.124200</td>\n", | |
" <td> 9.486236</td>\n", | |
" <td> 609.940461</td>\n", | |
" <td> 60.385083</td>\n", | |
" <td> 38.422855</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Other</th>\n", | |
" <td> 33.457565</td>\n", | |
" <td> 197124.191882</td>\n", | |
" <td> 8.841328</td>\n", | |
" <td> 934.660517</td>\n", | |
" <td> 61.070111</td>\n", | |
" <td> 39.468635</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> White</th>\n", | |
" <td> 38.769881</td>\n", | |
" <td> 187298.064280</td>\n", | |
" <td> 10.135246</td>\n", | |
" <td> 1121.660375</td>\n", | |
" <td> 90.806155</td>\n", | |
" <td> 40.689100</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 25, | |
"text": [ | |
" age fnlwgt education-num capital-gain \\\n", | |
"race \n", | |
" Amer-Indian-Eskimo 37.173633 120831.147910 9.311897 625.266881 \n", | |
" Asian-Pac-Islander 37.746872 159940.609240 10.960539 1478.358037 \n", | |
" Black 37.767926 228013.124200 9.486236 609.940461 \n", | |
" Other 33.457565 197124.191882 8.841328 934.660517 \n", | |
" White 38.769881 187298.064280 10.135246 1121.660375 \n", | |
"\n", | |
" capital-loss hours-per-week \n", | |
"race \n", | |
" Amer-Indian-Eskimo 34.176849 40.048232 \n", | |
" Asian-Pac-Islander 97.222329 40.127045 \n", | |
" Black 60.385083 38.422855 \n", | |
" Other 61.070111 39.468635 \n", | |
" White 90.806155 40.689100 " | |
] | |
} | |
], | |
"prompt_number": 25 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Contingency table: number of rows for each race (rows) x workclass (columns) pair\n", | |
"pd.crosstab(adult['race'], adult['workclass'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>workclass</th>\n", | |
" <th> ?</th>\n", | |
" <th> Federal-gov</th>\n", | |
" <th> Local-gov</th>\n", | |
" <th> Never-worked</th>\n", | |
" <th> Private</th>\n", | |
" <th> Self-emp-inc</th>\n", | |
" <th> Self-emp-not-inc</th>\n", | |
" <th> State-gov</th>\n", | |
" <th> Without-pay</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>race</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> Amer-Indian-Eskimo</th>\n", | |
" <td> 25</td>\n", | |
" <td> 19</td>\n", | |
" <td> 36</td>\n", | |
" <td> 0</td>\n", | |
" <td> 190</td>\n", | |
" <td> 2</td>\n", | |
" <td> 24</td>\n", | |
" <td> 15</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Asian-Pac-Islander</th>\n", | |
" <td> 65</td>\n", | |
" <td> 44</td>\n", | |
" <td> 39</td>\n", | |
" <td> 0</td>\n", | |
" <td> 713</td>\n", | |
" <td> 46</td>\n", | |
" <td> 73</td>\n", | |
" <td> 58</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Black</th>\n", | |
" <td> 213</td>\n", | |
" <td> 169</td>\n", | |
" <td> 288</td>\n", | |
" <td> 2</td>\n", | |
" <td> 2176</td>\n", | |
" <td> 23</td>\n", | |
" <td> 93</td>\n", | |
" <td> 159</td>\n", | |
" <td> 1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Other</th>\n", | |
" <td> 23</td>\n", | |
" <td> 7</td>\n", | |
" <td> 10</td>\n", | |
" <td> 0</td>\n", | |
" <td> 213</td>\n", | |
" <td> 5</td>\n", | |
" <td> 9</td>\n", | |
" <td> 4</td>\n", | |
" <td> 0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> White</th>\n", | |
" <td> 1510</td>\n", | |
" <td> 721</td>\n", | |
" <td> 1720</td>\n", | |
" <td> 5</td>\n", | |
" <td> 19404</td>\n", | |
" <td> 1040</td>\n", | |
" <td> 2342</td>\n", | |
" <td> 1062</td>\n", | |
" <td> 12</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 26, | |
"text": [ | |
"workclass ? Federal-gov Local-gov Never-worked Private \\\n", | |
"race \n", | |
" Amer-Indian-Eskimo 25 19 36 0 190 \n", | |
" Asian-Pac-Islander 65 44 39 0 713 \n", | |
" Black 213 169 288 2 2176 \n", | |
" Other 23 7 10 0 213 \n", | |
" White 1510 721 1720 5 19404 \n", | |
"\n", | |
"workclass Self-emp-inc Self-emp-not-inc State-gov \\\n", | |
"race \n", | |
" Amer-Indian-Eskimo 2 24 15 \n", | |
" Asian-Pac-Islander 46 73 58 \n", | |
" Black 23 93 159 \n", | |
" Other 5 9 4 \n", | |
" White 1040 2342 1062 \n", | |
"\n", | |
"workclass Without-pay \n", | |
"race \n", | |
" Amer-Indian-Eskimo 0 \n", | |
" Asian-Pac-Islander 1 \n", | |
" Black 1 \n", | |
" Other 0 \n", | |
" White 12 " | |
] | |
} | |
], | |
"prompt_number": 26 | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Contingency table: number of rows for each race (rows) x sex (columns) pair\n", | |
"pd.crosstab(adult['race'], adult['sex'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>sex</th>\n", | |
" <th> Female</th>\n", | |
" <th> Male</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>race</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> Amer-Indian-Eskimo</th>\n", | |
" <td> 119</td>\n", | |
" <td> 192</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Asian-Pac-Islander</th>\n", | |
" <td> 346</td>\n", | |
" <td> 693</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Black</th>\n", | |
" <td> 1555</td>\n", | |
" <td> 1569</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> Other</th>\n", | |
" <td> 109</td>\n", | |
" <td> 162</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> White</th>\n", | |
" <td> 8642</td>\n", | |
" <td> 19174</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 27, | |
"text": [ | |
"sex Female Male\n", | |
"race \n", | |
" Amer-Indian-Eskimo 119 192\n", | |
" Asian-Pac-Islander 346 693\n", | |
" Black 1555 1569\n", | |
" Other 109 162\n", | |
" White 8642 19174" | |
] | |
} | |
], | |
"prompt_number": 27 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Contingency table: number of rows for each income bracket (rows) x sex (columns) pair\n", | |
"pd.crosstab(adult['income'], adult['sex'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>sex</th>\n", | |
" <th> Female</th>\n", | |
" <th> Male</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>income</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> <=50K</th>\n", | |
" <td> 9592</td>\n", | |
" <td> 15128</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> >50K</th>\n", | |
" <td> 1179</td>\n", | |
" <td> 6662</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 28, | |
"text": [ | |
"sex Female Male\n", | |
"income \n", | |
" <=50K 9592 15128\n", | |
" >50K 1179 6662" | |
] | |
} | |
], | |
"prompt_number": 28 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Contingency table: number of rows for each income bracket (rows) x race (columns) pair\n", | |
"pd.crosstab(adult['income'], adult['race'])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th>race</th>\n", | |
" <th> Amer-Indian-Eskimo</th>\n", | |
" <th> Asian-Pac-Islander</th>\n", | |
" <th> Black</th>\n", | |
" <th> Other</th>\n", | |
" <th> White</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>income</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th> <=50K</th>\n", | |
" <td> 275</td>\n", | |
" <td> 763</td>\n", | |
" <td> 2737</td>\n", | |
" <td> 246</td>\n", | |
" <td> 20699</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th> >50K</th>\n", | |
" <td> 36</td>\n", | |
" <td> 276</td>\n", | |
" <td> 387</td>\n", | |
" <td> 25</td>\n", | |
" <td> 7117</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 29, | |
"text": [ | |
"race Amer-Indian-Eskimo Asian-Pac-Islander Black Other White\n", | |
"income \n", | |
" <=50K 275 763 2737 246 20699\n", | |
" >50K 36 276 387 25 7117" | |
] | |
} | |
], | |
"prompt_number": 29 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Pearson correlation is only defined for numeric columns; select them explicitly,\n", | |
"# because DataFrame.corr no longer drops string columns silently on pandas >= 2.0.\n", | |
"adult.select_dtypes(include=['number']).corr(method='pearson', min_periods=1)\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>fnlwgt</th>\n", | |
" <th>education-num</th>\n", | |
" <th>capital-gain</th>\n", | |
" <th>capital-loss</th>\n", | |
" <th>hours-per-week</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>age</th>\n", | |
" <td> 1.000000</td>\n", | |
" <td>-0.076646</td>\n", | |
" <td> 0.036527</td>\n", | |
" <td> 0.077674</td>\n", | |
" <td> 0.057775</td>\n", | |
" <td> 0.068756</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>fnlwgt</th>\n", | |
" <td>-0.076646</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td>-0.043195</td>\n", | |
" <td> 0.000432</td>\n", | |
" <td>-0.010252</td>\n", | |
" <td>-0.018768</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>education-num</th>\n", | |
" <td> 0.036527</td>\n", | |
" <td>-0.043195</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.122630</td>\n", | |
" <td> 0.079923</td>\n", | |
" <td> 0.148123</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>capital-gain</th>\n", | |
" <td> 0.077674</td>\n", | |
" <td> 0.000432</td>\n", | |
" <td> 0.122630</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td>-0.031615</td>\n", | |
" <td> 0.078409</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>capital-loss</th>\n", | |
" <td> 0.057775</td>\n", | |
" <td>-0.010252</td>\n", | |
" <td> 0.079923</td>\n", | |
" <td>-0.031615</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.054256</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>hours-per-week</th>\n", | |
" <td> 0.068756</td>\n", | |
" <td>-0.018768</td>\n", | |
" <td> 0.148123</td>\n", | |
" <td> 0.078409</td>\n", | |
" <td> 0.054256</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 35, | |
"text": [ | |
" age fnlwgt education-num capital-gain capital-loss \\\n", | |
"age 1.000000 -0.076646 0.036527 0.077674 0.057775 \n", | |
"fnlwgt -0.076646 1.000000 -0.043195 0.000432 -0.010252 \n", | |
"education-num 0.036527 -0.043195 1.000000 0.122630 0.079923 \n", | |
"capital-gain 0.077674 0.000432 0.122630 1.000000 -0.031615 \n", | |
"capital-loss 0.057775 -0.010252 0.079923 -0.031615 1.000000 \n", | |
"hours-per-week 0.068756 -0.018768 0.148123 0.078409 0.054256 \n", | |
"\n", | |
" hours-per-week \n", | |
"age 0.068756 \n", | |
"fnlwgt -0.018768 \n", | |
"education-num 0.148123 \n", | |
"capital-gain 0.078409 \n", | |
"capital-loss 0.054256 \n", | |
"hours-per-week 1.000000 " | |
] | |
} | |
], | |
"prompt_number": 35 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment