Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
{
"metadata": {
"name": "",
"signature": "sha256:1fbc7d13b08868781650d566f6ad0c1b1f1d475a8a58c561db89ddf159f3fda7"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd #importing packages\n",
"import os as os\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#pd.describe_option() #describe options for customizing"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#pd.get_option(\"display.memory_usage\") #reading the current value of an option"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"os.getcwd() #current working directory\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"'/home/ajay'"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Work from the current user's Desktop folder; the home directory is looked up\n",
"# at run time rather than hardcoded, so the notebook is not tied to one account.\n",
"os.chdir(os.path.join(os.path.expanduser('~'), 'Desktop'))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"os.getcwd()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"'/home/ajay/Desktop'"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a=os.getcwd()\n",
"os.listdir(a)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"['adult.data']"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"names2=[\"age\",\"workclass\",\"fnlwgt\",\"education\",\"education-num\",\"marital-status\",\"occupation\",\"relationship\",\"race\",\"sex\",\"capital-gain\",\"capital-loss\",\"hours-per-week\",\"native-country\",\"income\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(names2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"15"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Load the raw file; it has no header row, so columns are numbered 0..14 for now.\n",
"# skipinitialspace: fields are separated by ', ', so without it every text value\n",
"# keeps a leading blank (' Private', ' ?', ...).\n",
"# dropna(how='all'): discard any record that is empty in every column (e.g. one\n",
"# produced by a blank line in the file) so it does not inflate the row count.\n",
"adult=pd.read_csv(\"adult.data\",header=None,skipinitialspace=True).dropna(how='all')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(adult)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"32562"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"adult.columns"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], dtype='int64')"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"adult.info()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 32562 entries, 0 to 32561\n",
"Data columns (total 15 columns):\n",
"0 32561 non-null float64\n",
"1 32561 non-null object\n",
"2 32561 non-null float64\n",
"3 32561 non-null object\n",
"4 32561 non-null float64\n",
"5 32561 non-null object\n",
"6 32561 non-null object\n",
"7 32561 non-null object\n",
"8 32561 non-null object\n",
"9 32561 non-null object\n",
"10 32561 non-null float64\n",
"11 32561 non-null float64\n",
"12 32561 non-null float64\n",
"13 32561 non-null object\n",
"14 32561 non-null object\n",
"dtypes: float64(6), object(9)"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"adult.head(8)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" <th>14</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 39</td>\n",
" <td> State-gov</td>\n",
" <td> 77516</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Never-married</td>\n",
" <td> Adm-clerical</td>\n",
" <td> Not-in-family</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 2174</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 50</td>\n",
" <td> Self-emp-not-inc</td>\n",
" <td> 83311</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 13</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 38</td>\n",
" <td> Private</td>\n",
" <td> 215646</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Divorced</td>\n",
" <td> Handlers-cleaners</td>\n",
" <td> Not-in-family</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 53</td>\n",
" <td> Private</td>\n",
" <td> 234721</td>\n",
" <td> 11th</td>\n",
" <td> 7</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Handlers-cleaners</td>\n",
" <td> Husband</td>\n",
" <td> Black</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 28</td>\n",
" <td> Private</td>\n",
" <td> 338409</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Prof-specialty</td>\n",
" <td> Wife</td>\n",
" <td> Black</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> Cuba</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 37</td>\n",
" <td> Private</td>\n",
" <td> 284582</td>\n",
" <td> Masters</td>\n",
" <td> 14</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Wife</td>\n",
" <td> White</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 49</td>\n",
" <td> Private</td>\n",
" <td> 160187</td>\n",
" <td> 9th</td>\n",
" <td> 5</td>\n",
" <td> Married-spouse-absent</td>\n",
" <td> Other-service</td>\n",
" <td> Not-in-family</td>\n",
" <td> Black</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 16</td>\n",
" <td> Jamaica</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 52</td>\n",
" <td> Self-emp-not-inc</td>\n",
" <td> 209642</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 45</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
" 0 1 2 3 4 5 \\\n",
"0 39 State-gov 77516 Bachelors 13 Never-married \n",
"1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse \n",
"2 38 Private 215646 HS-grad 9 Divorced \n",
"3 53 Private 234721 11th 7 Married-civ-spouse \n",
"4 28 Private 338409 Bachelors 13 Married-civ-spouse \n",
"5 37 Private 284582 Masters 14 Married-civ-spouse \n",
"6 49 Private 160187 9th 5 Married-spouse-absent \n",
"7 52 Self-emp-not-inc 209642 HS-grad 9 Married-civ-spouse \n",
"\n",
" 6 7 8 9 10 11 12 \\\n",
"0 Adm-clerical Not-in-family White Male 2174 0 40 \n",
"1 Exec-managerial Husband White Male 0 0 13 \n",
"2 Handlers-cleaners Not-in-family White Male 0 0 40 \n",
"3 Handlers-cleaners Husband Black Male 0 0 40 \n",
"4 Prof-specialty Wife Black Female 0 0 40 \n",
"5 Exec-managerial Wife White Female 0 0 40 \n",
"6 Other-service Not-in-family Black Female 0 0 16 \n",
"7 Exec-managerial Husband White Male 0 0 45 \n",
"\n",
" 13 14 \n",
"0 United-States <=50K \n",
"1 United-States <=50K \n",
"2 United-States <=50K \n",
"3 United-States <=50K \n",
"4 Cuba <=50K \n",
"5 United-States <=50K \n",
"6 Jamaica <=50K \n",
"7 United-States >50K "
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"adult.columns= names2"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"adult.head(30)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>workclass</th>\n",
" <th>fnlwgt</th>\n",
" <th>education</th>\n",
" <th>education-num</th>\n",
" <th>marital-status</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>sex</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" <th>native-country</th>\n",
" <th>income</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 </th>\n",
" <td> 39</td>\n",
" <td> State-gov</td>\n",
" <td> 77516</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Never-married</td>\n",
" <td> Adm-clerical</td>\n",
" <td> Not-in-family</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 2174</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 </th>\n",
" <td> 50</td>\n",
" <td> Self-emp-not-inc</td>\n",
" <td> 83311</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 13</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 </th>\n",
" <td> 38</td>\n",
" <td> Private</td>\n",
" <td> 215646</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Divorced</td>\n",
" <td> Handlers-cleaners</td>\n",
" <td> Not-in-family</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 </th>\n",
" <td> 53</td>\n",
" <td> Private</td>\n",
" <td> 234721</td>\n",
" <td> 11th</td>\n",
" <td> 7</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Handlers-cleaners</td>\n",
" <td> Husband</td>\n",
" <td> Black</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 </th>\n",
" <td> 28</td>\n",
" <td> Private</td>\n",
" <td> 338409</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Prof-specialty</td>\n",
" <td> Wife</td>\n",
" <td> Black</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> Cuba</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 </th>\n",
" <td> 37</td>\n",
" <td> Private</td>\n",
" <td> 284582</td>\n",
" <td> Masters</td>\n",
" <td> 14</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Wife</td>\n",
" <td> White</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 </th>\n",
" <td> 49</td>\n",
" <td> Private</td>\n",
" <td> 160187</td>\n",
" <td> 9th</td>\n",
" <td> 5</td>\n",
" <td> Married-spouse-absent</td>\n",
" <td> Other-service</td>\n",
" <td> Not-in-family</td>\n",
" <td> Black</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 16</td>\n",
" <td> Jamaica</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 </th>\n",
" <td> 52</td>\n",
" <td> Self-emp-not-inc</td>\n",
" <td> 209642</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 45</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 </th>\n",
" <td> 31</td>\n",
" <td> Private</td>\n",
" <td> 45781</td>\n",
" <td> Masters</td>\n",
" <td> 14</td>\n",
" <td> Never-married</td>\n",
" <td> Prof-specialty</td>\n",
" <td> Not-in-family</td>\n",
" <td> White</td>\n",
" <td> Female</td>\n",
" <td> 14084</td>\n",
" <td> 0</td>\n",
" <td> 50</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td> 42</td>\n",
" <td> Private</td>\n",
" <td> 159449</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 5178</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td> 37</td>\n",
" <td> Private</td>\n",
" <td> 280464</td>\n",
" <td> Some-college</td>\n",
" <td> 10</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Husband</td>\n",
" <td> Black</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 80</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td> 30</td>\n",
" <td> State-gov</td>\n",
" <td> 141297</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Prof-specialty</td>\n",
" <td> Husband</td>\n",
" <td> Asian-Pac-Islander</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> India</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td> 23</td>\n",
" <td> Private</td>\n",
" <td> 122272</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Never-married</td>\n",
" <td> Adm-clerical</td>\n",
" <td> Own-child</td>\n",
" <td> White</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 30</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td> 32</td>\n",
" <td> Private</td>\n",
" <td> 205019</td>\n",
" <td> Assoc-acdm</td>\n",
" <td> 12</td>\n",
" <td> Never-married</td>\n",
" <td> Sales</td>\n",
" <td> Not-in-family</td>\n",
" <td> Black</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 50</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td> 40</td>\n",
" <td> Private</td>\n",
" <td> 121772</td>\n",
" <td> Assoc-voc</td>\n",
" <td> 11</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Craft-repair</td>\n",
" <td> Husband</td>\n",
" <td> Asian-Pac-Islander</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> ?</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td> 34</td>\n",
" <td> Private</td>\n",
" <td> 245487</td>\n",
" <td> 7th-8th</td>\n",
" <td> 4</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Transport-moving</td>\n",
" <td> Husband</td>\n",
" <td> Amer-Indian-Eskimo</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 45</td>\n",
" <td> Mexico</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td> 25</td>\n",
" <td> Self-emp-not-inc</td>\n",
" <td> 176756</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Never-married</td>\n",
" <td> Farming-fishing</td>\n",
" <td> Own-child</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 35</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td> 32</td>\n",
" <td> Private</td>\n",
" <td> 186824</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Never-married</td>\n",
" <td> Machine-op-inspct</td>\n",
" <td> Unmarried</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td> 38</td>\n",
" <td> Private</td>\n",
" <td> 28887</td>\n",
" <td> 11th</td>\n",
" <td> 7</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Sales</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 50</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td> 43</td>\n",
" <td> Self-emp-not-inc</td>\n",
" <td> 292175</td>\n",
" <td> Masters</td>\n",
" <td> 14</td>\n",
" <td> Divorced</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Unmarried</td>\n",
" <td> White</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 45</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td> 40</td>\n",
" <td> Private</td>\n",
" <td> 193524</td>\n",
" <td> Doctorate</td>\n",
" <td> 16</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Prof-specialty</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 60</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td> 54</td>\n",
" <td> Private</td>\n",
" <td> 302146</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Separated</td>\n",
" <td> Other-service</td>\n",
" <td> Unmarried</td>\n",
" <td> Black</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 20</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td> 35</td>\n",
" <td> Federal-gov</td>\n",
" <td> 76845</td>\n",
" <td> 9th</td>\n",
" <td> 5</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Farming-fishing</td>\n",
" <td> Husband</td>\n",
" <td> Black</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td> 43</td>\n",
" <td> Private</td>\n",
" <td> 117037</td>\n",
" <td> 11th</td>\n",
" <td> 7</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Transport-moving</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 2042</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td> 59</td>\n",
" <td> Private</td>\n",
" <td> 109015</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Divorced</td>\n",
" <td> Tech-support</td>\n",
" <td> Unmarried</td>\n",
" <td> White</td>\n",
" <td> Female</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td> 56</td>\n",
" <td> Local-gov</td>\n",
" <td> 216851</td>\n",
" <td> Bachelors</td>\n",
" <td> 13</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Tech-support</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td> 19</td>\n",
" <td> Private</td>\n",
" <td> 168294</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Never-married</td>\n",
" <td> Craft-repair</td>\n",
" <td> Own-child</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td> 54</td>\n",
" <td> ?</td>\n",
" <td> 180211</td>\n",
" <td> Some-college</td>\n",
" <td> 10</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> ?</td>\n",
" <td> Husband</td>\n",
" <td> Asian-Pac-Islander</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 60</td>\n",
" <td> South</td>\n",
" <td> &gt;50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td> 39</td>\n",
" <td> Private</td>\n",
" <td> 367260</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Divorced</td>\n",
" <td> Exec-managerial</td>\n",
" <td> Not-in-family</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 80</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td> 49</td>\n",
" <td> Private</td>\n",
" <td> 193366</td>\n",
" <td> HS-grad</td>\n",
" <td> 9</td>\n",
" <td> Married-civ-spouse</td>\n",
" <td> Craft-repair</td>\n",
" <td> Husband</td>\n",
" <td> White</td>\n",
" <td> Male</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 40</td>\n",
" <td> United-States</td>\n",
" <td> &lt;=50K</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 16,
"text": [
" age workclass fnlwgt education education-num \\\n",
"0 39 State-gov 77516 Bachelors 13 \n",
"1 50 Self-emp-not-inc 83311 Bachelors 13 \n",
"2 38 Private 215646 HS-grad 9 \n",
"3 53 Private 234721 11th 7 \n",
"4 28 Private 338409 Bachelors 13 \n",
"5 37 Private 284582 Masters 14 \n",
"6 49 Private 160187 9th 5 \n",
"7 52 Self-emp-not-inc 209642 HS-grad 9 \n",
"8 31 Private 45781 Masters 14 \n",
"9 42 Private 159449 Bachelors 13 \n",
"10 37 Private 280464 Some-college 10 \n",
"11 30 State-gov 141297 Bachelors 13 \n",
"12 23 Private 122272 Bachelors 13 \n",
"13 32 Private 205019 Assoc-acdm 12 \n",
"14 40 Private 121772 Assoc-voc 11 \n",
"15 34 Private 245487 7th-8th 4 \n",
"16 25 Self-emp-not-inc 176756 HS-grad 9 \n",
"17 32 Private 186824 HS-grad 9 \n",
"18 38 Private 28887 11th 7 \n",
"19 43 Self-emp-not-inc 292175 Masters 14 \n",
"20 40 Private 193524 Doctorate 16 \n",
"21 54 Private 302146 HS-grad 9 \n",
"22 35 Federal-gov 76845 9th 5 \n",
"23 43 Private 117037 11th 7 \n",
"24 59 Private 109015 HS-grad 9 \n",
"25 56 Local-gov 216851 Bachelors 13 \n",
"26 19 Private 168294 HS-grad 9 \n",
"27 54 ? 180211 Some-college 10 \n",
"28 39 Private 367260 HS-grad 9 \n",
"29 49 Private 193366 HS-grad 9 \n",
"\n",
" marital-status occupation relationship \\\n",
"0 Never-married Adm-clerical Not-in-family \n",
"1 Married-civ-spouse Exec-managerial Husband \n",
"2 Divorced Handlers-cleaners Not-in-family \n",
"3 Married-civ-spouse Handlers-cleaners Husband \n",
"4 Married-civ-spouse Prof-specialty Wife \n",
"5 Married-civ-spouse Exec-managerial Wife \n",
"6 Married-spouse-absent Other-service Not-in-family \n",
"7 Married-civ-spouse Exec-managerial Husband \n",
"8 Never-married Prof-specialty Not-in-family \n",
"9 Married-civ-spouse Exec-managerial Husband \n",
"10 Married-civ-spouse Exec-managerial Husband \n",
"11 Married-civ-spouse Prof-specialty Husband \n",
"12 Never-married Adm-clerical Own-child \n",
"13 Never-married Sales Not-in-family \n",
"14 Married-civ-spouse Craft-repair Husband \n",
"15 Married-civ-spouse Transport-moving Husband \n",
"16 Never-married Farming-fishing Own-child \n",
"17 Never-married Machine-op-inspct Unmarried \n",
"18 Married-civ-spouse Sales Husband \n",
"19 Divorced Exec-managerial Unmarried \n",
"20 Married-civ-spouse Prof-specialty Husband \n",
"21 Separated Other-service Unmarried \n",
"22 Married-civ-spouse Farming-fishing Husband \n",
"23 Married-civ-spouse Transport-moving Husband \n",
"24 Divorced Tech-support Unmarried \n",
"25 Married-civ-spouse Tech-support Husband \n",
"26 Never-married Craft-repair Own-child \n",
"27 Married-civ-spouse ? Husband \n",
"28 Divorced Exec-managerial Not-in-family \n",
"29 Married-civ-spouse Craft-repair Husband \n",
"\n",
" race sex capital-gain capital-loss hours-per-week \\\n",
"0 White Male 2174 0 40 \n",
"1 White Male 0 0 13 \n",
"2 White Male 0 0 40 \n",
"3 Black Male 0 0 40 \n",
"4 Black Female 0 0 40 \n",
"5 White Female 0 0 40 \n",
"6 Black Female 0 0 16 \n",
"7 White Male 0 0 45 \n",
"8 White Female 14084 0 50 \n",
"9 White Male 5178 0 40 \n",
"10 Black Male 0 0 80 \n",
"11 Asian-Pac-Islander Male 0 0 40 \n",
"12 White Female 0 0 30 \n",
"13 Black Male 0 0 50 \n",
"14 Asian-Pac-Islander Male 0 0 40 \n",
"15 Amer-Indian-Eskimo Male 0 0 45 \n",
"16 White Male 0 0 35 \n",
"17 White Male 0 0 40 \n",
"18 White Male 0 0 50 \n",
"19 White Female 0 0 45 \n",
"20 White Male 0 0 60 \n",
"21 Black Female 0 0 20 \n",
"22 Black Male 0 0 40 \n",
"23 White Male 0 2042 40 \n",
"24 White Female 0 0 40 \n",
"25 White Male 0 0 40 \n",
"26 White Male 0 0 40 \n",
"27 Asian-Pac-Islander Male 0 0 60 \n",
"28 White Male 0 0 80 \n",
"29 White Male 0 0 40 \n",
"\n",
" native-country income \n",
"0 United-States <=50K \n",
"1 United-States <=50K \n",
"2 United-States <=50K \n",
"3 United-States <=50K \n",
"4 Cuba <=50K \n",
"5 United-States <=50K \n",
"6 Jamaica <=50K \n",
"7 United-States >50K \n",
"8 United-States >50K \n",
"9 United-States >50K \n",
"10 United-States >50K \n",
"11 India >50K \n",
"12 United-States <=50K \n",
"13 United-States <=50K \n",
"14 ? >50K \n",
"15 Mexico <=50K \n",
"16 United-States <=50K \n",
"17 United-States <=50K \n",
"18 United-States <=50K \n",
"19 United-States >50K \n",
"20 United-States >50K \n",
"21 United-States <=50K \n",
"22 United-States <=50K \n",
"23 United-States <=50K \n",
"24 United-States <=50K \n",
"25 United-States >50K \n",
"26 United-States <=50K \n",
"27 South >50K \n",
"28 United-States <=50K \n",
"29 United-States <=50K "
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"adult.describe() #numerical summaries"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education-num</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td> 32561.000000</td>\n",
" <td> 32561.000000</td>\n",
" <td> 32561.000000</td>\n",
" <td> 32561.000000</td>\n",
" <td> 32561.000000</td>\n",
" <td> 32561.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 38.581647</td>\n",
" <td> 189778.366512</td>\n",
" <td> 10.080679</td>\n",
" <td> 1077.648844</td>\n",
" <td> 87.303830</td>\n",
" <td> 40.437456</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 13.640433</td>\n",
" <td> 105549.977697</td>\n",
" <td> 2.572720</td>\n",
" <td> 7385.292085</td>\n",
" <td> 402.960219</td>\n",
" <td> 12.347429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 12285.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 28.000000</td>\n",
" <td> 117827.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 37.000000</td>\n",
" <td> 178356.000000</td>\n",
" <td> 10.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 48.000000</td>\n",
" <td> 237051.000000</td>\n",
" <td> 12.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 45.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 90.000000</td>\n",
" <td> 1484705.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 4356.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 17,
"text": [
" age fnlwgt education-num capital-gain \\\n",
"count 32561.000000 32561.000000 32561.000000 32561.000000 \n",
"mean 38.581647 189778.366512 10.080679 1077.648844 \n",
"std 13.640433 105549.977697 2.572720 7385.292085 \n",
"min 17.000000 12285.000000 1.000000 0.000000 \n",
"25% 28.000000 117827.000000 9.000000 0.000000 \n",
"50% 37.000000 178356.000000 10.000000 0.000000 \n",
"75% 48.000000 237051.000000 12.000000 0.000000 \n",
"max 90.000000 1484705.000000 16.000000 99999.000000 \n",
"\n",
" capital-loss hours-per-week \n",
"count 32561.000000 32561.000000 \n",
"mean 87.303830 40.437456 \n",
"std 402.960219 12.347429 \n",
"min 0.000000 1.000000 \n",
"25% 0.000000 40.000000 \n",
"50% 0.000000 40.000000 \n",
"75% 0.000000 45.000000 \n",
"max 4356.000000 99.000000 "
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"workclass=adult.groupby(\"workclass\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(workclass)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 19,
"text": [
"9"
]
}
],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"workclass.sum()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education-num</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" </tr>\n",
" <tr>\n",
" <th>workclass</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> ?</th>\n",
" <td> 75203</td>\n",
" <td> 346115997</td>\n",
" <td> 17002</td>\n",
" <td> 1114077</td>\n",
" <td> 111556</td>\n",
" <td> 58604</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Federal-gov</th>\n",
" <td> 40887</td>\n",
" <td> 177812394</td>\n",
" <td> 10535</td>\n",
" <td> 799903</td>\n",
" <td> 107778</td>\n",
" <td> 39724</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Local-gov</th>\n",
" <td> 87385</td>\n",
" <td> 394822919</td>\n",
" <td> 23111</td>\n",
" <td> 1842264</td>\n",
" <td> 229925</td>\n",
" <td> 85777</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Never-worked</th>\n",
" <td> 144</td>\n",
" <td> 1581927</td>\n",
" <td> 52</td>\n",
" <td> 0</td>\n",
" <td> 0</td>\n",
" <td> 199</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <td> 835158</td>\n",
" <td> 4374974348</td>\n",
" <td> 224230</td>\n",
" <td> 20181687</td>\n",
" <td> 1815878</td>\n",
" <td> 913902</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Self-emp-inc</th>\n",
" <td> 51355</td>\n",
" <td> 196395180</td>\n",
" <td> 12429</td>\n",
" <td> 5441274</td>\n",
" <td> 173135</td>\n",
" <td> 54481</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Self-emp-not-inc</th>\n",
" <td> 114268</td>\n",
" <td> 446221558</td>\n",
" <td> 25985</td>\n",
" <td> 4792483</td>\n",
" <td> 296361</td>\n",
" <td> 112876</td>\n",
" </tr>\n",
" <tr>\n",
" <th> State-gov</th>\n",
" <td> 51188</td>\n",
" <td> 239009324</td>\n",
" <td> 14766</td>\n",
" <td> 910806</td>\n",
" <td> 108067</td>\n",
" <td> 50663</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Without-pay</th>\n",
" <td> 669</td>\n",
" <td> 2439745</td>\n",
" <td> 127</td>\n",
" <td> 6830</td>\n",
" <td> 0</td>\n",
" <td> 458</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
" age fnlwgt education-num capital-gain \\\n",
"workclass \n",
" ? 75203 346115997 17002 1114077 \n",
" Federal-gov 40887 177812394 10535 799903 \n",
" Local-gov 87385 394822919 23111 1842264 \n",
" Never-worked 144 1581927 52 0 \n",
" Private 835158 4374974348 224230 20181687 \n",
" Self-emp-inc 51355 196395180 12429 5441274 \n",
" Self-emp-not-inc 114268 446221558 25985 4792483 \n",
" State-gov 51188 239009324 14766 910806 \n",
" Without-pay 669 2439745 127 6830 \n",
"\n",
" capital-loss hours-per-week \n",
"workclass \n",
" ? 111556 58604 \n",
" Federal-gov 107778 39724 \n",
" Local-gov 229925 85777 \n",
" Never-worked 0 199 \n",
" Private 1815878 913902 \n",
" Self-emp-inc 173135 54481 \n",
" Self-emp-not-inc 296361 112876 \n",
" State-gov 108067 50663 \n",
" Without-pay 0 458 "
]
}
],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"workclass.count()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education</th>\n",
" <th>education-num</th>\n",
" <th>marital-status</th>\n",
" <th>occupation</th>\n",
" <th>relationship</th>\n",
" <th>race</th>\n",
" <th>sex</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" <th>native-country</th>\n",
" <th>income</th>\n",
" </tr>\n",
" <tr>\n",
" <th>workclass</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> ?</th>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" <td> 1836</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Federal-gov</th>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" <td> 960</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Local-gov</th>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" <td> 2093</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Never-worked</th>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Private</th>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" <td> 22696</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Self-emp-inc</th>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" <td> 1116</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Self-emp-not-inc</th>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" <td> 2541</td>\n",
" </tr>\n",
" <tr>\n",
" <th> State-gov</th>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" <td> 1298</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Without-pay</th>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" <td> 14</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 21,
"text": [
" age fnlwgt education education-num marital-status \\\n",
"workclass \n",
" ? 1836 1836 1836 1836 1836 \n",
" Federal-gov 960 960 960 960 960 \n",
" Local-gov 2093 2093 2093 2093 2093 \n",
" Never-worked 7 7 7 7 7 \n",
" Private 22696 22696 22696 22696 22696 \n",
" Self-emp-inc 1116 1116 1116 1116 1116 \n",
" Self-emp-not-inc 2541 2541 2541 2541 2541 \n",
" State-gov 1298 1298 1298 1298 1298 \n",
" Without-pay 14 14 14 14 14 \n",
"\n",
" occupation relationship race sex capital-gain \\\n",
"workclass \n",
" ? 1836 1836 1836 1836 1836 \n",
" Federal-gov 960 960 960 960 960 \n",
" Local-gov 2093 2093 2093 2093 2093 \n",
" Never-worked 7 7 7 7 7 \n",
" Private 22696 22696 22696 22696 22696 \n",
" Self-emp-inc 1116 1116 1116 1116 1116 \n",
" Self-emp-not-inc 2541 2541 2541 2541 2541 \n",
" State-gov 1298 1298 1298 1298 1298 \n",
" Without-pay 14 14 14 14 14 \n",
"\n",
" capital-loss hours-per-week native-country income \n",
"workclass \n",
" ? 1836 1836 1836 1836 \n",
" Federal-gov 960 960 960 960 \n",
" Local-gov 2093 2093 2093 2093 \n",
" Never-worked 7 7 7 7 \n",
" Private 22696 22696 22696 22696 \n",
" Self-emp-inc 1116 1116 1116 1116 \n",
" Self-emp-not-inc 2541 2541 2541 2541 \n",
" State-gov 1298 1298 1298 1298 \n",
" Without-pay 14 14 14 14 "
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"workclass.describe()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>education-num</th>\n",
" <th>fnlwgt</th>\n",
" <th>hours-per-week</th>\n",
" </tr>\n",
" <tr>\n",
" <th>workclass</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\"> ?</th>\n",
" <th>count</th>\n",
" <td> 1836.000000</td>\n",
" <td> 1836.000000</td>\n",
" <td> 1836.000000</td>\n",
" <td> 1836.000000</td>\n",
" <td> 1836.000000</td>\n",
" <td> 1836.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 40.960240</td>\n",
" <td> 606.795752</td>\n",
" <td> 60.760349</td>\n",
" <td> 9.260349</td>\n",
" <td> 188516.338235</td>\n",
" <td> 31.919390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 20.334587</td>\n",
" <td> 5147.323872</td>\n",
" <td> 354.685264</td>\n",
" <td> 2.601986</td>\n",
" <td> 107089.902252</td>\n",
" <td> 14.909903</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 12285.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 21.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 117771.250000</td>\n",
" <td> 20.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 35.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 175617.000000</td>\n",
" <td> 36.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 61.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 10.000000</td>\n",
" <td> 234568.500000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 90.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 4356.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 981628.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\"> Federal-gov</th>\n",
" <th>count</th>\n",
" <td> 960.000000</td>\n",
" <td> 960.000000</td>\n",
" <td> 960.000000</td>\n",
" <td> 960.000000</td>\n",
" <td> 960.000000</td>\n",
" <td> 960.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 42.590625</td>\n",
" <td> 833.232292</td>\n",
" <td> 112.268750</td>\n",
" <td> 10.973958</td>\n",
" <td> 185221.243750</td>\n",
" <td> 41.379167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 11.509171</td>\n",
" <td> 4101.966767</td>\n",
" <td> 453.504623</td>\n",
" <td> 2.113650</td>\n",
" <td> 117502.359524</td>\n",
" <td> 8.838605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 3.000000</td>\n",
" <td> 19914.000000</td>\n",
" <td> 4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 34.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 97781.250000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 43.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 10.000000</td>\n",
" <td> 175771.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 51.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 13.000000</td>\n",
" <td> 243960.250000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 90.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 3683.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 930948.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\"> Local-gov</th>\n",
" <th>count</th>\n",
" <td> 2093.000000</td>\n",
" <td> 2093.000000</td>\n",
" <td> 2093.000000</td>\n",
" <td> 2093.000000</td>\n",
" <td> 2093.000000</td>\n",
" <td> 2093.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 41.751075</td>\n",
" <td> 880.202580</td>\n",
" <td> 109.854276</td>\n",
" <td> 11.042045</td>\n",
" <td> 188639.712852</td>\n",
" <td> 40.982800</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 12.272856</td>\n",
" <td> 5775.043442</td>\n",
" <td> 439.513203</td>\n",
" <td> 2.552536</td>\n",
" <td> 100254.775314</td>\n",
" <td> 10.771559</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 14878.000000</td>\n",
" <td> 2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 32.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 121124.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 41.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 11.000000</td>\n",
" <td> 179580.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 50.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 13.000000</td>\n",
" <td> 236487.000000</td>\n",
" <td> 44.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 90.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 2444.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 1125613.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\"> Never-worked</th>\n",
" <th>count</th>\n",
" <td> 7.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 7.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 20.571429</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 7.428571</td>\n",
" <td> 225989.571429</td>\n",
" <td> 28.428571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 4.613644</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 2.299068</td>\n",
" <td> 108135.748347</td>\n",
" <td> 15.186147</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 4.000000</td>\n",
" <td> 153663.000000</td>\n",
" <td> 4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 18.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 6.000000</td>\n",
" <td> 166902.000000</td>\n",
" <td> 20.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 18.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> 188535.000000</td>\n",
" <td> 35.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\"> Self-emp-inc</th>\n",
" <th>std</th>\n",
" <td> 12.553194</td>\n",
" <td> 17976.548086</td>\n",
" <td> 549.488497</td>\n",
" <td> 2.603210</td>\n",
" <td> 96436.282913</td>\n",
" <td> 13.900417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 2.000000</td>\n",
" <td> 21626.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 37.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 113539.750000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 45.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 10.000000</td>\n",
" <td> 165667.000000</td>\n",
" <td> 50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 54.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 13.000000</td>\n",
" <td> 213722.750000</td>\n",
" <td> 60.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 84.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 2559.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 1097453.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\"> Self-emp-not-inc</th>\n",
" <th>count</th>\n",
" <td> 2541.000000</td>\n",
" <td> 2541.000000</td>\n",
" <td> 2541.000000</td>\n",
" <td> 2541.000000</td>\n",
" <td> 2541.000000</td>\n",
" <td> 2541.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 44.969697</td>\n",
" <td> 1886.061787</td>\n",
" <td> 116.631641</td>\n",
" <td> 10.226289</td>\n",
" <td> 175608.641480</td>\n",
" <td> 44.421881</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 13.338162</td>\n",
" <td> 10986.233506</td>\n",
" <td> 467.611687</td>\n",
" <td> 2.768132</td>\n",
" <td> 100735.757730</td>\n",
" <td> 16.674958</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 2.000000</td>\n",
" <td> 20098.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 35.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 104973.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 44.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 10.000000</td>\n",
" <td> 168109.000000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 54.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 13.000000</td>\n",
" <td> 227298.000000</td>\n",
" <td> 50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 90.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 2824.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 795830.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\"> State-gov</th>\n",
" <th>count</th>\n",
" <td> 1298.000000</td>\n",
" <td> 1298.000000</td>\n",
" <td> 1298.000000</td>\n",
" <td> 1298.000000</td>\n",
" <td> 1298.000000</td>\n",
" <td> 1298.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 39.436055</td>\n",
" <td> 701.699538</td>\n",
" <td> 83.256549</td>\n",
" <td> 11.375963</td>\n",
" <td> 184136.613251</td>\n",
" <td> 39.031587</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 12.431065</td>\n",
" <td> 3777.749185</td>\n",
" <td> 394.469789</td>\n",
" <td> 2.538604</td>\n",
" <td> 111512.980926</td>\n",
" <td> 11.697014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 17.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 19395.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 30.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 108903.750000</td>\n",
" <td> 38.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 39.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 10.000000</td>\n",
" <td> 169402.500000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 48.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 13.000000</td>\n",
" <td> 238532.750000</td>\n",
" <td> 40.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 81.000000</td>\n",
" <td> 99999.000000</td>\n",
" <td> 3683.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 1033222.000000</td>\n",
" <td> 99.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"8\" valign=\"top\"> Without-pay</th>\n",
" <th>count</th>\n",
" <td> 14.000000</td>\n",
" <td> 14.000000</td>\n",
" <td> 14.000000</td>\n",
" <td> 14.000000</td>\n",
" <td> 14.000000</td>\n",
" <td> 14.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 47.785714</td>\n",
" <td> 487.857143</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.071429</td>\n",
" <td> 174267.500000</td>\n",
" <td> 32.714286</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 21.075610</td>\n",
" <td> 1300.780467</td>\n",
" <td> 0.000000</td>\n",
" <td> 1.685426</td>\n",
" <td> 85536.385921</td>\n",
" <td> 17.357900</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 19.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 4.000000</td>\n",
" <td> 27012.000000</td>\n",
" <td> 10.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 23.750000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 138446.500000</td>\n",
" <td> 20.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 57.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 171531.500000</td>\n",
" <td> 27.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 65.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 9.750000</td>\n",
" <td> 209006.500000</td>\n",
" <td> 47.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 72.000000</td>\n",
" <td> 4416.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 12.000000</td>\n",
" <td> 344858.000000</td>\n",
" <td> 65.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>72 rows \u00d7 6 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 22,
"text": [
" age capital-gain capital-loss \\\n",
"workclass \n",
" ? count 1836.000000 1836.000000 1836.000000 \n",
" mean 40.960240 606.795752 60.760349 \n",
" std 20.334587 5147.323872 354.685264 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 21.000000 0.000000 0.000000 \n",
" 50% 35.000000 0.000000 0.000000 \n",
" 75% 61.000000 0.000000 0.000000 \n",
" max 90.000000 99999.000000 4356.000000 \n",
" Federal-gov count 960.000000 960.000000 960.000000 \n",
" mean 42.590625 833.232292 112.268750 \n",
" std 11.509171 4101.966767 453.504623 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 34.000000 0.000000 0.000000 \n",
" 50% 43.000000 0.000000 0.000000 \n",
" 75% 51.000000 0.000000 0.000000 \n",
" max 90.000000 99999.000000 3683.000000 \n",
" Local-gov count 2093.000000 2093.000000 2093.000000 \n",
" mean 41.751075 880.202580 109.854276 \n",
" std 12.272856 5775.043442 439.513203 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 32.000000 0.000000 0.000000 \n",
" 50% 41.000000 0.000000 0.000000 \n",
" 75% 50.000000 0.000000 0.000000 \n",
" max 90.000000 99999.000000 2444.000000 \n",
" Never-worked count 7.000000 7.000000 7.000000 \n",
" mean 20.571429 0.000000 0.000000 \n",
" std 4.613644 0.000000 0.000000 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 18.000000 0.000000 0.000000 \n",
" 50% 18.000000 0.000000 0.000000 \n",
"... ... ... ... \n",
" Self-emp-inc std 12.553194 17976.548086 549.488497 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 37.000000 0.000000 0.000000 \n",
" 50% 45.000000 0.000000 0.000000 \n",
" 75% 54.000000 0.000000 0.000000 \n",
" max 84.000000 99999.000000 2559.000000 \n",
" Self-emp-not-inc count 2541.000000 2541.000000 2541.000000 \n",
" mean 44.969697 1886.061787 116.631641 \n",
" std 13.338162 10986.233506 467.611687 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 35.000000 0.000000 0.000000 \n",
" 50% 44.000000 0.000000 0.000000 \n",
" 75% 54.000000 0.000000 0.000000 \n",
" max 90.000000 99999.000000 2824.000000 \n",
" State-gov count 1298.000000 1298.000000 1298.000000 \n",
" mean 39.436055 701.699538 83.256549 \n",
" std 12.431065 3777.749185 394.469789 \n",
" min 17.000000 0.000000 0.000000 \n",
" 25% 30.000000 0.000000 0.000000 \n",
" 50% 39.000000 0.000000 0.000000 \n",
" 75% 48.000000 0.000000 0.000000 \n",
" max 81.000000 99999.000000 3683.000000 \n",
" Without-pay count 14.000000 14.000000 14.000000 \n",
" mean 47.785714 487.857143 0.000000 \n",
" std 21.075610 1300.780467 0.000000 \n",
" min 19.000000 0.000000 0.000000 \n",
" 25% 23.750000 0.000000 0.000000 \n",
" 50% 57.000000 0.000000 0.000000 \n",
" 75% 65.000000 0.000000 0.000000 \n",
" max 72.000000 4416.000000 0.000000 \n",
"\n",
" education-num fnlwgt hours-per-week \n",
"workclass \n",
" ? count 1836.000000 1836.000000 1836.000000 \n",
" mean 9.260349 188516.338235 31.919390 \n",
" std 2.601986 107089.902252 14.909903 \n",
" min 1.000000 12285.000000 1.000000 \n",
" 25% 9.000000 117771.250000 20.000000 \n",
" 50% 9.000000 175617.000000 36.000000 \n",
" 75% 10.000000 234568.500000 40.000000 \n",
" max 16.000000 981628.000000 99.000000 \n",
" Federal-gov count 960.000000 960.000000 960.000000 \n",
" mean 10.973958 185221.243750 41.379167 \n",
" std 2.113650 117502.359524 8.838605 \n",
" min 3.000000 19914.000000 4.000000 \n",
" 25% 9.000000 97781.250000 40.000000 \n",
" 50% 10.000000 175771.000000 40.000000 \n",
" 75% 13.000000 243960.250000 40.000000 \n",
" max 16.000000 930948.000000 99.000000 \n",
" Local-gov count 2093.000000 2093.000000 2093.000000 \n",
" mean 11.042045 188639.712852 40.982800 \n",
" std 2.552536 100254.775314 10.771559 \n",
" min 1.000000 14878.000000 2.000000 \n",
" 25% 9.000000 121124.000000 40.000000 \n",
" 50% 11.000000 179580.000000 40.000000 \n",
" 75% 13.000000 236487.000000 44.000000 \n",
" max 16.000000 1125613.000000 99.000000 \n",
" Never-worked count 7.000000 7.000000 7.000000 \n",
" mean 7.428571 225989.571429 28.428571 \n",
" std 2.299068 108135.748347 15.186147 \n",
" min 4.000000 153663.000000 4.000000 \n",
" 25% 6.000000 166902.000000 20.000000 \n",
" 50% 7.000000 188535.000000 35.000000 \n",
"... ... ... ... \n",
" Self-emp-inc std 2.603210 96436.282913 13.900417 \n",
" min 2.000000 21626.000000 1.000000 \n",
" 25% 9.000000 113539.750000 40.000000 \n",
" 50% 10.000000 165667.000000 50.000000 \n",
" 75% 13.000000 213722.750000 60.000000 \n",
" max 16.000000 1097453.000000 99.000000 \n",
" Self-emp-not-inc count 2541.000000 2541.000000 2541.000000 \n",
" mean 10.226289 175608.641480 44.421881 \n",
" std 2.768132 100735.757730 16.674958 \n",
" min 2.000000 20098.000000 1.000000 \n",
" 25% 9.000000 104973.000000 40.000000 \n",
" 50% 10.000000 168109.000000 40.000000 \n",
" 75% 13.000000 227298.000000 50.000000 \n",
" max 16.000000 795830.000000 99.000000 \n",
" State-gov count 1298.000000 1298.000000 1298.000000 \n",
" mean 11.375963 184136.613251 39.031587 \n",
" std 2.538604 111512.980926 11.697014 \n",
" min 1.000000 19395.000000 1.000000 \n",
" 25% 9.000000 108903.750000 38.000000 \n",
" 50% 10.000000 169402.500000 40.000000 \n",
" 75% 13.000000 238532.750000 40.000000 \n",
" max 16.000000 1033222.000000 99.000000 \n",
" Without-pay count 14.000000 14.000000 14.000000 \n",
" mean 9.071429 174267.500000 32.714286 \n",
" std 1.685426 85536.385921 17.357900 \n",
" min 4.000000 27012.000000 10.000000 \n",
" 25% 9.000000 138446.500000 20.000000 \n",
" 50% 9.000000 171531.500000 27.500000 \n",
" 75% 9.750000 209006.500000 47.500000 \n",
" max 12.000000 344858.000000 65.000000 \n",
"\n",
"[72 rows x 6 columns]"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"race = adult.groupby(by=\"race\")  # GroupBy handle over the race column, reused by the aggregation cells below"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"race.sum()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education-num</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" </tr>\n",
" <tr>\n",
" <th>race</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> Amer-Indian-Eskimo</th>\n",
" <td> 11561</td>\n",
" <td> 37578487</td>\n",
" <td> 2896</td>\n",
" <td> 194458</td>\n",
" <td> 10629</td>\n",
" <td> 12455</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Asian-Pac-Islander</th>\n",
" <td> 39219</td>\n",
" <td> 166178293</td>\n",
" <td> 11388</td>\n",
" <td> 1536014</td>\n",
" <td> 101014</td>\n",
" <td> 41692</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Black</th>\n",
" <td> 117987</td>\n",
" <td> 712313000</td>\n",
" <td> 29635</td>\n",
" <td> 1905454</td>\n",
" <td> 188643</td>\n",
" <td> 120033</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Other</th>\n",
" <td> 9067</td>\n",
" <td> 53420656</td>\n",
" <td> 2396</td>\n",
" <td> 253293</td>\n",
" <td> 16550</td>\n",
" <td> 10696</td>\n",
" </tr>\n",
" <tr>\n",
" <th> White</th>\n",
" <td> 1078423</td>\n",
" <td> 5209882956</td>\n",
" <td> 281922</td>\n",
" <td> 31200105</td>\n",
" <td> 2525864</td>\n",
" <td> 1131808</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
" age fnlwgt education-num capital-gain \\\n",
"race \n",
" Amer-Indian-Eskimo 11561 37578487 2896 194458 \n",
" Asian-Pac-Islander 39219 166178293 11388 1536014 \n",
" Black 117987 712313000 29635 1905454 \n",
" Other 9067 53420656 2396 253293 \n",
" White 1078423 5209882956 281922 31200105 \n",
"\n",
" capital-loss hours-per-week \n",
"race \n",
" Amer-Indian-Eskimo 10629 12455 \n",
" Asian-Pac-Islander 101014 41692 \n",
" Black 188643 120033 \n",
" Other 16550 10696 \n",
" White 2525864 1131808 "
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"race.mean()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education-num</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" </tr>\n",
" <tr>\n",
" <th>race</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> Amer-Indian-Eskimo</th>\n",
" <td> 37.173633</td>\n",
" <td> 120831.147910</td>\n",
" <td> 9.311897</td>\n",
" <td> 625.266881</td>\n",
" <td> 34.176849</td>\n",
" <td> 40.048232</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Asian-Pac-Islander</th>\n",
" <td> 37.746872</td>\n",
" <td> 159940.609240</td>\n",
" <td> 10.960539</td>\n",
" <td> 1478.358037</td>\n",
" <td> 97.222329</td>\n",
" <td> 40.127045</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Black</th>\n",
" <td> 37.767926</td>\n",
" <td> 228013.124200</td>\n",
" <td> 9.486236</td>\n",
" <td> 609.940461</td>\n",
" <td> 60.385083</td>\n",
" <td> 38.422855</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Other</th>\n",
" <td> 33.457565</td>\n",
" <td> 197124.191882</td>\n",
" <td> 8.841328</td>\n",
" <td> 934.660517</td>\n",
" <td> 61.070111</td>\n",
" <td> 39.468635</td>\n",
" </tr>\n",
" <tr>\n",
" <th> White</th>\n",
" <td> 38.769881</td>\n",
" <td> 187298.064280</td>\n",
" <td> 10.135246</td>\n",
" <td> 1121.660375</td>\n",
" <td> 90.806155</td>\n",
" <td> 40.689100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
" age fnlwgt education-num capital-gain \\\n",
"race \n",
" Amer-Indian-Eskimo 37.173633 120831.147910 9.311897 625.266881 \n",
" Asian-Pac-Islander 37.746872 159940.609240 10.960539 1478.358037 \n",
" Black 37.767926 228013.124200 9.486236 609.940461 \n",
" Other 33.457565 197124.191882 8.841328 934.660517 \n",
" White 38.769881 187298.064280 10.135246 1121.660375 \n",
"\n",
" capital-loss hours-per-week \n",
"race \n",
" Amer-Indian-Eskimo 34.176849 40.048232 \n",
" Asian-Pac-Islander 97.222329 40.127045 \n",
" Black 60.385083 38.422855 \n",
" Other 61.070111 39.468635 \n",
" White 90.806155 40.689100 "
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.crosstab(adult.race, adult.workclass)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>workclass</th>\n",
" <th> ?</th>\n",
" <th> Federal-gov</th>\n",
" <th> Local-gov</th>\n",
" <th> Never-worked</th>\n",
" <th> Private</th>\n",
" <th> Self-emp-inc</th>\n",
" <th> Self-emp-not-inc</th>\n",
" <th> State-gov</th>\n",
" <th> Without-pay</th>\n",
" </tr>\n",
" <tr>\n",
" <th>race</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> Amer-Indian-Eskimo</th>\n",
" <td> 25</td>\n",
" <td> 19</td>\n",
" <td> 36</td>\n",
" <td> 0</td>\n",
" <td> 190</td>\n",
" <td> 2</td>\n",
" <td> 24</td>\n",
" <td> 15</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Asian-Pac-Islander</th>\n",
" <td> 65</td>\n",
" <td> 44</td>\n",
" <td> 39</td>\n",
" <td> 0</td>\n",
" <td> 713</td>\n",
" <td> 46</td>\n",
" <td> 73</td>\n",
" <td> 58</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Black</th>\n",
" <td> 213</td>\n",
" <td> 169</td>\n",
" <td> 288</td>\n",
" <td> 2</td>\n",
" <td> 2176</td>\n",
" <td> 23</td>\n",
" <td> 93</td>\n",
" <td> 159</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Other</th>\n",
" <td> 23</td>\n",
" <td> 7</td>\n",
" <td> 10</td>\n",
" <td> 0</td>\n",
" <td> 213</td>\n",
" <td> 5</td>\n",
" <td> 9</td>\n",
" <td> 4</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th> White</th>\n",
" <td> 1510</td>\n",
" <td> 721</td>\n",
" <td> 1720</td>\n",
" <td> 5</td>\n",
" <td> 19404</td>\n",
" <td> 1040</td>\n",
" <td> 2342</td>\n",
" <td> 1062</td>\n",
" <td> 12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 26,
"text": [
"workclass ? Federal-gov Local-gov Never-worked Private \\\n",
"race \n",
" Amer-Indian-Eskimo 25 19 36 0 190 \n",
" Asian-Pac-Islander 65 44 39 0 713 \n",
" Black 213 169 288 2 2176 \n",
" Other 23 7 10 0 213 \n",
" White 1510 721 1720 5 19404 \n",
"\n",
"workclass Self-emp-inc Self-emp-not-inc State-gov \\\n",
"race \n",
" Amer-Indian-Eskimo 2 24 15 \n",
" Asian-Pac-Islander 46 73 58 \n",
" Black 23 93 159 \n",
" Other 5 9 4 \n",
" White 1040 2342 1062 \n",
"\n",
"workclass Without-pay \n",
"race \n",
" Amer-Indian-Eskimo 0 \n",
" Asian-Pac-Islander 1 \n",
" Black 1 \n",
" Other 0 \n",
" White 12 "
]
}
],
"prompt_number": 26
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Contingency table: number of rows for each race (rows) x sex (columns) pair\n",
"pd.crosstab(adult['race'], adult['sex'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>sex</th>\n",
" <th> Female</th>\n",
" <th> Male</th>\n",
" </tr>\n",
" <tr>\n",
" <th>race</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> Amer-Indian-Eskimo</th>\n",
" <td> 119</td>\n",
" <td> 192</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Asian-Pac-Islander</th>\n",
" <td> 346</td>\n",
" <td> 693</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Black</th>\n",
" <td> 1555</td>\n",
" <td> 1569</td>\n",
" </tr>\n",
" <tr>\n",
" <th> Other</th>\n",
" <td> 109</td>\n",
" <td> 162</td>\n",
" </tr>\n",
" <tr>\n",
" <th> White</th>\n",
" <td> 8642</td>\n",
" <td> 19174</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 27,
"text": [
"sex Female Male\n",
"race \n",
" Amer-Indian-Eskimo 119 192\n",
" Asian-Pac-Islander 346 693\n",
" Black 1555 1569\n",
" Other 109 162\n",
" White 8642 19174"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Contingency table: number of rows for each income bracket (rows) x sex (columns) pair\n",
"pd.crosstab(adult['income'], adult['sex'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>sex</th>\n",
" <th> Female</th>\n",
" <th> Male</th>\n",
" </tr>\n",
" <tr>\n",
" <th>income</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> &lt;=50K</th>\n",
" <td> 9592</td>\n",
" <td> 15128</td>\n",
" </tr>\n",
" <tr>\n",
" <th> &gt;50K</th>\n",
" <td> 1179</td>\n",
" <td> 6662</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 28,
"text": [
"sex Female Male\n",
"income \n",
" <=50K 9592 15128\n",
" >50K 1179 6662"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Contingency table: number of rows for each income bracket (rows) x race (columns) pair\n",
"pd.crosstab(adult['income'], adult['race'])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>race</th>\n",
" <th> Amer-Indian-Eskimo</th>\n",
" <th> Asian-Pac-Islander</th>\n",
" <th> Black</th>\n",
" <th> Other</th>\n",
" <th> White</th>\n",
" </tr>\n",
" <tr>\n",
" <th>income</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th> &lt;=50K</th>\n",
" <td> 275</td>\n",
" <td> 763</td>\n",
" <td> 2737</td>\n",
" <td> 246</td>\n",
" <td> 20699</td>\n",
" </tr>\n",
" <tr>\n",
" <th> &gt;50K</th>\n",
" <td> 36</td>\n",
" <td> 276</td>\n",
" <td> 387</td>\n",
" <td> 25</td>\n",
" <td> 7117</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 29,
"text": [
"race Amer-Indian-Eskimo Asian-Pac-Islander Black Other White\n",
"income \n",
" <=50K 275 763 2737 246 20699\n",
" >50K 36 276 387 25 7117"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Pairwise Pearson correlation between the numeric columns of `adult`.\n",
"# The numeric columns are selected explicitly: since pandas 2.0, DataFrame.corr()\n",
"# no longer drops text columns silently (numeric_only defaults to False) and would\n",
"# raise on columns such as workclass/race/sex. select_dtypes keeps the result\n",
"# identical on older pandas releases as well.\n",
"adult.select_dtypes(include=['number']).corr(method='pearson', min_periods=1)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>fnlwgt</th>\n",
" <th>education-num</th>\n",
" <th>capital-gain</th>\n",
" <th>capital-loss</th>\n",
" <th>hours-per-week</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>age</th>\n",
" <td> 1.000000</td>\n",
" <td>-0.076646</td>\n",
" <td> 0.036527</td>\n",
" <td> 0.077674</td>\n",
" <td> 0.057775</td>\n",
" <td> 0.068756</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fnlwgt</th>\n",
" <td>-0.076646</td>\n",
" <td> 1.000000</td>\n",
" <td>-0.043195</td>\n",
" <td> 0.000432</td>\n",
" <td>-0.010252</td>\n",
" <td>-0.018768</td>\n",
" </tr>\n",
" <tr>\n",
" <th>education-num</th>\n",
" <td> 0.036527</td>\n",
" <td>-0.043195</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.122630</td>\n",
" <td> 0.079923</td>\n",
" <td> 0.148123</td>\n",
" </tr>\n",
" <tr>\n",
" <th>capital-gain</th>\n",
" <td> 0.077674</td>\n",
" <td> 0.000432</td>\n",
" <td> 0.122630</td>\n",
" <td> 1.000000</td>\n",
" <td>-0.031615</td>\n",
" <td> 0.078409</td>\n",
" </tr>\n",
" <tr>\n",
" <th>capital-loss</th>\n",
" <td> 0.057775</td>\n",
" <td>-0.010252</td>\n",
" <td> 0.079923</td>\n",
" <td>-0.031615</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.054256</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hours-per-week</th>\n",
" <td> 0.068756</td>\n",
" <td>-0.018768</td>\n",
" <td> 0.148123</td>\n",
" <td> 0.078409</td>\n",
" <td> 0.054256</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 35,
"text": [
" age fnlwgt education-num capital-gain capital-loss \\\n",
"age 1.000000 -0.076646 0.036527 0.077674 0.057775 \n",
"fnlwgt -0.076646 1.000000 -0.043195 0.000432 -0.010252 \n",
"education-num 0.036527 -0.043195 1.000000 0.122630 0.079923 \n",
"capital-gain 0.077674 0.000432 0.122630 1.000000 -0.031615 \n",
"capital-loss 0.057775 -0.010252 0.079923 -0.031615 1.000000 \n",
"hours-per-week 0.068756 -0.018768 0.148123 0.078409 0.054256 \n",
"\n",
" hours-per-week \n",
"age 0.068756 \n",
"fnlwgt -0.018768 \n",
"education-num 0.148123 \n",
"capital-gain 0.078409 \n",
"capital-loss 0.054256 \n",
"hours-per-week 1.000000 "
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.