Skip to content

Instantly share code, notes, and snippets.

@taylorterry3
Created April 12, 2016 21:15
Show Gist options
  • Save taylorterry3/5c4ba1cdd23c56ac106db6ccb6e221f7 to your computer and use it in GitHub Desktop.
Save taylorterry3/5c4ba1cdd23c56ac106db6ccb6e221f7 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import gender_guesser.detector as gender"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"b'Skipping line 35610: expected 19 fields, saw 20\\nSkipping line 35611: expected 19 fields, saw 20\\n'\n",
"b'Skipping line 146480: expected 19 fields, saw 20\\n'\n"
]
}
],
"source": [
"# Pipe-delimited export. quoting=3 is csv.QUOTE_NONE, so quote characters are read literally.\n",
"# on_bad_lines='warn' (pandas >= 1.3) replaces the removed error_bad_lines=False:\n",
"# rows with an unexpected number of fields are skipped and reported as warnings.\n",
"salary_data = pd.read_csv(\n",
"    '~/Downloads/2015/ar_disbursements_emp_off_data_2015.txt',\n",
"    sep='|',\n",
"    quoting=3,\n",
"    on_bad_lines='warn',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Keep only salaries above $40k to try to get rid of partial year numbers. YMMV.\n",
"# .copy() makes the filtered frame independent of the original, so adding\n",
"# columns to it later does not raise SettingWithCopyWarning.\n",
"salary_data = salary_data[salary_data.TOTAL > 40000].copy()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"d = gender.Detector(case_sensitive=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def guess_gender(name):\n",
"    \"\"\"Return the detector's gender label for one FIRST_NAME value.\"\"\"\n",
"    # str() so that non-string values (e.g. NaN for a missing name) are passed as text.\n",
"    return d.get_gender(str(name))\n",
"\n",
"salary_data['MODELED_GENDER'] = salary_data['FIRST_NAME'].map(guess_gender)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Collapse the detector's labels into three groups: female, male, uncertain.\n",
"# \"mostly_female\" & \"mostly_male\" become plain values, and \"andy\" is the\n",
"# detector's label for androgynous names. Newer gender_guesser releases also\n",
"# return \"unknown\" for names missing from their database (TODO confirm against\n",
"# the installed version); fold it into \"uncertain\" as well so the pivot keeps\n",
"# the same three gender columns.\n",
"gender_label_map = {\n",
"    'mostly_female': 'female',\n",
"    'mostly_male': 'male',\n",
"    'andy': 'uncertain',\n",
"    'unknown': 'uncertain',\n",
"}\n",
"# Whole-label lookup; labels not listed above pass through unchanged.\n",
"salary_data['MODELED_GENDER'] = salary_data['MODELED_GENDER'].map(\n",
"    lambda label: gender_label_map.get(label, label))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Per-title mean, max and row count of TOTAL, split by modeled gender;\n",
"# titles with the most modeled-female rows come first.\n",
"female_count_column = ('size', 'female')\n",
"pivoted_data = (\n",
"    salary_data\n",
"    .pivot_table(index='TITLE',\n",
"                 columns='MODELED_GENDER',\n",
"                 values='TOTAL',\n",
"                 aggfunc=[np.mean, np.max, np.size])\n",
"    .sort_values(by=[female_count_column], ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"3\" halign=\"left\">mean</th>\n",
" <th colspan=\"3\" halign=\"left\">amax</th>\n",
" <th colspan=\"3\" halign=\"left\">size</th>\n",
" </tr>\n",
" <tr>\n",
" <th>MODELED_GENDER</th>\n",
" <th>female</th>\n",
" <th>male</th>\n",
" <th>uncertain</th>\n",
" <th>female</th>\n",
" <th>male</th>\n",
" <th>uncertain</th>\n",
" <th>female</th>\n",
" <th>male</th>\n",
" <th>uncertain</th>\n",
" </tr>\n",
" <tr>\n",
" <th>TITLE</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>DATA CONTROL CLERK</th>\n",
" <td>52019.400000</td>\n",
" <td>69089.00</td>\n",
" <td>40068.000000</td>\n",
" <td>75190</td>\n",
" <td>71054</td>\n",
" <td>40068</td>\n",
" <td>15</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA ENTRY CLERK</th>\n",
" <td>52228.909091</td>\n",
" <td>48432.25</td>\n",
" <td>55319.285714</td>\n",
" <td>83724</td>\n",
" <td>58557</td>\n",
" <td>63570</td>\n",
" <td>11</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA ENTRY</th>\n",
" <td>61488.142857</td>\n",
" <td>NaN</td>\n",
" <td>44507.000000</td>\n",
" <td>90761</td>\n",
" <td>NaN</td>\n",
" <td>44507</td>\n",
" <td>7</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA PROCESSING</th>\n",
" <td>64980.500000</td>\n",
" <td>91047.75</td>\n",
" <td>NaN</td>\n",
" <td>67939</td>\n",
" <td>121200</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE SPEC</th>\n",
" <td>82859.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>91952</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA PROCESS CLERK</th>\n",
" <td>44008.666667</td>\n",
" <td>NaN</td>\n",
" <td>62647.000000</td>\n",
" <td>45205</td>\n",
" <td>NaN</td>\n",
" <td>62647</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AGREEMENT DATA SPECIALIST</th>\n",
" <td>73835.500000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>73837</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA ENTRY SPECIALIST</th>\n",
" <td>40971.500000</td>\n",
" <td>44280.00</td>\n",
" <td>NaN</td>\n",
" <td>41071</td>\n",
" <td>44280</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA ENTRY OPERATOR</th>\n",
" <td>58540.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67081</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA DUES PROCESSOR 2</th>\n",
" <td>41135.500000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>41338</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA COORDINATOR</th>\n",
" <td>46392.500000</td>\n",
" <td>85621.50</td>\n",
" <td>54179.000000</td>\n",
" <td>52331</td>\n",
" <td>86185</td>\n",
" <td>54179</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA CLERK</th>\n",
" <td>57797.500000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>68180</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATA &amp; TARGETING SPECIALI</th>\n",
" <td>62773.000000</td>\n",
" <td>51842.00</td>\n",
" <td>NaN</td>\n",
" <td>68227</td>\n",
" <td>51842</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>AGMTS TECH - DATABASE</th>\n",
" <td>51065.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>52295</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>FIELD DATA COORDINATOR</th>\n",
" <td>89358.000000</td>\n",
" <td>57619.50</td>\n",
" <td>NaN</td>\n",
" <td>98839</td>\n",
" <td>62650</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SUP DATA PROCESSING</th>\n",
" <td>93906.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>106932</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE MANAGER</th>\n",
" <td>66301.000000</td>\n",
" <td>57388.50</td>\n",
" <td>63381.000000</td>\n",
" <td>80724</td>\n",
" <td>67807</td>\n",
" <td>63381</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR DATABASE ADMINISTRATOR</th>\n",
" <td>162306.500000</td>\n",
" <td>134746.50</td>\n",
" <td>196390.000000</td>\n",
" <td>206407</td>\n",
" <td>135826</td>\n",
" <td>196390</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MEMBERSHIP DATA COORD</th>\n",
" <td>95197.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>95197</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OPERATIONS &amp; DATA COOR</th>\n",
" <td>64486.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>64486</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MEMBERSHIP DATA PROCESS</th>\n",
" <td>41463.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>41463</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MEMBERSHIP/DATA ENTRY MGR</th>\n",
" <td>118054.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>118054</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ORGANIZING DATA ANALYST</th>\n",
" <td>47516.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>47516</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ORGANIZING DATA SPEC</th>\n",
" <td>69114.000000</td>\n",
" <td>64652.00</td>\n",
" <td>NaN</td>\n",
" <td>69114</td>\n",
" <td>64652</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MATERIAL/DATA MOB COORD</th>\n",
" <td>77788.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>77788</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR DATABASE SPECIALIST</th>\n",
" <td>67006.000000</td>\n",
" <td>72738.00</td>\n",
" <td>NaN</td>\n",
" <td>67006</td>\n",
" <td>79218</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SUPRDATA ENTRY OPR</th>\n",
" <td>63257.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>63257</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>UHWP DATA MANAGER</th>\n",
" <td>54605.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>54605</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BUSINESS DATA ANALYST</th>\n",
" <td>54712.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>54712</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ASST. DATA ADMIN</th>\n",
" <td>52452.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>52452</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE ADMIN</th>\n",
" <td>NaN</td>\n",
" <td>100685.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>100685</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE ANALYST III</th>\n",
" <td>NaN</td>\n",
" <td>121695.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>121695</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE ANLYST</th>\n",
" <td>NaN</td>\n",
" <td>79862.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>84321</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE ASSISTANT</th>\n",
" <td>NaN</td>\n",
" <td>81193.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>81193</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE COORDINATOR</th>\n",
" <td>NaN</td>\n",
" <td>61435.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>61435</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE INFORMATION MNGR</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>66097.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>66097</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE OPERATIONS ASSOC</th>\n",
" <td>NaN</td>\n",
" <td>53741.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>53741</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE PROGRAMMER</th>\n",
" <td>NaN</td>\n",
" <td>82340.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>100118</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE SUPPORT COORD</th>\n",
" <td>NaN</td>\n",
" <td>70016.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>70016</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATABASE/PROGRAM ANALYST</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67029.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67029</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DATAOPER</th>\n",
" <td>NaN</td>\n",
" <td>107138.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>107138</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DEVELOPMENT DATABASE ADMI</th>\n",
" <td>NaN</td>\n",
" <td>67681.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67681</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DIR DATA AND TECHNOLOGY</th>\n",
" <td>NaN</td>\n",
" <td>152555.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>152555</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DIR OF DATABASE ENG'G</th>\n",
" <td>NaN</td>\n",
" <td>102499.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>102499</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ELEC DATA TRANS SPECIALIS</th>\n",
" <td>NaN</td>\n",
" <td>49749.00</td>\n",
" <td>51506.000000</td>\n",
" <td>NaN</td>\n",
" <td>56530</td>\n",
" <td>52791</td>\n",
" <td>NaN</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ELECT DATA TECHNICIAN</th>\n",
" <td>NaN</td>\n",
" <td>64330.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>64330</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IT DATABASE ADMIN</th>\n",
" <td>NaN</td>\n",
" <td>75163.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>75163</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IT/DATA COORDINATOR</th>\n",
" <td>NaN</td>\n",
" <td>79982.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>79982</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>JUNIOR DATABASE ADMINISTR</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>53318.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>53318</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>MEMBER DATA ANALYST</th>\n",
" <td>NaN</td>\n",
" <td>57854.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>57854</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ORG DATABASE ANALYST</th>\n",
" <td>NaN</td>\n",
" <td>67450.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>67450</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ORGANIZING DATABASE MGR</th>\n",
" <td>NaN</td>\n",
" <td>90599.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>90599</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRODUCTN MGR/ASST DATA MG</th>\n",
" <td>NaN</td>\n",
" <td>62974.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>62974</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SENIOR DATABASE DEVELOPER</th>\n",
" <td>NaN</td>\n",
" <td>115576.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>115576</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR DATA BASE ANALYST</th>\n",
" <td>NaN</td>\n",
" <td>83464.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>83464</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR DATABASE ANALYST</th>\n",
" <td>NaN</td>\n",
" <td>108507.00</td>\n",
" <td>94792.000000</td>\n",
" <td>NaN</td>\n",
" <td>108507</td>\n",
" <td>94792</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR MGR OF DATA PROCESSING</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>89521.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>89521</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR PROGRAMMER/DATA MINER</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>109205.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>109205</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR. DATABASE ADMINISTRAT</th>\n",
" <td>NaN</td>\n",
" <td>88349.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>88349</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SR. DATABASE COORD.</th>\n",
" <td>NaN</td>\n",
" <td>46183.00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>46183</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>107 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" mean amax \\\n",
"MODELED_GENDER female male uncertain female \n",
"TITLE \n",
"DATA CONTROL CLERK 52019.400000 69089.00 40068.000000 75190 \n",
"DATA ENTRY CLERK 52228.909091 48432.25 55319.285714 83724 \n",
"DATA ENTRY 61488.142857 NaN 44507.000000 90761 \n",
"DATA PROCESSING 64980.500000 91047.75 NaN 67939 \n",
"DATABASE SPEC 82859.000000 NaN NaN 91952 \n",
"DATA PROCESS CLERK 44008.666667 NaN 62647.000000 45205 \n",
"AGREEMENT DATA SPECIALIST 73835.500000 NaN NaN 73837 \n",
"DATA ENTRY SPECIALIST 40971.500000 44280.00 NaN 41071 \n",
"DATA ENTRY OPERATOR 58540.000000 NaN NaN 67081 \n",
"DATA DUES PROCESSOR 2 41135.500000 NaN NaN 41338 \n",
"DATA COORDINATOR 46392.500000 85621.50 54179.000000 52331 \n",
"DATA CLERK 57797.500000 NaN NaN 68180 \n",
"DATA & TARGETING SPECIALI 62773.000000 51842.00 NaN 68227 \n",
"AGMTS TECH - DATABASE 51065.000000 NaN NaN 52295 \n",
"FIELD DATA COORDINATOR 89358.000000 57619.50 NaN 98839 \n",
"SUP DATA PROCESSING 93906.000000 NaN NaN 106932 \n",
"DATABASE MANAGER 66301.000000 57388.50 63381.000000 80724 \n",
"SR DATABASE ADMINISTRATOR 162306.500000 134746.50 196390.000000 206407 \n",
"MEMBERSHIP DATA COORD 95197.000000 NaN NaN 95197 \n",
"OPERATIONS & DATA COOR 64486.000000 NaN NaN 64486 \n",
"MEMBERSHIP DATA PROCESS 41463.000000 NaN NaN 41463 \n",
"MEMBERSHIP/DATA ENTRY MGR 118054.000000 NaN NaN 118054 \n",
"ORGANIZING DATA ANALYST 47516.000000 NaN NaN 47516 \n",
"ORGANIZING DATA SPEC 69114.000000 64652.00 NaN 69114 \n",
"MATERIAL/DATA MOB COORD 77788.000000 NaN NaN 77788 \n",
"SR DATABASE SPECIALIST 67006.000000 72738.00 NaN 67006 \n",
"SUPRDATA ENTRY OPR 63257.000000 NaN NaN 63257 \n",
"UHWP DATA MANAGER 54605.000000 NaN NaN 54605 \n",
"BUSINESS DATA ANALYST 54712.000000 NaN NaN 54712 \n",
"ASST. DATA ADMIN 52452.000000 NaN NaN 52452 \n",
"... ... ... ... ... \n",
"DATABASE ADMIN NaN 100685.00 NaN NaN \n",
"DATABASE ANALYST III NaN 121695.00 NaN NaN \n",
"DATABASE ANLYST NaN 79862.00 NaN NaN \n",
"DATABASE ASSISTANT NaN 81193.00 NaN NaN \n",
"DATABASE COORDINATOR NaN 61435.00 NaN NaN \n",
"DATABASE INFORMATION MNGR NaN NaN 66097.000000 NaN \n",
"DATABASE OPERATIONS ASSOC NaN 53741.00 NaN NaN \n",
"DATABASE PROGRAMMER NaN 82340.00 NaN NaN \n",
"DATABASE SUPPORT COORD NaN 70016.00 NaN NaN \n",
"DATABASE/PROGRAM ANALYST NaN NaN 67029.000000 NaN \n",
"DATAOPER NaN 107138.00 NaN NaN \n",
"DEVELOPMENT DATABASE ADMI NaN 67681.00 NaN NaN \n",
"DIR DATA AND TECHNOLOGY NaN 152555.00 NaN NaN \n",
"DIR OF DATABASE ENG'G NaN 102499.00 NaN NaN \n",
"ELEC DATA TRANS SPECIALIS NaN 49749.00 51506.000000 NaN \n",
"ELECT DATA TECHNICIAN NaN 64330.00 NaN NaN \n",
"IT DATABASE ADMIN NaN 75163.00 NaN NaN \n",
"IT/DATA COORDINATOR NaN 79982.00 NaN NaN \n",
"JUNIOR DATABASE ADMINISTR NaN NaN 53318.000000 NaN \n",
"MEMBER DATA ANALYST NaN 57854.00 NaN NaN \n",
"ORG DATABASE ANALYST NaN 67450.00 NaN NaN \n",
"ORGANIZING DATABASE MGR NaN 90599.00 NaN NaN \n",
"PRODUCTN MGR/ASST DATA MG NaN 62974.00 NaN NaN \n",
"SENIOR DATABASE DEVELOPER NaN 115576.00 NaN NaN \n",
"SR DATA BASE ANALYST NaN 83464.00 NaN NaN \n",
"SR DATABASE ANALYST NaN 108507.00 94792.000000 NaN \n",
"SR MGR OF DATA PROCESSING NaN NaN 89521.000000 NaN \n",
"SR PROGRAMMER/DATA MINER NaN NaN 109205.000000 NaN \n",
"SR. DATABASE ADMINISTRAT NaN 88349.00 NaN NaN \n",
"SR. DATABASE COORD. NaN 46183.00 NaN NaN \n",
"\n",
" size \n",
"MODELED_GENDER male uncertain female male uncertain \n",
"TITLE \n",
"DATA CONTROL CLERK 71054 40068 15 3 1 \n",
"DATA ENTRY CLERK 58557 63570 11 8 7 \n",
"DATA ENTRY NaN 44507 7 NaN 1 \n",
"DATA PROCESSING 121200 NaN 4 4 NaN \n",
"DATABASE SPEC NaN NaN 3 NaN NaN \n",
"DATA PROCESS CLERK NaN 62647 3 NaN 1 \n",
"AGREEMENT DATA SPECIALIST NaN NaN 2 NaN NaN \n",
"DATA ENTRY SPECIALIST 44280 NaN 2 1 NaN \n",
"DATA ENTRY OPERATOR NaN NaN 2 NaN NaN \n",
"DATA DUES PROCESSOR 2 NaN NaN 2 NaN NaN \n",
"DATA COORDINATOR 86185 54179 2 2 1 \n",
"DATA CLERK NaN NaN 2 NaN NaN \n",
"DATA & TARGETING SPECIALI 51842 NaN 2 1 NaN \n",
"AGMTS TECH - DATABASE NaN NaN 2 NaN NaN \n",
"FIELD DATA COORDINATOR 62650 NaN 2 2 NaN \n",
"SUP DATA PROCESSING NaN NaN 2 NaN NaN \n",
"DATABASE MANAGER 67807 63381 2 2 1 \n",
"SR DATABASE ADMINISTRATOR 135826 196390 2 2 1 \n",
"MEMBERSHIP DATA COORD NaN NaN 1 NaN NaN \n",
"OPERATIONS & DATA COOR NaN NaN 1 NaN NaN \n",
"MEMBERSHIP DATA PROCESS NaN NaN 1 NaN NaN \n",
"MEMBERSHIP/DATA ENTRY MGR NaN NaN 1 NaN NaN \n",
"ORGANIZING DATA ANALYST NaN NaN 1 NaN NaN \n",
"ORGANIZING DATA SPEC 64652 NaN 1 1 NaN \n",
"MATERIAL/DATA MOB COORD NaN NaN 1 NaN NaN \n",
"SR DATABASE SPECIALIST 79218 NaN 1 2 NaN \n",
"SUPRDATA ENTRY OPR NaN NaN 1 NaN NaN \n",
"UHWP DATA MANAGER NaN NaN 1 NaN NaN \n",
"BUSINESS DATA ANALYST NaN NaN 1 NaN NaN \n",
"ASST. DATA ADMIN NaN NaN 1 NaN NaN \n",
"... ... ... ... ... ... \n",
"DATABASE ADMIN 100685 NaN NaN 1 NaN \n",
"DATABASE ANALYST III 121695 NaN NaN 1 NaN \n",
"DATABASE ANLYST 84321 NaN NaN 3 NaN \n",
"DATABASE ASSISTANT 81193 NaN NaN 1 NaN \n",
"DATABASE COORDINATOR 61435 NaN NaN 1 NaN \n",
"DATABASE INFORMATION MNGR NaN 66097 NaN NaN 1 \n",
"DATABASE OPERATIONS ASSOC 53741 NaN NaN 1 NaN \n",
"DATABASE PROGRAMMER 100118 NaN NaN 2 NaN \n",
"DATABASE SUPPORT COORD 70016 NaN NaN 1 NaN \n",
"DATABASE/PROGRAM ANALYST NaN 67029 NaN NaN 1 \n",
"DATAOPER 107138 NaN NaN 1 NaN \n",
"DEVELOPMENT DATABASE ADMI 67681 NaN NaN 1 NaN \n",
"DIR DATA AND TECHNOLOGY 152555 NaN NaN 1 NaN \n",
"DIR OF DATABASE ENG'G 102499 NaN NaN 1 NaN \n",
"ELEC DATA TRANS SPECIALIS 56530 52791 NaN 6 2 \n",
"ELECT DATA TECHNICIAN 64330 NaN NaN 1 NaN \n",
"IT DATABASE ADMIN 75163 NaN NaN 1 NaN \n",
"IT/DATA COORDINATOR 79982 NaN NaN 1 NaN \n",
"JUNIOR DATABASE ADMINISTR NaN 53318 NaN NaN 1 \n",
"MEMBER DATA ANALYST 57854 NaN NaN 1 NaN \n",
"ORG DATABASE ANALYST 67450 NaN NaN 1 NaN \n",
"ORGANIZING DATABASE MGR 90599 NaN NaN 1 NaN \n",
"PRODUCTN MGR/ASST DATA MG 62974 NaN NaN 1 NaN \n",
"SENIOR DATABASE DEVELOPER 115576 NaN NaN 1 NaN \n",
"SR DATA BASE ANALYST 83464 NaN NaN 1 NaN \n",
"SR DATABASE ANALYST 108507 94792 NaN 1 1 \n",
"SR MGR OF DATA PROCESSING NaN 89521 NaN NaN 1 \n",
"SR PROGRAMMER/DATA MINER NaN 109205 NaN NaN 1 \n",
"SR. DATABASE ADMINISTRAT 88349 NaN NaN 1 NaN \n",
"SR. DATABASE COORD. 46183 NaN NaN 1 NaN \n",
"\n",
"[107 rows x 9 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show only the titles that contain the substring 'DATA'.\n",
"has_data_in_title = pivoted_data.index.str.contains('DATA')\n",
"pivoted_data.loc[has_data_in_title]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment