Skip to content

Instantly share code, notes, and snippets.

@mjbommar
Last active August 29, 2015 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mjbommar/4cdf9e188c505a328809 to your computer and use it in GitHub Desktop.
Save mjbommar/4cdf9e188c505a328809 to your computer and use it in GitHub Desktop.
Sample is_male name probability estimates, conditioned by name, name/year, name/state, and name/year/state
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:93ee95c96f30075d52fc69273cdd5b032909e7e1f4a284aaaeae74d821041049"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Imports\n",
"import requests\n",
"import glob\n",
"import pandas"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read data\n",
"name_data = pandas.read_csv('names/all-states', header=None)\n",
"name_data.columns = ['state', 'sex', 'year', 'name', 'count']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Map sex to is_male\n",
"name_data['is_male'] = (name_data['sex'] == 'M')\n",
"name_data['male_count'] = name_data['is_male'] * name_data['count']\n",
"\n",
"# Map name to LC\n",
"name_data['name'] = name_data['name'].apply(str.lower).apply(str.strip)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 44
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"name_data.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>state</th>\n",
" <th>sex</th>\n",
" <th>year</th>\n",
" <th>name</th>\n",
" <th>count</th>\n",
" <th>is_male</th>\n",
" <th>male_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> AK</td>\n",
" <td> F</td>\n",
" <td> 1910</td>\n",
" <td> mary</td>\n",
" <td> 14</td>\n",
" <td> False</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> AK</td>\n",
" <td> F</td>\n",
" <td> 1910</td>\n",
" <td> annie</td>\n",
" <td> 12</td>\n",
" <td> False</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> AK</td>\n",
" <td> F</td>\n",
" <td> 1910</td>\n",
" <td> anna</td>\n",
" <td> 10</td>\n",
" <td> False</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> AK</td>\n",
" <td> F</td>\n",
" <td> 1910</td>\n",
" <td> margaret</td>\n",
" <td> 8</td>\n",
" <td> False</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> AK</td>\n",
" <td> F</td>\n",
" <td> 1910</td>\n",
" <td> helen</td>\n",
" <td> 7</td>\n",
" <td> False</td>\n",
" <td> 0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows \u00d7 7 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 45,
"text": [
" state sex year name count is_male male_count\n",
"0 AK F 1910 mary 14 False 0\n",
"1 AK F 1910 annie 12 False 0\n",
"2 AK F 1910 anna 10 False 0\n",
"3 AK F 1910 margaret 8 False 0\n",
"4 AK F 1910 helen 7 False 0\n",
"\n",
"[5 rows x 7 columns]"
]
}
],
"prompt_number": 45
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Get probability male by name\n",
"prob_male = name_data.groupby('name')['male_count'].sum() / name_data.groupby('name')['count'].sum()\n",
"prob_male_year = name_data.groupby(['name', 'year'])['male_count'].sum() / name_data.groupby(['name', 'year'])['count'].sum()\n",
"prob_male_state = name_data.groupby(['name', 'state'])['male_count'].sum() / name_data.groupby(['name', 'state'])['count'].sum()\n",
"prob_male_year_state = name_data.groupby(['name', 'year', 'state'])['male_count'].sum() / name_data.groupby(['name', 'year', 'state'])['count'].sum()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"prob_male['michael']"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 57,
"text": [
"0.99599974867530949"
]
}
],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"prob_male_state[('michael', 'CA')]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 58,
"text": [
"0.99425400243755246"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"prob_male_year[('michael', 2000)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 59,
"text": [
"0.99825441850316388"
]
}
],
"prompt_number": 59
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"prob_male_year[('michael', 1960)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 65,
"text": [
"0.99675659039524611"
]
}
],
"prompt_number": 65
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"prob_male_year[('chris', 1970)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 71,
"text": [
"0.91570959803117313"
]
}
],
"prompt_number": 71
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"prob_male_year[('amy', 2000)]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 73,
"text": [
"0.0"
]
}
],
"prompt_number": 73
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment