Skip to content

Instantly share code, notes, and snippets.

@TDahlberg
Created August 28, 2016 22:18
Show Gist options
  • Save TDahlberg/f0ac84468003e66aa3f302d17b776cdf to your computer and use it in GitHub Desktop.
Save TDahlberg/f0ac84468003e66aa3f302d17b776cdf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from geopy.geocoders import GoogleV3\n",
"import json\n",
"import googlemaps"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"# Add the data using pandas and check that it's working correctly\n",
"data = pd.read_excel('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/Mod TRACKSmapping Allentown.xlsx',sheetname='Location Information')"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(366, 14)\n"
]
},
{
"data": {
"text/plain": [
"0 NaN\n",
"1 NaN\n",
"2 NaN\n",
"3 5th & Linden, Allentown, PA\n",
"4 NaN\n",
"Name: gcaddress, dtype: object"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a merged address field\n",
"data['gcaddress'] = data['CrossStreet A'] + \" & \" + data['CrossStreet B'] + \", \" + data['City'] + \", \" + data['State (eg PA)']\n",
"print(data.shape)\n",
"data['gcaddress'].head()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(108, 14)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"'''# Drop rows with missing addresse values\n",
"cleandata = data.dropna(subset=['gcaddress'])\n",
"lessdata = cleandata.groupby(['gcaddress']).head(1)\n",
"lessdata.shape'''"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Geocode using googlemaps"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#### Test using google maps\n",
"#gmaps.geocode('1716 Mount Vernon St Philadelphia PA')[0]['geometry']['location']['lat']\n",
"#[0]['']['lat']\n",
"gmaps = googlemaps.Client(key='',queries_per_second=50)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"data['lat'] = ''\n",
"data['lon'] = ''"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Geocoding complete\n"
]
}
],
"source": [
"data['lat'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lat']))\n",
"data['lon'] = data['gcaddress'].apply(gmaps.geocode).apply(lambda x: (x[0]['geometry']['location']['lng']))\n",
"print('Geocoding complete')"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Item_Number</th>\n",
" <th>CrossStreet A Question Name</th>\n",
" <th>CrossStreet A</th>\n",
" <th>CrossStreet B Question Name</th>\n",
" <th>CrossStreet B</th>\n",
" <th>Distance Descriptor</th>\n",
" <th>City</th>\n",
" <th>State (eg PA)</th>\n",
" <th>Zipcode</th>\n",
" <th>Item 1D (Participant ID)</th>\n",
" <th>Optional: Exact Street Address</th>\n",
" <th>Community</th>\n",
" <th>Notes</th>\n",
" <th>gcaddress</th>\n",
" <th>lat</th>\n",
" <th>lon</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Item_6</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.5 to 1</td>\n",
" <td>Allentown</td>\n",
" <td>PA</td>\n",
" <td>18102</td>\n",
" <td>Allen316c2</td>\n",
" <td>101 Tilghman St</td>\n",
" <td>Allentown</td>\n",
" <td>Elias Market \"Syrian place\"?</td>\n",
" <td>NaN</td>\n",
" <td>18.775632</td>\n",
" <td>100.773042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Item_4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1 to 2</td>\n",
" <td>Allentown</td>\n",
" <td>PA</td>\n",
" <td>18102</td>\n",
" <td>Allen324s7</td>\n",
" <td>106 N 17th Street</td>\n",
" <td>Allentown</td>\n",
" <td>\"William\" school (William Allen HS?)</td>\n",
" <td>NaN</td>\n",
" <td>18.775632</td>\n",
" <td>100.773042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Item_3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1 to 2</td>\n",
" <td>Allentown</td>\n",
" <td>PA</td>\n",
" <td>18102</td>\n",
" <td>Allen316c5</td>\n",
" <td>106 N 17th Street</td>\n",
" <td>Allentown</td>\n",
" <td>William Allen High School</td>\n",
" <td>NaN</td>\n",
" <td>18.775632</td>\n",
" <td>100.773042</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Item_11</td>\n",
" <td>Pantry_1a</td>\n",
" <td>5th</td>\n",
" <td>Pantry_1b</td>\n",
" <td>Linden</td>\n",
" <td>0.5 to 1</td>\n",
" <td>Allentown</td>\n",
" <td>PA</td>\n",
" <td>18102</td>\n",
" <td>Allen324c4</td>\n",
" <td>108 N 5th St</td>\n",
" <td>Allentown</td>\n",
" <td>Grace Episcopal Church</td>\n",
" <td>5th &amp; Linden, Allentown, PA</td>\n",
" <td>40.604685</td>\n",
" <td>-75.468577</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Item_11</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.5 to 1</td>\n",
" <td>Allentown</td>\n",
" <td>PA</td>\n",
" <td>18102</td>\n",
" <td>Allen324s7</td>\n",
" <td>112 N 5th Street</td>\n",
" <td>Allentown</td>\n",
" <td>New Bethany Ministries</td>\n",
" <td>NaN</td>\n",
" <td>18.775632</td>\n",
" <td>100.773042</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Item_Number CrossStreet A Question Name CrossStreet A \\\n",
"0 Item_6 NaN NaN \n",
"1 Item_4 NaN NaN \n",
"2 Item_3 NaN NaN \n",
"3 Item_11 Pantry_1a 5th \n",
"4 Item_11 NaN NaN \n",
"\n",
" CrossStreet B Question Name CrossStreet B Distance Descriptor City \\\n",
"0 NaN NaN 0.5 to 1 Allentown \n",
"1 NaN NaN 1 to 2 Allentown \n",
"2 NaN NaN 1 to 2 Allentown \n",
"3 Pantry_1b Linden 0.5 to 1 Allentown \n",
"4 NaN NaN 0.5 to 1 Allentown \n",
"\n",
" State (eg PA) Zipcode Item 1D (Participant ID) \\\n",
"0 PA 18102 Allen316c2 \n",
"1 PA 18102 Allen324s7 \n",
"2 PA 18102 Allen316c5 \n",
"3 PA 18102 Allen324c4 \n",
"4 PA 18102 Allen324s7 \n",
"\n",
" Optional: Exact Street Address Community \\\n",
"0 101 Tilghman St Allentown \n",
"1 106 N 17th Street Allentown \n",
"2 106 N 17th Street Allentown \n",
"3 108 N 5th St Allentown \n",
"4 112 N 5th Street Allentown \n",
"\n",
" Notes gcaddress \\\n",
"0 Elias Market \"Syrian place\"? NaN \n",
"1 \"William\" school (William Allen HS?) NaN \n",
"2 William Allen High School NaN \n",
"3 Grace Episcopal Church 5th & Linden, Allentown, PA \n",
"4 New Bethany Ministries NaN \n",
"\n",
" lat lon \n",
"0 18.775632 100.773042 \n",
"1 18.775632 100.773042 \n",
"2 18.775632 100.773042 \n",
"3 40.604685 -75.468577 \n",
"4 18.775632 100.773042 "
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"data.to_csv('/Users/tylerdahlberg/projects/profdev/chantal_geocoding/geocoded/geocoded_Mod TRACKSmapping Allentown.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment