Skip to content

Instantly share code, notes, and snippets.

@SonerYldrm
Created April 15, 2019 11:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SonerYldrm/9f95a9f176688a1ac307a2379452cd3a to your computer and use it in GitHub Desktop.
Save SonerYldrm/9f95a9f176688a1ac307a2379452cd3a to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"#import required libraries\n",
"import requests\n",
"import lxml.html as lh\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'\n",
"#Create a handle, page, to handle the contents of the website\n",
"page = requests.get(url)\n",
"#Store the contents of the website under doc\n",
"doc = lh.fromstring(page.content)\n",
"#Parse data that are stored between <tr>..</tr> of HTML\n",
"tr_elements = doc.xpath('//tr')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Check the length of the first 12 rows\n",
"[len(T) for T in tr_elements[:12]]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1:\"Postcode\"\n",
"2:\"Borough\"\n",
"3:\"Neighbourhood\n",
"\"\n"
]
}
],
"source": [
"tr_elements = doc.xpath('//tr')\n",
"#Create empty list\n",
"col=[]\n",
"i=0\n",
"#For each row, store each first element (header) and an empty list\n",
"for t in tr_elements[0]:\n",
" i+=1\n",
" name=t.text_content()\n",
" print('%d:\"%s\"'%(i,name))\n",
" col.append((name,[]))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"294"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(tr_elements)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"#Since out first row is the header, data is stored on the second row onwards\n",
"for j in range(1,len(tr_elements)):\n",
" #T is our j'th row\n",
" T=tr_elements[j]\n",
" \n",
" #If row is not of size 3, the //tr data is not from our table \n",
" if len(T)!=3:\n",
" break\n",
" \n",
" #i is the index of our column\n",
" i=0\n",
" \n",
" #Iterate through each element of the row\n",
" for t in T.iterchildren():\n",
" data=t.text_content() \n",
" #Check if row is empty\n",
"\n",
" #Append the data to the empty list of the i'th column\n",
" col[i][1].append(data)\n",
" #Increment i for the next column\n",
" i+=1"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[288, 288, 288]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[len(C) for (title,C) in col]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"#Create a dataframe with exported data\n",
"Dict={title:column for (title,column) in col}\n",
"df=pd.DataFrame(Dict)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighbourhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1A</td>\n",
" <td>Not assigned</td>\n",
" <td>Not assigned\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M2A</td>\n",
" <td>Not assigned</td>\n",
" <td>Not assigned\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M3A</td>\n",
" <td>North York</td>\n",
" <td>Parkwoods\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M4A</td>\n",
" <td>North York</td>\n",
" <td>Victoria Village\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Regent Park\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Heights\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Manor\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>M7A</td>\n",
" <td>Queen's Park</td>\n",
" <td>Not assigned\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>M8A</td>\n",
" <td>Not assigned</td>\n",
" <td>Not assigned\\n</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Borough Neighbourhood\\n\n",
"0 M1A Not assigned Not assigned\\n\n",
"1 M2A Not assigned Not assigned\\n\n",
"2 M3A North York Parkwoods\\n\n",
"3 M4A North York Victoria Village\\n\n",
"4 M5A Downtown Toronto Harbourfront\\n\n",
"5 M5A Downtown Toronto Regent Park\\n\n",
"6 M6A North York Lawrence Heights\\n\n",
"7 M6A North York Lawrence Manor\\n\n",
"8 M7A Queen's Park Not assigned\\n\n",
"9 M8A Not assigned Not assigned\\n"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(288, 3)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Ignore cells with a borough that is Not assigned.\n",
"df_1 = df[df['Borough'] != 'Not assigned'].reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighbourhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>M3A</td>\n",
" <td>North York</td>\n",
" <td>Parkwoods\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>M4A</td>\n",
" <td>North York</td>\n",
" <td>Victoria Village\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Regent Park\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Heights\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>7</td>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Manor\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8</td>\n",
" <td>M7A</td>\n",
" <td>Queen's Park</td>\n",
" <td>Not assigned\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>10</td>\n",
" <td>M9A</td>\n",
" <td>Etobicoke</td>\n",
" <td>Islington Avenue\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>11</td>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Rouge\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>12</td>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Malvern\\n</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index Postcode Borough Neighbourhood\\n\n",
"0 2 M3A North York Parkwoods\\n\n",
"1 3 M4A North York Victoria Village\\n\n",
"2 4 M5A Downtown Toronto Harbourfront\\n\n",
"3 5 M5A Downtown Toronto Regent Park\\n\n",
"4 6 M6A North York Lawrence Heights\\n\n",
"5 7 M6A North York Lawrence Manor\\n\n",
"6 8 M7A Queen's Park Not assigned\\n\n",
"7 10 M9A Etobicoke Islington Avenue\\n\n",
"8 11 M1B Scarborough Rouge\\n\n",
"9 12 M1B Scarborough Malvern\\n"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_1.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"103"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_1['Postcode'].unique().size"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df_1.columns = ['Drop','Postcode', 'Borough', 'Neighborhood']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Drop</th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>M3A</td>\n",
" <td>North York</td>\n",
" <td>Parkwoods\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>M4A</td>\n",
" <td>North York</td>\n",
" <td>Victoria Village\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Regent Park\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Heights\\n</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Drop Postcode Borough Neighborhood\n",
"0 2 M3A North York Parkwoods\\n\n",
"1 3 M4A North York Victoria Village\\n\n",
"2 4 M5A Downtown Toronto Harbourfront\\n\n",
"3 5 M5A Downtown Toronto Regent Park\\n\n",
"4 6 M6A North York Lawrence Heights\\n"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_1.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"df_2 = df_1[['Postcode','Borough','Neighborhood']]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M3A</td>\n",
" <td>North York</td>\n",
" <td>Parkwoods\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M4A</td>\n",
" <td>North York</td>\n",
" <td>Victoria Village\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Harbourfront\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Regent Park\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Heights\\n</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Borough Neighborhood\n",
"0 M3A North York Parkwoods\\n\n",
"1 M4A North York Victoria Village\\n\n",
"2 M5A Downtown Toronto Harbourfront\\n\n",
"3 M5A Downtown Toronto Regent Park\\n\n",
"4 M6A North York Lawrence Heights\\n"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_2.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"df_2 = df_2.groupby(['Postcode','Borough'])['Neighborhood'].apply(', '.join).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Rouge\\n, Malvern\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>Scarborough</td>\n",
" <td>Highland Creek\\n, Rouge Hill\\n, Port Union\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>Scarborough</td>\n",
" <td>Guildwood\\n, Morningside\\n, West Hill\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>Scarborough</td>\n",
" <td>Woburn\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>Scarborough</td>\n",
" <td>Cedarbrae\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>M1J</td>\n",
" <td>Scarborough</td>\n",
" <td>Scarborough Village\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>M1K</td>\n",
" <td>Scarborough</td>\n",
" <td>East Birchmount Park\\n, Ionview\\n, Kennedy Park\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>M1L</td>\n",
" <td>Scarborough</td>\n",
" <td>Clairlea\\n, Golden Mile\\n, Oakridge\\n</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>M1M</td>\n",
" <td>Scarborough</td>\n",
" <td>Cliffcrest\\n, Cliffside\\n, Scarborough Village...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>M1N</td>\n",
" <td>Scarborough</td>\n",
" <td>Birch Cliff\\n, Cliffside West\\n</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Borough Neighborhood\n",
"0 M1B Scarborough Rouge\\n, Malvern\\n\n",
"1 M1C Scarborough Highland Creek\\n, Rouge Hill\\n, Port Union\\n\n",
"2 M1E Scarborough Guildwood\\n, Morningside\\n, West Hill\\n\n",
"3 M1G Scarborough Woburn\\n\n",
"4 M1H Scarborough Cedarbrae\\n\n",
"5 M1J Scarborough Scarborough Village\\n\n",
"6 M1K Scarborough East Birchmount Park\\n, Ionview\\n, Kennedy Park\\n\n",
"7 M1L Scarborough Clairlea\\n, Golden Mile\\n, Oakridge\\n\n",
"8 M1M Scarborough Cliffcrest\\n, Cliffside\\n, Scarborough Village...\n",
"9 M1N Scarborough Birch Cliff\\n, Cliffside West\\n"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_2.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(103, 3)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_2.shape"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"#Create a dataframe including lotitude and longitude data\n",
"url_coordinates = 'http://cocl.us/Geospatial_data'\n",
"coordinates = pd.read_csv(url_coordinates)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postal Code</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>43.806686</td>\n",
" <td>-79.194353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>43.784535</td>\n",
" <td>-79.160497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>43.763573</td>\n",
" <td>-79.188711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>43.770992</td>\n",
" <td>-79.216917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>43.773136</td>\n",
" <td>-79.239476</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postal Code Latitude Longitude\n",
"0 M1B 43.806686 -79.194353\n",
"1 M1C 43.784535 -79.160497\n",
"2 M1E 43.763573 -79.188711\n",
"3 M1G 43.770992 -79.216917\n",
"4 M1H 43.773136 -79.239476"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coordinates.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"#Change the name of column in order to merge with other dataframe\n",
"coordinates.rename(columns={\"Postal Code\":\"Postcode\"}, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>43.806686</td>\n",
" <td>-79.194353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>43.784535</td>\n",
" <td>-79.160497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>43.763573</td>\n",
" <td>-79.188711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>43.770992</td>\n",
" <td>-79.216917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>43.773136</td>\n",
" <td>-79.239476</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Latitude Longitude\n",
"0 M1B 43.806686 -79.194353\n",
"1 M1C 43.784535 -79.160497\n",
"2 M1E 43.763573 -79.188711\n",
"3 M1G 43.770992 -79.216917\n",
"4 M1H 43.773136 -79.239476"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coordinates.head()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"df_merged = pd.merge(df_2, coordinates, on='Postcode') # Merge two dataframes on Postcode"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postcode</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>M1B</td>\n",
" <td>Scarborough</td>\n",
" <td>Rouge\\n, Malvern\\n</td>\n",
" <td>43.806686</td>\n",
" <td>-79.194353</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>M1C</td>\n",
" <td>Scarborough</td>\n",
" <td>Highland Creek\\n, Rouge Hill\\n, Port Union\\n</td>\n",
" <td>43.784535</td>\n",
" <td>-79.160497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M1E</td>\n",
" <td>Scarborough</td>\n",
" <td>Guildwood\\n, Morningside\\n, West Hill\\n</td>\n",
" <td>43.763573</td>\n",
" <td>-79.188711</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M1G</td>\n",
" <td>Scarborough</td>\n",
" <td>Woburn\\n</td>\n",
" <td>43.770992</td>\n",
" <td>-79.216917</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M1H</td>\n",
" <td>Scarborough</td>\n",
" <td>Cedarbrae\\n</td>\n",
" <td>43.773136</td>\n",
" <td>-79.239476</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Postcode Borough Neighborhood \\\n",
"0 M1B Scarborough Rouge\\n, Malvern\\n \n",
"1 M1C Scarborough Highland Creek\\n, Rouge Hill\\n, Port Union\\n \n",
"2 M1E Scarborough Guildwood\\n, Morningside\\n, West Hill\\n \n",
"3 M1G Scarborough Woburn\\n \n",
"4 M1H Scarborough Cedarbrae\\n \n",
"\n",
" Latitude Longitude \n",
"0 43.806686 -79.194353 \n",
"1 43.784535 -79.160497 \n",
"2 43.763573 -79.188711 \n",
"3 43.770992 -79.216917 \n",
"4 43.773136 -79.239476 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_merged.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment