SonerYldrm/Coursera_Canada_PostalCodes.ipynb

## Coursera_Canada_PostalCodes.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#import required libraries\n",
    "import requests\n",
    "import lxml.html as lh\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'\n",
    "#Create a handle, page, to handle the contents of the website\n",
    "page = requests.get(url)\n",
    "#Store the contents of the website under doc\n",
    "doc = lh.fromstring(page.content)\n",
    "#Parse data that are stored between <tr>..</tr> of HTML\n",
    "tr_elements = doc.xpath('//tr')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Check the length of the first 12 rows\n",
    "[len(T) for T in tr_elements[:12]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1:\"Postcode\"\n",
      "2:\"Borough\"\n",
      "3:\"Neighbourhood\n",
      "\"\n"
     ]
    }
   ],
   "source": [
    "tr_elements = doc.xpath('//tr')\n",
    "#Create empty list\n",
    "col=[]\n",
    "i=0\n",
    "#For each row, store each first element (header) and an empty list\n",
    "for t in tr_elements[0]:\n",
    "    i+=1\n",
    "    name=t.text_content()\n",
    "    print('%d:\"%s\"'%(i,name))\n",
    "    col.append((name,[]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "294"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(tr_elements)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Since out first row is the header, data is stored on the second row onwards\n",
    "for j in range(1,len(tr_elements)):\n",
    "    #T is our j'th row\n",
    "    T=tr_elements[j]\n",
    "    \n",
    "    #If row is not of size 3, the //tr data is not from our table \n",
    "    if len(T)!=3:\n",
    "        break\n",
    "    \n",
    "    #i is the index of our column\n",
    "    i=0\n",
    "    \n",
    "    #Iterate through each element of the row\n",
    "    for t in T.iterchildren():\n",
    "        data=t.text_content() \n",
    "        #Check if row is empty\n",
    "\n",
    "        #Append the data to the empty list of the i'th column\n",
    "        col[i][1].append(data)\n",
    "        #Increment i for the next column\n",
    "        i+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[288, 288, 288]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[len(C) for (title,C) in col]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Create a dataframe with exported data\n",
    "Dict={title:column for (title,column) in col}\n",
    "df=pd.DataFrame(Dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighbourhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Harbourfront\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Heights\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Manor\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>M7A</td>\n",
       "      <td>Queen's Park</td>\n",
       "      <td>Not assigned\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>M8A</td>\n",
       "      <td>Not assigned</td>\n",
       "      <td>Not assigned\\n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postcode           Borough     Neighbourhood\\n\n",
       "0      M1A      Not assigned      Not assigned\\n\n",
       "1      M2A      Not assigned      Not assigned\\n\n",
       "2      M3A        North York         Parkwoods\\n\n",
       "3      M4A        North York  Victoria Village\\n\n",
       "4      M5A  Downtown Toronto      Harbourfront\\n\n",
       "5      M5A  Downtown Toronto       Regent Park\\n\n",
       "6      M6A        North York  Lawrence Heights\\n\n",
       "7      M6A        North York    Lawrence Manor\\n\n",
       "8      M7A      Queen's Park      Not assigned\\n\n",
       "9      M8A      Not assigned      Not assigned\\n"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(288, 3)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ignore cells with a borough that is Not assigned.\n",
    "df_1 = df[df['Borough'] != 'Not assigned'].reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighbourhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Harbourfront\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Heights\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>7</td>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Manor\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>8</td>\n",
       "      <td>M7A</td>\n",
       "      <td>Queen's Park</td>\n",
       "      <td>Not assigned\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>10</td>\n",
       "      <td>M9A</td>\n",
       "      <td>Etobicoke</td>\n",
       "      <td>Islington Avenue\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>11</td>\n",
       "      <td>M1B</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Rouge\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>12</td>\n",
       "      <td>M1B</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Malvern\\n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   index Postcode           Borough     Neighbourhood\\n\n",
       "0      2      M3A        North York         Parkwoods\\n\n",
       "1      3      M4A        North York  Victoria Village\\n\n",
       "2      4      M5A  Downtown Toronto      Harbourfront\\n\n",
       "3      5      M5A  Downtown Toronto       Regent Park\\n\n",
       "4      6      M6A        North York  Lawrence Heights\\n\n",
       "5      7      M6A        North York    Lawrence Manor\\n\n",
       "6      8      M7A      Queen's Park      Not assigned\\n\n",
       "7     10      M9A         Etobicoke  Islington Avenue\\n\n",
       "8     11      M1B       Scarborough             Rouge\\n\n",
       "9     12      M1B       Scarborough           Malvern\\n"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "103"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1['Postcode'].unique().size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_1.columns = ['Drop','Postcode', 'Borough', 'Neighborhood']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Drop</th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Harbourfront\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Heights\\n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Drop Postcode           Borough        Neighborhood\n",
       "0     2      M3A        North York         Parkwoods\\n\n",
       "1     3      M4A        North York  Victoria Village\\n\n",
       "2     4      M5A  Downtown Toronto      Harbourfront\\n\n",
       "3     5      M5A  Downtown Toronto       Regent Park\\n\n",
       "4     6      M6A        North York  Lawrence Heights\\n"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_2 = df_1[['Postcode','Borough','Neighborhood']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M3A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Parkwoods\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M4A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Victoria Village\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Harbourfront\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M5A</td>\n",
       "      <td>Downtown Toronto</td>\n",
       "      <td>Regent Park\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M6A</td>\n",
       "      <td>North York</td>\n",
       "      <td>Lawrence Heights\\n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postcode           Borough        Neighborhood\n",
       "0      M3A        North York         Parkwoods\\n\n",
       "1      M4A        North York  Victoria Village\\n\n",
       "2      M5A  Downtown Toronto      Harbourfront\\n\n",
       "3      M5A  Downtown Toronto       Regent Park\\n\n",
       "4      M6A        North York  Lawrence Heights\\n"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_2 = df_2.groupby(['Postcode','Borough'])['Neighborhood'].apply(', '.join).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1B</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Rouge\\n, Malvern\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M1C</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Highland Creek\\n, Rouge Hill\\n, Port Union\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M1E</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Guildwood\\n, Morningside\\n, West Hill\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M1G</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Woburn\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M1H</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Cedarbrae\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M1J</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Scarborough Village\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>M1K</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>East Birchmount Park\\n, Ionview\\n, Kennedy Park\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>M1L</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Clairlea\\n, Golden Mile\\n, Oakridge\\n</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>M1M</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Cliffcrest\\n, Cliffside\\n, Scarborough Village...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>M1N</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Birch Cliff\\n, Cliffside West\\n</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postcode      Borough                                       Neighborhood\n",
       "0      M1B  Scarborough                                 Rouge\\n, Malvern\\n\n",
       "1      M1C  Scarborough       Highland Creek\\n, Rouge Hill\\n, Port Union\\n\n",
       "2      M1E  Scarborough            Guildwood\\n, Morningside\\n, West Hill\\n\n",
       "3      M1G  Scarborough                                           Woburn\\n\n",
       "4      M1H  Scarborough                                        Cedarbrae\\n\n",
       "5      M1J  Scarborough                              Scarborough Village\\n\n",
       "6      M1K  Scarborough  East Birchmount Park\\n, Ionview\\n, Kennedy Park\\n\n",
       "7      M1L  Scarborough              Clairlea\\n, Golden Mile\\n, Oakridge\\n\n",
       "8      M1M  Scarborough  Cliffcrest\\n, Cliffside\\n, Scarborough Village...\n",
       "9      M1N  Scarborough                    Birch Cliff\\n, Cliffside West\\n"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(103, 3)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Create a dataframe including lotitude and longitude data\n",
    "url_coordinates = 'http://cocl.us/Geospatial_data'\n",
    "coordinates = pd.read_csv(url_coordinates)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postal Code</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1B</td>\n",
       "      <td>43.806686</td>\n",
       "      <td>-79.194353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M1C</td>\n",
       "      <td>43.784535</td>\n",
       "      <td>-79.160497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M1E</td>\n",
       "      <td>43.763573</td>\n",
       "      <td>-79.188711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M1G</td>\n",
       "      <td>43.770992</td>\n",
       "      <td>-79.216917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M1H</td>\n",
       "      <td>43.773136</td>\n",
       "      <td>-79.239476</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postal Code   Latitude  Longitude\n",
       "0         M1B  43.806686 -79.194353\n",
       "1         M1C  43.784535 -79.160497\n",
       "2         M1E  43.763573 -79.188711\n",
       "3         M1G  43.770992 -79.216917\n",
       "4         M1H  43.773136 -79.239476"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "coordinates.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Change the name of column in order to merge with other dataframe\n",
    "coordinates.rename(columns={\"Postal Code\":\"Postcode\"}, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1B</td>\n",
       "      <td>43.806686</td>\n",
       "      <td>-79.194353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M1C</td>\n",
       "      <td>43.784535</td>\n",
       "      <td>-79.160497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M1E</td>\n",
       "      <td>43.763573</td>\n",
       "      <td>-79.188711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M1G</td>\n",
       "      <td>43.770992</td>\n",
       "      <td>-79.216917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M1H</td>\n",
       "      <td>43.773136</td>\n",
       "      <td>-79.239476</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postcode   Latitude  Longitude\n",
       "0      M1B  43.806686 -79.194353\n",
       "1      M1C  43.784535 -79.160497\n",
       "2      M1E  43.763573 -79.188711\n",
       "3      M1G  43.770992 -79.216917\n",
       "4      M1H  43.773136 -79.239476"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "coordinates.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merged = pd.merge(df_2, coordinates, on='Postcode') # Merge two dataframes on Postcode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Postcode</th>\n",
       "      <th>Borough</th>\n",
       "      <th>Neighborhood</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M1B</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Rouge\\n, Malvern\\n</td>\n",
       "      <td>43.806686</td>\n",
       "      <td>-79.194353</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M1C</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Highland Creek\\n, Rouge Hill\\n, Port Union\\n</td>\n",
       "      <td>43.784535</td>\n",
       "      <td>-79.160497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M1E</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Guildwood\\n, Morningside\\n, West Hill\\n</td>\n",
       "      <td>43.763573</td>\n",
       "      <td>-79.188711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M1G</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Woburn\\n</td>\n",
       "      <td>43.770992</td>\n",
       "      <td>-79.216917</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M1H</td>\n",
       "      <td>Scarborough</td>\n",
       "      <td>Cedarbrae\\n</td>\n",
       "      <td>43.773136</td>\n",
       "      <td>-79.239476</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Postcode      Borough                                  Neighborhood  \\\n",
       "0      M1B  Scarborough                            Rouge\\n, Malvern\\n   \n",
       "1      M1C  Scarborough  Highland Creek\\n, Rouge Hill\\n, Port Union\\n   \n",
       "2      M1E  Scarborough       Guildwood\\n, Morningside\\n, West Hill\\n   \n",
       "3      M1G  Scarborough                                      Woburn\\n   \n",
       "4      M1H  Scarborough                                   Cedarbrae\\n   \n",
       "\n",
       "    Latitude  Longitude  \n",
       "0  43.806686 -79.194353  \n",
       "1  43.784535 -79.160497  \n",
       "2  43.763573 -79.188711  \n",
       "3  43.770992 -79.216917  \n",
       "4  43.773136 -79.239476  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_merged.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}