Skip to content

Instantly share code, notes, and snippets.

@MasoudAali
Created April 23, 2020 16:39
Show Gist options
  • Save MasoudAali/55d6912f37c82bcbf1312af16d46014e to your computer and use it in GitHub Desktop.
Save MasoudAali/55d6912f37c82bcbf1312af16d46014e to your computer and use it in GitHub Desktop.
Created on Skills Network Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Postal code Borough \\\n",
"0 M1A Not assigned \n",
"1 M2A Not assigned \n",
"2 M3A North York \n",
"3 M4A North York \n",
"4 M5A Downtown Toronto \n",
".. ... ... \n",
"175 M5Z Not assigned \n",
"176 M6Z Not assigned \n",
"177 M7Z Not assigned \n",
"178 M8Z Etobicoke \n",
"179 M9Z Not assigned \n",
"\n",
" Neighborhood \n",
"0 NaN \n",
"1 NaN \n",
"2 Parkwoods \n",
"3 Victoria Village \n",
"4 Regent Park / Harbourfront \n",
".. ... \n",
"175 NaN \n",
"176 NaN \n",
"177 NaN \n",
"178 Mimico NW / The Queensway West / South of Bloo... \n",
"179 NaN \n",
"\n",
"[180 rows x 3 columns]\n"
]
}
],
"source": [
"import requests\n",
"import pandas as pd\n",
"# Download the Wikipedia page that lists Toronto ('M') postal codes.\n",
"url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'\n",
"# A timeout keeps the cell from hanging forever; raise_for_status() surfaces HTTP\n",
"# errors here instead of letting read_html fail later on an error page.\n",
"response = requests.get(url, timeout=30)\n",
"response.raise_for_status()\n",
"html = response.content\n",
"# read_html returns one DataFrame per <table>; the postal-code table is the first one.\n",
"df_list = pd.read_html(html)\n",
"df = df_list[0]\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Lawrence Manor / Lawrence Heights'"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop rows whose Borough is 'Not assigned'.\n",
"# .copy() makes the result an independent frame so later column assignments\n",
"# do not trigger SettingWithCopyWarning.\n",
"df = df[df['Borough'] != 'Not assigned'].copy()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid syntax (<ipython-input-39-c3c0de095a4a>, line 2)",
"output_type": "error",
"traceback": [
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-39-c3c0de095a4a>\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m if (df.iloc[i,2] == 'Not assigned'):\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
]
}
],
"source": [
"# Replace un-assigned Neighborhood values with their Borough value.\n",
"# Missing neighborhoods may appear as NaN or as the literal 'Not assigned', so handle both.\n",
"unassigned = df['Neighborhood'].isna() | (df['Neighborhood'] == 'Not assigned')\n",
"# mask() swaps in the Borough wherever the flag is True; assign() returns a new frame,\n",
"# so re-running the cell gives the same result.\n",
"df = df.assign(Neighborhood=df['Neighborhood'].mask(unassigned, df['Borough']))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# Cleaning the data: turn the '/' separators between neighborhood names into ', '.\n",
"# The pattern also consumes the spaces around each '/' so 'A / B' becomes 'A, B'\n",
"# rather than 'A , B'; assign() avoids writing into a filtered slice of the table.\n",
"df = df.assign(Neighborhood=df['Neighborhood'].str.replace(r'\\s*/\\s*', ', ', regex=True))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Postal code</th>\n",
" <th>Borough</th>\n",
" <th>Neighborhood</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>M3A</td>\n",
" <td>North York</td>\n",
" <td>Parkwoods</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>M4A</td>\n",
" <td>North York</td>\n",
" <td>Victoria Village</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>M5A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Regent Park , Harbourfront</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>M6A</td>\n",
" <td>North York</td>\n",
" <td>Lawrence Manor , Lawrence Heights</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>M7A</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Queen's Park , Ontario Provincial Government</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>M8X</td>\n",
" <td>Etobicoke</td>\n",
" <td>The Kingsway , Montgomery Road , Old Mill North</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>M4Y</td>\n",
" <td>Downtown Toronto</td>\n",
" <td>Church and Wellesley</td>\n",
" </tr>\n",
" <tr>\n",
" <th>168</th>\n",
" <td>M7Y</td>\n",
" <td>East Toronto</td>\n",
" <td>Business reply mail Processing CentrE</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169</th>\n",
" <td>M8Y</td>\n",
" <td>Etobicoke</td>\n",
" <td>Old Mill South , King's Mill Park , Sunnylea ,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>M8Z</td>\n",
" <td>Etobicoke</td>\n",
" <td>Mimico NW , The Queensway West , South of Bloo...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>103 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Postal code Borough \\\n",
"2 M3A North York \n",
"3 M4A North York \n",
"4 M5A Downtown Toronto \n",
"5 M6A North York \n",
"6 M7A Downtown Toronto \n",
".. ... ... \n",
"160 M8X Etobicoke \n",
"165 M4Y Downtown Toronto \n",
"168 M7Y East Toronto \n",
"169 M8Y Etobicoke \n",
"178 M8Z Etobicoke \n",
"\n",
" Neighborhood \n",
"2 Parkwoods \n",
"3 Victoria Village \n",
"4 Regent Park , Harbourfront \n",
"5 Lawrence Manor , Lawrence Heights \n",
"6 Queen's Park , Ontario Provincial Government \n",
".. ... \n",
"160 The Kingsway , Montgomery Road , Old Mill North \n",
"165 Church and Wellesley \n",
"168 Business reply mail Processing CentrE \n",
"169 Old Mill South , King's Mill Park , Sunnylea ,... \n",
"178 Mimico NW , The Queensway West , South of Bloo... \n",
"\n",
"[103 rows x 3 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview the cleaned table. Only a cell's last expression is rendered automatically,\n",
"# so the preview needs an explicit display() call.\n",
"display(df.head())\n",
"# Number of rows\n",
"df.shape[0]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"language": "python",
"name": "conda-env-python-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment