Skip to content

Instantly share code, notes, and snippets.

@AlisonDavey
Created October 17, 2018 22:07
Show Gist options
  • Save AlisonDavey/bef98362f4e442b340ed0a05ead43b91 to your computer and use it in GitHub Desktop.
Save AlisonDavey/bef98362f4e442b340ed0a05ead43b91 to your computer and use it in GitHub Desktop.
fast_ai_v1_p3_people_map.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "fast.ai map of p1v3 participants"
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "import numpy as np\nimport pandas as pd\n\n#put the path to your data files here\n#PATH = '...'\n",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df = pd.read_csv(f'{PATH}fastaip1v3.csv')\ndf.head()",
"execution_count": 2,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-44</td>\n <td>173</td>\n </tr>\n <tr>\n <th>1</th>\n <td>-43</td>\n <td>147</td>\n </tr>\n <tr>\n <th>2</th>\n <td>-39</td>\n <td>-73</td>\n </tr>\n <tr>\n <th>3</th>\n <td>-38</td>\n <td>145</td>\n </tr>\n <tr>\n <th>4</th>\n <td>-38</td>\n <td>145</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long\n0 -44 173\n1 -43 147\n2 -39 -73\n3 -38 145\n4 -38 145"
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df = df.groupby(['lat','long']).agg({'lat':'count'})\ndf.rename(columns={'lat': 'count'}, inplace=True)\ndf.reset_index(level=['lat', 'long'], inplace=True)\ndf.head()",
"execution_count": 3,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-44</td>\n <td>173</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>-43</td>\n <td>147</td>\n <td>1</td>\n </tr>\n <tr>\n <th>2</th>\n <td>-39</td>\n <td>-73</td>\n <td>1</td>\n </tr>\n <tr>\n <th>3</th>\n <td>-38</td>\n <td>145</td>\n <td>29</td>\n </tr>\n <tr>\n <th>4</th>\n <td>-35</td>\n <td>-59</td>\n <td>1</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count\n0 -44 173 1\n1 -43 147 1\n2 -39 -73 1\n3 -38 145 29\n4 -35 -59 1"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df.shape, df['count'].sum()",
"execution_count": 4,
"outputs": [
{
"data": {
"text/plain": "((491, 3), 2721)"
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "World Cities Database downloaded from kaggle\n\nhttps://www.kaggle.com/max-mind/world-cities-database"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_places = pd.read_csv(f'{PATH}worldcitiespop.csv', low_memory=False)\ndf_places.head()",
"execution_count": 5,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>ad</td>\n <td>aixas</td>\n <td>Aixàs</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.483333</td>\n <td>1.466667</td>\n </tr>\n <tr>\n <th>1</th>\n <td>ad</td>\n <td>aixirivali</td>\n <td>Aixirivali</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.500000</td>\n </tr>\n <tr>\n <th>2</th>\n <td>ad</td>\n <td>aixirivall</td>\n <td>Aixirivall</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.500000</td>\n </tr>\n <tr>\n <th>3</th>\n <td>ad</td>\n <td>aixirvall</td>\n <td>Aixirvall</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.500000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>ad</td>\n <td>aixovall</td>\n <td>Aixovall</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.483333</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " Country City AccentCity Region Population Latitude Longitude\n0 ad aixas Aixàs 06 NaN 42.483333 1.466667\n1 ad aixirivali Aixirivali 06 NaN 42.466667 1.500000\n2 ad aixirivall Aixirivall 06 NaN 42.466667 1.500000\n3 ad aixirvall Aixirvall 06 NaN 42.466667 1.500000\n4 ad aixovall Aixovall 06 NaN 42.466667 1.483333"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_places['lat'] = df_places.Latitude.round()\ndf_places['long'] = df_places.Longitude.round()\ndf_places.head()",
"execution_count": 6,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n <th>lat</th>\n <th>long</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>ad</td>\n <td>aixas</td>\n <td>Aixàs</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.483333</td>\n <td>1.466667</td>\n <td>42.0</td>\n <td>1.0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>ad</td>\n <td>aixirivali</td>\n <td>Aixirivali</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.500000</td>\n <td>42.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>ad</td>\n <td>aixirivall</td>\n <td>Aixirivall</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.500000</td>\n <td>42.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>ad</td>\n <td>aixirvall</td>\n <td>Aixirvall</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.500000</td>\n <td>42.0</td>\n <td>2.0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>ad</td>\n <td>aixovall</td>\n <td>Aixovall</td>\n <td>06</td>\n <td>NaN</td>\n <td>42.466667</td>\n <td>1.483333</td>\n <td>42.0</td>\n <td>1.0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " Country City AccentCity Region Population Latitude Longitude \\\n0 ad aixas Aixàs 06 NaN 42.483333 1.466667 \n1 ad aixirivali Aixirivali 06 NaN 42.466667 1.500000 \n2 ad aixirivall Aixirivall 06 NaN 42.466667 1.500000 \n3 ad aixirvall Aixirvall 06 NaN 42.466667 1.500000 \n4 ad aixovall Aixovall 06 NaN 42.466667 1.483333 \n\n lat long \n0 42.0 1.0 \n1 42.0 2.0 \n2 42.0 2.0 \n3 42.0 2.0 \n4 42.0 1.0 "
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "#hack (see below)\n# for checking manual addition of Country\tCity\tAccentCity where\n# there are no population figures\ntmp = df_places.loc[df_places['lat'] == 24.0]\ntmp = tmp.loc[df_places['long'] == 40.0]\ntmp.head()",
"execution_count": null,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Many places have the same `lat` and `long`. Select the place with the highest population."
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "idx = df_places.groupby(['lat','long'])['Population'].transform(max) == df_places['Population']\ndf_places = df_places[idx]\ndf_places.head()",
"execution_count": 8,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>Country</th>\n <th>City</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>Population</th>\n <th>Latitude</th>\n <th>Longitude</th>\n <th>lat</th>\n <th>long</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>93</th>\n <td>ae</td>\n <td>abu dhabi</td>\n <td>Abu Dhabi</td>\n <td>01</td>\n <td>603687.0</td>\n <td>24.466667</td>\n <td>54.366667</td>\n <td>24.0</td>\n <td>54.0</td>\n </tr>\n <tr>\n <th>242</th>\n <td>ae</td>\n <td>dubai</td>\n <td>Dubai</td>\n <td>03</td>\n <td>1137376.0</td>\n <td>25.258172</td>\n <td>55.304717</td>\n <td>25.0</td>\n <td>55.0</td>\n </tr>\n <tr>\n <th>4547</th>\n <td>af</td>\n <td>asadabad</td>\n <td>Asadabad</td>\n <td>34</td>\n <td>48400.0</td>\n <td>34.873113</td>\n <td>71.146970</td>\n <td>35.0</td>\n <td>71.0</td>\n </tr>\n <tr>\n <th>5410</th>\n <td>af</td>\n <td>aybak</td>\n <td>Aybak</td>\n <td>32</td>\n <td>47877.0</td>\n <td>36.264680</td>\n <td>68.015512</td>\n <td>36.0</td>\n <td>68.0</td>\n </tr>\n <tr>\n <th>6644</th>\n <td>af</td>\n <td>baglan</td>\n <td>Baglan</td>\n <td>03</td>\n <td>108481.0</td>\n <td>36.130684</td>\n <td>68.708286</td>\n <td>36.0</td>\n <td>69.0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " Country City AccentCity Region Population Latitude Longitude \\\n93 ae abu dhabi Abu Dhabi 01 603687.0 24.466667 54.366667 \n242 ae dubai Dubai 03 1137376.0 25.258172 55.304717 \n4547 af asadabad Asadabad 34 48400.0 34.873113 71.146970 \n5410 af aybak Aybak 32 47877.0 36.264680 68.015512 \n6644 af baglan Baglan 03 108481.0 36.130684 68.708286 \n\n lat long \n93 24.0 54.0 \n242 25.0 55.0 \n4547 35.0 71.0 \n5410 36.0 68.0 \n6644 36.0 69.0 "
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map = df.merge(df_places, how='left', on=['lat','long'])\ndf_map=df_map[['lat','long','count','Country','AccentCity','Region']]\ndf_map.head()",
"execution_count": 9,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-44</td>\n <td>173</td>\n <td>1</td>\n <td>nz</td>\n <td>Christchurch</td>\n <td>E9</td>\n </tr>\n <tr>\n <th>1</th>\n <td>-43</td>\n <td>147</td>\n <td>1</td>\n <td>au</td>\n <td>Hobart</td>\n <td>06</td>\n </tr>\n <tr>\n <th>2</th>\n <td>-39</td>\n <td>-73</td>\n <td>1</td>\n <td>cl</td>\n <td>Temuco</td>\n <td>04</td>\n </tr>\n <tr>\n <th>3</th>\n <td>-38</td>\n <td>145</td>\n <td>29</td>\n <td>au</td>\n <td>Melbourne</td>\n <td>07</td>\n </tr>\n <tr>\n <th>4</th>\n <td>-35</td>\n <td>-59</td>\n <td>1</td>\n <td>ar</td>\n <td>Lujan</td>\n <td>01</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n0 -44 173 1 nz Christchurch E9\n1 -43 147 1 au Hobart 06\n2 -39 -73 1 cl Temuco 04\n3 -38 145 29 au Melbourne 07\n4 -35 -59 1 ar Lujan 01"
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.shape, df_map['count'].sum() #fails sanity check",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": "((496, 6), 2801)"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Problem with repeated values of `lat\tlong\tcount`"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "for i in range (1, df_map.shape[0]):\n if (df_map.loc[i,['lat','long', 'count']] == df_map.loc[i-1,['lat','long', 'count']]).all():\n print (i)",
"execution_count": 11,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "109\n149\n241\n444\n490\n"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.iloc[108:110]",
"execution_count": 12,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>108</th>\n <td>21</td>\n <td>106</td>\n <td>4</td>\n <td>vn</td>\n <td>Hanoi</td>\n <td>44</td>\n </tr>\n <tr>\n <th>109</th>\n <td>21</td>\n <td>106</td>\n <td>4</td>\n <td>vn</td>\n <td>Ha Noi</td>\n <td>44</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n108 21 106 4 vn Hanoi 44\n109 21 106 4 vn Ha Noi 44"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "df_map.drop(df_map.index[109], inplace=True)",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.iloc[147:149]",
"execution_count": 14,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>148</th>\n <td>29</td>\n <td>77</td>\n <td>67</td>\n <td>in</td>\n <td>Delhi</td>\n <td>07</td>\n </tr>\n <tr>\n <th>149</th>\n <td>29</td>\n <td>77</td>\n <td>67</td>\n <td>in</td>\n <td>New Delhi</td>\n <td>07</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n148 29 77 67 in Delhi 07\n149 29 77 67 in New Delhi 07"
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "df_map.drop(df_map.index[148], inplace=True)",
"execution_count": 15,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.iloc[238:240]",
"execution_count": 16,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>240</th>\n <td>39</td>\n <td>-95</td>\n <td>1</td>\n <td>us</td>\n <td>Kansas City</td>\n <td>KS</td>\n </tr>\n <tr>\n <th>241</th>\n <td>39</td>\n <td>-95</td>\n <td>1</td>\n <td>us</td>\n <td>Kansas City</td>\n <td>MO</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n240 39 -95 1 us Kansas City KS\n241 39 -95 1 us Kansas City MO"
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "df_map.drop(df_map.index[239], inplace=True)",
"execution_count": 17,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.iloc[440:442]",
"execution_count": 18,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>443</th>\n <td>53</td>\n <td>-1</td>\n <td>7</td>\n <td>gb</td>\n <td>Leicester</td>\n <td>H4</td>\n </tr>\n <tr>\n <th>444</th>\n <td>53</td>\n <td>-1</td>\n <td>7</td>\n <td>gb</td>\n <td>Leicester</td>\n <td>H5</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n443 53 -1 7 gb Leicester H4\n444 53 -1 7 gb Leicester H5"
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "df_map.drop(df_map.index[441], inplace=True)",
"execution_count": 19,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.iloc[485:487]",
"execution_count": 20,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>489</th>\n <td>60</td>\n <td>18</td>\n <td>1</td>\n <td>se</td>\n <td>Uppsala</td>\n <td>21</td>\n </tr>\n <tr>\n <th>490</th>\n <td>60</td>\n <td>18</td>\n <td>1</td>\n <td>se</td>\n <td>Uppsala</td>\n <td>26</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n489 60 18 1 se Uppsala 21\n490 60 18 1 se Uppsala 26"
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "df_map.drop(df_map.index[486], inplace=True)",
"execution_count": 21,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.shape, df_map['count'].sum() #passes sanity check",
"execution_count": 22,
"outputs": [
{
"data": {
"text/plain": "((491, 6), 2721)"
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Some places had no `Population` values, all `NaN`s"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.iloc[df_map.isnull().any(1).nonzero()[0]]",
"execution_count": 23,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>122</th>\n <td>24</td>\n <td>40</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>172</th>\n <td>31</td>\n <td>122</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>186</th>\n <td>33</td>\n <td>77</td>\n <td>2</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>200</th>\n <td>35</td>\n <td>127</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>215</th>\n <td>36</td>\n <td>108</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>216</th>\n <td>36</td>\n <td>127</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>217</th>\n <td>36</td>\n <td>129</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>234</th>\n <td>38</td>\n <td>0</td>\n <td>2</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>249</th>\n <td>39</td>\n <td>39</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>354</th>\n <td>47</td>\n <td>-89</td>\n <td>1</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n122 24 40 1 NaN NaN NaN\n172 31 122 1 NaN NaN NaN\n186 33 77 2 NaN NaN NaN\n200 35 127 1 NaN NaN NaN\n215 36 108 1 NaN NaN NaN\n216 36 127 1 NaN NaN NaN\n217 36 129 1 NaN NaN NaN\n234 38 0 2 NaN NaN NaN\n249 39 39 1 NaN NaN NaN\n354 47 -89 1 NaN NaN NaN"
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Manually allocate `Country AccentCity Region` by looking at\n\nhttps://www.findlatitudeandlongitude.com\n\nand looking at data from `df_places` at the `#hack` above\n\n122\t24\t40 sa Medina 05\n\n172\t31\t122 cn Shanghai 23\n\n186\t33\t77 in Darcha 11\n\n200\t35\t127 kr Gwangju 16\n\n215\t36\t108 cn Qingyang 15\n\n216\t36\t127 kr Jeonju 17\n\n217\t36\t129 kr Yeongcheon 14\n\n234\t38\t0 es San Juan de Alicante 60\n\n249\t39\t39 tr Elazig 62\n\n354\t47\t-89 us Houghton MI"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.loc[122, ['Country','AccentCity','Region']]=['sa','Medina','05']\ndf_map.loc[172, ['Country','AccentCity','Region']]=['cn','Shanghai','23']\ndf_map.loc[186, ['Country','AccentCity','Region']]=['in','Darcha','11']\ndf_map.loc[200, ['Country','AccentCity','Region']]=['kr','Gwangju','16']\ndf_map.loc[215, ['Country','AccentCity','Region']]=['cn','Qingyang','15']\ndf_map.loc[216, ['Country','AccentCity','Region']]=['kr','Jeonju','17']\ndf_map.loc[217, ['Country','AccentCity','Region']]=['kr','Yeongcheon','14']\ndf_map.loc[234, ['Country','AccentCity','Region']]=['es', 'San Juan de Alicante', '60']\ndf_map.loc[249, ['Country','AccentCity','Region']]=['tr', 'Elazig', '62']\ndf_map.loc[354, ['Country','AccentCity','Region']]=['us', 'Houghton', 'MI']\ndf_map.loc[122]",
"execution_count": 24,
"outputs": [
{
"data": {
"text/plain": "lat 24\nlong 40\ncount 1\nCountry sa\nAccentCity Medina\nRegion 05\nName: 122, dtype: object"
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Convert two-letter country codes to upper case"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map['Country'] = df_map['Country'].apply(lambda x: x.upper())\ndf_map.head()",
"execution_count": 25,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-44</td>\n <td>173</td>\n <td>1</td>\n <td>NZ</td>\n <td>Christchurch</td>\n <td>E9</td>\n </tr>\n <tr>\n <th>1</th>\n <td>-43</td>\n <td>147</td>\n <td>1</td>\n <td>AU</td>\n <td>Hobart</td>\n <td>06</td>\n </tr>\n <tr>\n <th>2</th>\n <td>-39</td>\n <td>-73</td>\n <td>1</td>\n <td>CL</td>\n <td>Temuco</td>\n <td>04</td>\n </tr>\n <tr>\n <th>3</th>\n <td>-38</td>\n <td>145</td>\n <td>29</td>\n <td>AU</td>\n <td>Melbourne</td>\n <td>07</td>\n </tr>\n <tr>\n <th>4</th>\n <td>-35</td>\n <td>-59</td>\n <td>1</td>\n <td>AR</td>\n <td>Lujan</td>\n <td>01</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region\n0 -44 173 1 NZ Christchurch E9\n1 -43 147 1 AU Hobart 06\n2 -39 -73 1 CL Temuco 04\n3 -38 145 29 AU Melbourne 07\n4 -35 -59 1 AR Lujan 01"
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "ISO countries and regional codes downloaded from github\n\nhttps://github.com/lukes/ISO-3166-Countries-with-Regional-Codes"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_iso = pd.read_csv(f'{PATH}all.csv')\ndf_iso.head()",
"execution_count": 26,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>name</th>\n <th>alpha-2</th>\n <th>alpha-3</th>\n <th>country-code</th>\n <th>iso_3166-2</th>\n <th>region</th>\n <th>sub-region</th>\n <th>intermediate-region</th>\n <th>region-code</th>\n <th>sub-region-code</th>\n <th>intermediate-region-code</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Afghanistan</td>\n <td>AF</td>\n <td>AFG</td>\n <td>4</td>\n <td>ISO 3166-2:AF</td>\n <td>Asia</td>\n <td>Southern Asia</td>\n <td>NaN</td>\n <td>142.0</td>\n <td>34.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Åland Islands</td>\n <td>AX</td>\n <td>ALA</td>\n <td>248</td>\n <td>ISO 3166-2:AX</td>\n <td>Europe</td>\n <td>Northern Europe</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>154.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Albania</td>\n <td>AL</td>\n <td>ALB</td>\n <td>8</td>\n <td>ISO 3166-2:AL</td>\n <td>Europe</td>\n <td>Southern Europe</td>\n <td>NaN</td>\n <td>150.0</td>\n <td>39.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Algeria</td>\n <td>DZ</td>\n <td>DZA</td>\n <td>12</td>\n <td>ISO 3166-2:DZ</td>\n <td>Africa</td>\n <td>Northern Africa</td>\n <td>NaN</td>\n <td>2.0</td>\n <td>15.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>American Samoa</td>\n <td>AS</td>\n <td>ASM</td>\n <td>16</td>\n <td>ISO 3166-2:AS</td>\n <td>Oceania</td>\n <td>Polynesia</td>\n <td>NaN</td>\n <td>9.0</td>\n <td>61.0</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " name alpha-2 alpha-3 country-code iso_3166-2 region \\\n0 Afghanistan AF AFG 4 ISO 3166-2:AF Asia \n1 Åland Islands AX ALA 248 ISO 3166-2:AX Europe \n2 Albania AL ALB 8 ISO 3166-2:AL Europe \n3 Algeria DZ DZA 12 ISO 3166-2:DZ Africa \n4 American Samoa AS ASM 16 ISO 3166-2:AS Oceania \n\n sub-region intermediate-region region-code sub-region-code \\\n0 Southern Asia NaN 142.0 34.0 \n1 Northern Europe NaN 150.0 154.0 \n2 Southern Europe NaN 150.0 39.0 \n3 Northern Africa NaN 2.0 15.0 \n4 Polynesia NaN 9.0 61.0 \n\n intermediate-region-code \n0 NaN \n1 NaN \n2 NaN \n3 NaN \n4 NaN "
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_iso.rename(columns = {'alpha-2':'Country'}, inplace=True)\ndf_map = df_map.merge(df_iso, how='left', on=['Country'])\ndf_map.head()",
"execution_count": 27,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>lat</th>\n <th>long</th>\n <th>count</th>\n <th>Country</th>\n <th>AccentCity</th>\n <th>Region</th>\n <th>name</th>\n <th>alpha-3</th>\n <th>country-code</th>\n <th>iso_3166-2</th>\n <th>region</th>\n <th>sub-region</th>\n <th>intermediate-region</th>\n <th>region-code</th>\n <th>sub-region-code</th>\n <th>intermediate-region-code</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>-44</td>\n <td>173</td>\n <td>1</td>\n <td>NZ</td>\n <td>Christchurch</td>\n <td>E9</td>\n <td>New Zealand</td>\n <td>NZL</td>\n <td>554</td>\n <td>ISO 3166-2:NZ</td>\n <td>Oceania</td>\n <td>Australia and New Zealand</td>\n <td>NaN</td>\n <td>9.0</td>\n <td>53.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>-43</td>\n <td>147</td>\n <td>1</td>\n <td>AU</td>\n <td>Hobart</td>\n <td>06</td>\n <td>Australia</td>\n <td>AUS</td>\n <td>36</td>\n <td>ISO 3166-2:AU</td>\n <td>Oceania</td>\n <td>Australia and New Zealand</td>\n <td>NaN</td>\n <td>9.0</td>\n <td>53.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>-39</td>\n <td>-73</td>\n <td>1</td>\n <td>CL</td>\n <td>Temuco</td>\n <td>04</td>\n <td>Chile</td>\n <td>CHL</td>\n <td>152</td>\n <td>ISO 3166-2:CL</td>\n <td>Americas</td>\n <td>Latin America and the Caribbean</td>\n <td>South America</td>\n <td>19.0</td>\n <td>419.0</td>\n <td>5.0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>-38</td>\n <td>145</td>\n <td>29</td>\n <td>AU</td>\n <td>Melbourne</td>\n <td>07</td>\n <td>Australia</td>\n <td>AUS</td>\n <td>36</td>\n <td>ISO 3166-2:AU</td>\n <td>Oceania</td>\n <td>Australia and New Zealand</td>\n <td>NaN</td>\n <td>9.0</td>\n <td>53.0</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>-35</td>\n <td>-59</td>\n <td>1</td>\n <td>AR</td>\n <td>Lujan</td>\n <td>01</td>\n <td>Argentina</td>\n <td>ARG</td>\n <td>32</td>\n <td>ISO 3166-2:AR</td>\n <td>Americas</td>\n <td>Latin America and the Caribbean</td>\n <td>South America</td>\n <td>19.0</td>\n <td>419.0</td>\n <td>5.0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " lat long count Country AccentCity Region name alpha-3 \\\n0 -44 173 1 NZ Christchurch E9 New Zealand NZL \n1 -43 147 1 AU Hobart 06 Australia AUS \n2 -39 -73 1 CL Temuco 04 Chile CHL \n3 -38 145 29 AU Melbourne 07 Australia AUS \n4 -35 -59 1 AR Lujan 01 Argentina ARG \n\n country-code iso_3166-2 region sub-region \\\n0 554 ISO 3166-2:NZ Oceania Australia and New Zealand \n1 36 ISO 3166-2:AU Oceania Australia and New Zealand \n2 152 ISO 3166-2:CL Americas Latin America and the Caribbean \n3 36 ISO 3166-2:AU Oceania Australia and New Zealand \n4 32 ISO 3166-2:AR Americas Latin America and the Caribbean \n\n intermediate-region region-code sub-region-code intermediate-region-code \n0 NaN 9.0 53.0 NaN \n1 NaN 9.0 53.0 NaN \n2 South America 19.0 419.0 5.0 \n3 NaN 9.0 53.0 NaN \n4 South America 19.0 419.0 5.0 "
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.groupby('sub-region').agg({'count':'sum'}).sort_values('count', ascending=False)",
"execution_count": 28,
"outputs": [
{
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>count</th>\n </tr>\n <tr>\n <th>sub-region</th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>Northern America</th>\n <td>807</td>\n </tr>\n <tr>\n <th>Southern Asia</th>\n <td>730</td>\n </tr>\n <tr>\n <th>Western Europe</th>\n <td>223</td>\n </tr>\n <tr>\n <th>Eastern Europe</th>\n <td>168</td>\n </tr>\n <tr>\n <th>Northern Europe</th>\n <td>164</td>\n </tr>\n <tr>\n <th>South-eastern Asia</th>\n <td>126</td>\n </tr>\n <tr>\n <th>Sub-Saharan Africa</th>\n <td>93</td>\n </tr>\n <tr>\n <th>Latin America and the Caribbean</th>\n <td>84</td>\n </tr>\n <tr>\n <th>Australia and New Zealand</th>\n <td>81</td>\n </tr>\n <tr>\n <th>Southern Europe</th>\n <td>79</td>\n </tr>\n <tr>\n <th>Eastern Asia</th>\n <td>69</td>\n </tr>\n <tr>\n <th>Northern Africa</th>\n <td>47</td>\n </tr>\n <tr>\n <th>Western Asia</th>\n <td>45</td>\n </tr>\n <tr>\n <th>Central Asia</th>\n <td>5</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " count\nsub-region \nNorthern America 807\nSouthern Asia 730\nWestern Europe 223\nEastern Europe 168\nNorthern Europe 164\nSouth-eastern Asia 126\nSub-Saharan Africa 93\nLatin America and the Caribbean 84\nAustralia and New Zealand 81\nSouthern Europe 79\nEastern Asia 69\nNorthern Africa 47\nWestern Asia 45\nCentral Asia 5"
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "df_map.shape, df_map['count'].sum() #passes sanity check",
"execution_count": 30,
"outputs": [
{
"data": {
"text/plain": "((491, 16), 2721)"
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"collapsed": true,
"trusted": false
},
"cell_type": "code",
"source": "df_map.to_csv('map.csv')",
"execution_count": 29,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Steps in Tableau\n\n1. ‘Connect’ to a ‘Text file’ map.csv\n2. Go to worksheet\n3. In ‘Measures’ click on # symbol left of ‘Long’, set ‘Geographic role’ to ‘Longitude’\n4. Drag ‘Lat’ and ‘Long’ to ‘Dimensions’\n5. Drag ‘Lat’ and ‘Long’ to ‘Columns’ and ‘Rows’\n6. Click on world map on the right\n7. Drag to ‘Marks’ : ‘Count’, ‘Accent City’, ‘Country’, ‘Country-Code’, ‘Name’, ‘Sub-Region’\n8. Drag ‘Sub-Region’ to ‘Color’ box\n9. Drag ‘SUM(Count)’ to ‘Size’ box, adjust with slider\n"
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.6.4",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "fast_ai_v1_p3_people_map.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment