AlisonDavey/fast_ai_v1_p3_people_map.ipynb

## fast_ai_v1_p3_people_map.ipynb
{
  "cells": [
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "fast.ai map of p1v3 participants"
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "import numpy as np\nimport pandas as pd\n\n#put the path to your data files here\n#PATH = '...'\n",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df = pd.read_csv(f'{PATH}fastaip1v3.csv')\ndf.head()",
      "execution_count": 2,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-44</td>\n      <td>173</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-43</td>\n      <td>147</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-39</td>\n      <td>-73</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>-38</td>\n      <td>145</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-38</td>\n      <td>145</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "   lat  long\n0  -44   173\n1  -43   147\n2  -39   -73\n3  -38   145\n4  -38   145"
          },
          "execution_count": 2,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df = df.groupby(['lat','long']).agg({'lat':'count'})\ndf.rename(columns={'lat': 'count'}, inplace=True)\ndf.reset_index(level=['lat', 'long'], inplace=True)\ndf.head()",
      "execution_count": 3,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-44</td>\n      <td>173</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-43</td>\n      <td>147</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-39</td>\n      <td>-73</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>-38</td>\n      <td>145</td>\n      <td>29</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-35</td>\n      <td>-59</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "   lat  long  count\n0  -44   173      1\n1  -43   147      1\n2  -39   -73      1\n3  -38   145     29\n4  -35   -59      1"
          },
          "execution_count": 3,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df.shape, df['count'].sum()",
      "execution_count": 4,
      "outputs": [
        {
          "data": {
            "text/plain": "((491, 3), 2721)"
          },
          "execution_count": 4,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "World Cities Database downloaded from kaggle\n\nhttps://www.kaggle.com/max-mind/world-cities-database"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_places = pd.read_csv(f'{PATH}worldcitiespop.csv', low_memory=False)\ndf_places.head()",
      "execution_count": 5,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Country</th>\n      <th>City</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n      <th>Population</th>\n      <th>Latitude</th>\n      <th>Longitude</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>ad</td>\n      <td>aixas</td>\n      <td>Aixàs</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.483333</td>\n      <td>1.466667</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>ad</td>\n      <td>aixirivali</td>\n      <td>Aixirivali</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.500000</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>ad</td>\n      <td>aixirivall</td>\n      <td>Aixirivall</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.500000</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>ad</td>\n      <td>aixirvall</td>\n      <td>Aixirvall</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.500000</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>ad</td>\n      <td>aixovall</td>\n      <td>Aixovall</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.483333</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "  Country        City  AccentCity Region  Population   Latitude  Longitude\n0      ad       aixas       Aixàs     06         NaN  42.483333   1.466667\n1      ad  aixirivali  Aixirivali     06         NaN  42.466667   1.500000\n2      ad  aixirivall  Aixirivall     06         NaN  42.466667   1.500000\n3      ad   aixirvall   Aixirvall     06         NaN  42.466667   1.500000\n4      ad    aixovall    Aixovall     06         NaN  42.466667   1.483333"
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_places['lat'] = df_places.Latitude.round()\ndf_places['long'] = df_places.Longitude.round()\ndf_places.head()",
      "execution_count": 6,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Country</th>\n      <th>City</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n      <th>Population</th>\n      <th>Latitude</th>\n      <th>Longitude</th>\n      <th>lat</th>\n      <th>long</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>ad</td>\n      <td>aixas</td>\n      <td>Aixàs</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.483333</td>\n      <td>1.466667</td>\n      <td>42.0</td>\n      <td>1.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>ad</td>\n      <td>aixirivali</td>\n      <td>Aixirivali</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.500000</td>\n      <td>42.0</td>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>ad</td>\n      <td>aixirivall</td>\n      <td>Aixirivall</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.500000</td>\n      <td>42.0</td>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>ad</td>\n      <td>aixirvall</td>\n      <td>Aixirvall</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.500000</td>\n      <td>42.0</td>\n      <td>2.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>ad</td>\n      <td>aixovall</td>\n      <td>Aixovall</td>\n      <td>06</td>\n      <td>NaN</td>\n      <td>42.466667</td>\n      <td>1.483333</td>\n      <td>42.0</td>\n      <td>1.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "  Country        City  AccentCity Region  Population   Latitude  Longitude  \\\n0      ad       aixas       Aixàs     06         NaN  42.483333   1.466667   \n1      ad  aixirivali  Aixirivali     06         NaN  42.466667   1.500000   \n2      ad  aixirivall  Aixirivall     06         NaN  42.466667   1.500000   \n3      ad   aixirvall   Aixirvall     06         NaN  42.466667   1.500000   \n4      ad    aixovall    Aixovall     06         NaN  42.466667   1.483333   \n\n    lat  long  \n0  42.0   1.0  \n1  42.0   2.0  \n2  42.0   2.0  \n3  42.0   2.0  \n4  42.0   1.0  "
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "#hack (see below)\n# for checking manual addition of Country\tCity\tAccentCity where\n# there are no population figures\ntmp = df_places.loc[df_places['lat'] == 24.0]\ntmp = tmp.loc[df_places['long'] == 40.0]\ntmp.head()",
      "execution_count": null,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Many places have the same `lat` and `long`. Select the place with the highest population."
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "idx = df_places.groupby(['lat','long'])['Population'].transform(max) == df_places['Population']\ndf_places = df_places[idx]\ndf_places.head()",
      "execution_count": 8,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Country</th>\n      <th>City</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n      <th>Population</th>\n      <th>Latitude</th>\n      <th>Longitude</th>\n      <th>lat</th>\n      <th>long</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>93</th>\n      <td>ae</td>\n      <td>abu dhabi</td>\n      <td>Abu Dhabi</td>\n      <td>01</td>\n      <td>603687.0</td>\n      <td>24.466667</td>\n      <td>54.366667</td>\n      <td>24.0</td>\n      <td>54.0</td>\n    </tr>\n    <tr>\n      <th>242</th>\n      <td>ae</td>\n      <td>dubai</td>\n      <td>Dubai</td>\n      <td>03</td>\n      <td>1137376.0</td>\n      <td>25.258172</td>\n      <td>55.304717</td>\n      <td>25.0</td>\n      <td>55.0</td>\n    </tr>\n    <tr>\n      <th>4547</th>\n      <td>af</td>\n      <td>asadabad</td>\n      <td>Asadabad</td>\n      <td>34</td>\n      <td>48400.0</td>\n      <td>34.873113</td>\n      <td>71.146970</td>\n      <td>35.0</td>\n      <td>71.0</td>\n    </tr>\n    <tr>\n      <th>5410</th>\n      <td>af</td>\n      <td>aybak</td>\n      <td>Aybak</td>\n      <td>32</td>\n      <td>47877.0</td>\n      <td>36.264680</td>\n      <td>68.015512</td>\n      <td>36.0</td>\n      <td>68.0</td>\n    </tr>\n    <tr>\n      <th>6644</th>\n      <td>af</td>\n      <td>baglan</td>\n      <td>Baglan</td>\n      <td>03</td>\n      <td>108481.0</td>\n      <td>36.130684</td>\n      <td>68.708286</td>\n      <td>36.0</td>\n      <td>69.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     Country       City AccentCity Region  Population   Latitude  Longitude  \\\n93        ae  abu dhabi  Abu Dhabi     01    603687.0  24.466667  54.366667   \n242       ae      dubai      Dubai     03   1137376.0  25.258172  55.304717   \n4547      af   asadabad   Asadabad     34     48400.0  34.873113  71.146970   \n5410      af      aybak      Aybak     32     47877.0  36.264680  68.015512   \n6644      af     baglan     Baglan     03    108481.0  36.130684  68.708286   \n\n       lat  long  \n93    24.0  54.0  \n242   25.0  55.0  \n4547  35.0  71.0  \n5410  36.0  68.0  \n6644  36.0  69.0  "
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map = df.merge(df_places, how='left', on=['lat','long'])\ndf_map=df_map[['lat','long','count','Country','AccentCity','Region']]\ndf_map.head()",
      "execution_count": 9,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-44</td>\n      <td>173</td>\n      <td>1</td>\n      <td>nz</td>\n      <td>Christchurch</td>\n      <td>E9</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-43</td>\n      <td>147</td>\n      <td>1</td>\n      <td>au</td>\n      <td>Hobart</td>\n      <td>06</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-39</td>\n      <td>-73</td>\n      <td>1</td>\n      <td>cl</td>\n      <td>Temuco</td>\n      <td>04</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>-38</td>\n      <td>145</td>\n      <td>29</td>\n      <td>au</td>\n      <td>Melbourne</td>\n      <td>07</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-35</td>\n      <td>-59</td>\n      <td>1</td>\n      <td>ar</td>\n      <td>Lujan</td>\n      <td>01</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "   lat  long  count Country    AccentCity Region\n0  -44   173      1      nz  Christchurch     E9\n1  -43   147      1      au        Hobart     06\n2  -39   -73      1      cl        Temuco     04\n3  -38   145     29      au     Melbourne     07\n4  -35   -59      1      ar         Lujan     01"
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.shape, df_map['count'].sum() #fails sanity check",
      "execution_count": 10,
      "outputs": [
        {
          "data": {
            "text/plain": "((496, 6), 2801)"
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Problem with repeated values of `lat\tlong\tcount`"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "for i in range (1, df_map.shape[0]):\n    if (df_map.loc[i,['lat','long', 'count']] == df_map.loc[i-1,['lat','long', 'count']]).all():\n        print (i)",
      "execution_count": 11,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": "109\n149\n241\n444\n490\n"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.iloc[108:110]",
      "execution_count": 12,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>108</th>\n      <td>21</td>\n      <td>106</td>\n      <td>4</td>\n      <td>vn</td>\n      <td>Hanoi</td>\n      <td>44</td>\n    </tr>\n    <tr>\n      <th>109</th>\n      <td>21</td>\n      <td>106</td>\n      <td>4</td>\n      <td>vn</td>\n      <td>Ha Noi</td>\n      <td>44</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     lat  long  count Country AccentCity Region\n108   21   106      4      vn      Hanoi     44\n109   21   106      4      vn     Ha Noi     44"
          },
          "execution_count": 12,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.drop(df_map.index[109], inplace=True)",
      "execution_count": 13,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.iloc[147:149]",
      "execution_count": 14,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>148</th>\n      <td>29</td>\n      <td>77</td>\n      <td>67</td>\n      <td>in</td>\n      <td>Delhi</td>\n      <td>07</td>\n    </tr>\n    <tr>\n      <th>149</th>\n      <td>29</td>\n      <td>77</td>\n      <td>67</td>\n      <td>in</td>\n      <td>New Delhi</td>\n      <td>07</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     lat  long  count Country AccentCity Region\n148   29    77     67      in      Delhi     07\n149   29    77     67      in  New Delhi     07"
          },
          "execution_count": 14,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.drop(df_map.index[148], inplace=True)",
      "execution_count": 15,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.iloc[238:240]",
      "execution_count": 16,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>240</th>\n      <td>39</td>\n      <td>-95</td>\n      <td>1</td>\n      <td>us</td>\n      <td>Kansas City</td>\n      <td>KS</td>\n    </tr>\n    <tr>\n      <th>241</th>\n      <td>39</td>\n      <td>-95</td>\n      <td>1</td>\n      <td>us</td>\n      <td>Kansas City</td>\n      <td>MO</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     lat  long  count Country   AccentCity Region\n240   39   -95      1      us  Kansas City     KS\n241   39   -95      1      us  Kansas City     MO"
          },
          "execution_count": 16,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.drop(df_map.index[239], inplace=True)",
      "execution_count": 17,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.iloc[440:442]",
      "execution_count": 18,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>443</th>\n      <td>53</td>\n      <td>-1</td>\n      <td>7</td>\n      <td>gb</td>\n      <td>Leicester</td>\n      <td>H4</td>\n    </tr>\n    <tr>\n      <th>444</th>\n      <td>53</td>\n      <td>-1</td>\n      <td>7</td>\n      <td>gb</td>\n      <td>Leicester</td>\n      <td>H5</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     lat  long  count Country AccentCity Region\n443   53    -1      7      gb  Leicester     H4\n444   53    -1      7      gb  Leicester     H5"
          },
          "execution_count": 18,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.drop(df_map.index[441], inplace=True)",
      "execution_count": 19,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.iloc[485:487]",
      "execution_count": 20,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>489</th>\n      <td>60</td>\n      <td>18</td>\n      <td>1</td>\n      <td>se</td>\n      <td>Uppsala</td>\n      <td>21</td>\n    </tr>\n    <tr>\n      <th>490</th>\n      <td>60</td>\n      <td>18</td>\n      <td>1</td>\n      <td>se</td>\n      <td>Uppsala</td>\n      <td>26</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     lat  long  count Country AccentCity Region\n489   60    18      1      se    Uppsala     21\n490   60    18      1      se    Uppsala     26"
          },
          "execution_count": 20,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.drop(df_map.index[486], inplace=True)",
      "execution_count": 21,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.shape, df_map['count'].sum() #passes sanity check",
      "execution_count": 22,
      "outputs": [
        {
          "data": {
            "text/plain": "((491, 6), 2721)"
          },
          "execution_count": 22,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Some places had no `Population` values, all `NaN`s"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.iloc[df_map.isnull().any(1).nonzero()[0]]",
      "execution_count": 23,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>122</th>\n      <td>24</td>\n      <td>40</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>172</th>\n      <td>31</td>\n      <td>122</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>186</th>\n      <td>33</td>\n      <td>77</td>\n      <td>2</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>200</th>\n      <td>35</td>\n      <td>127</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>215</th>\n      <td>36</td>\n      <td>108</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>216</th>\n      <td>36</td>\n      <td>127</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>217</th>\n      <td>36</td>\n      <td>129</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>234</th>\n      <td>38</td>\n      <td>0</td>\n      <td>2</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>249</th>\n      <td>39</td>\n      <td>39</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>354</th>\n      <td>47</td>\n      <td>-89</td>\n      <td>1</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "     lat  long  count Country AccentCity Region\n122   24    40      1     NaN        NaN    NaN\n172   31   122      1     NaN        NaN    NaN\n186   33    77      2     NaN        NaN    NaN\n200   35   127      1     NaN        NaN    NaN\n215   36   108      1     NaN        NaN    NaN\n216   36   127      1     NaN        NaN    NaN\n217   36   129      1     NaN        NaN    NaN\n234   38     0      2     NaN        NaN    NaN\n249   39    39      1     NaN        NaN    NaN\n354   47   -89      1     NaN        NaN    NaN"
          },
          "execution_count": 23,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Manually allocate `Country AccentCity Region` by looking at\n\nhttps://www.findlatitudeandlongitude.com\n\nand looking at data from `df_places` at the `#hack` above\n\n122\t24\t40 sa Medina 05\n\n172\t31\t122 cn Shanghai 23\n\n186\t33\t77 in Darcha 11\n\n200\t35\t127 kr Gwangju 16\n\n215\t36\t108 cn Qingyang 15\n\n216\t36\t127 kr Jeonju 17\n\n217\t36\t129 kr Yeongcheon 14\n\n234\t38\t0 es San Juan de Alicante 60\n\n249\t39\t39 tr Elazig 62\n\n354\t47\t-89 us Houghton MI"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.loc[122, ['Country','AccentCity','Region']]=['sa','Medina','05']\ndf_map.loc[172, ['Country','AccentCity','Region']]=['cn','Shanghai','23']\ndf_map.loc[186, ['Country','AccentCity','Region']]=['in','Darcha','11']\ndf_map.loc[200, ['Country','AccentCity','Region']]=['kr','Gwangju','16']\ndf_map.loc[215, ['Country','AccentCity','Region']]=['cn','Qingyang','15']\ndf_map.loc[216, ['Country','AccentCity','Region']]=['kr','Jeonju','17']\ndf_map.loc[217, ['Country','AccentCity','Region']]=['kr','Yeongcheon','14']\ndf_map.loc[234, ['Country','AccentCity','Region']]=['es', 'San Juan de Alicante', '60']\ndf_map.loc[249, ['Country','AccentCity','Region']]=['tr', 'Elazig', '62']\ndf_map.loc[354, ['Country','AccentCity','Region']]=['us', 'Houghton', 'MI']\ndf_map.loc[122]",
      "execution_count": 24,
      "outputs": [
        {
          "data": {
            "text/plain": "lat               24\nlong              40\ncount              1\nCountry           sa\nAccentCity    Medina\nRegion            05\nName: 122, dtype: object"
          },
          "execution_count": 24,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Convert two-letter country codes to upper case"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map['Country'] = df_map['Country'].apply(lambda x: x.upper())\ndf_map.head()",
      "execution_count": 25,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-44</td>\n      <td>173</td>\n      <td>1</td>\n      <td>NZ</td>\n      <td>Christchurch</td>\n      <td>E9</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-43</td>\n      <td>147</td>\n      <td>1</td>\n      <td>AU</td>\n      <td>Hobart</td>\n      <td>06</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-39</td>\n      <td>-73</td>\n      <td>1</td>\n      <td>CL</td>\n      <td>Temuco</td>\n      <td>04</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>-38</td>\n      <td>145</td>\n      <td>29</td>\n      <td>AU</td>\n      <td>Melbourne</td>\n      <td>07</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-35</td>\n      <td>-59</td>\n      <td>1</td>\n      <td>AR</td>\n      <td>Lujan</td>\n      <td>01</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "   lat  long  count Country    AccentCity Region\n0  -44   173      1      NZ  Christchurch     E9\n1  -43   147      1      AU        Hobart     06\n2  -39   -73      1      CL        Temuco     04\n3  -38   145     29      AU     Melbourne     07\n4  -35   -59      1      AR         Lujan     01"
          },
          "execution_count": 25,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "ISO countries and regional codes downloaded from github\n\nhttps://github.com/lukes/ISO-3166-Countries-with-Regional-Codes"
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_iso = pd.read_csv(f'{PATH}all.csv')\ndf_iso.head()",
      "execution_count": 26,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>name</th>\n      <th>alpha-2</th>\n      <th>alpha-3</th>\n      <th>country-code</th>\n      <th>iso_3166-2</th>\n      <th>region</th>\n      <th>sub-region</th>\n      <th>intermediate-region</th>\n      <th>region-code</th>\n      <th>sub-region-code</th>\n      <th>intermediate-region-code</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Afghanistan</td>\n      <td>AF</td>\n      <td>AFG</td>\n      <td>4</td>\n      <td>ISO 3166-2:AF</td>\n      <td>Asia</td>\n      <td>Southern Asia</td>\n      <td>NaN</td>\n      <td>142.0</td>\n      <td>34.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Åland Islands</td>\n      <td>AX</td>\n      <td>ALA</td>\n      <td>248</td>\n      <td>ISO 3166-2:AX</td>\n      <td>Europe</td>\n      <td>Northern Europe</td>\n      <td>NaN</td>\n      <td>150.0</td>\n      <td>154.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Albania</td>\n      <td>AL</td>\n      <td>ALB</td>\n      <td>8</td>\n      <td>ISO 3166-2:AL</td>\n      <td>Europe</td>\n      <td>Southern Europe</td>\n      <td>NaN</td>\n      <td>150.0</td>\n      <td>39.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Algeria</td>\n      <td>DZ</td>\n      <td>DZA</td>\n      <td>12</td>\n      <td>ISO 3166-2:DZ</td>\n      <td>Africa</td>\n      <td>Northern Africa</td>\n      <td>NaN</td>\n      <td>2.0</td>\n      <td>15.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>American Samoa</td>\n      <td>AS</td>\n      <td>ASM</td>\n      <td>16</td>\n      <td>ISO 3166-2:AS</td>\n      <td>Oceania</td>\n      <td>Polynesia</td>\n      <td>NaN</td>\n      <td>9.0</td>\n      <td>61.0</td>\n      <td>NaN</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "             name alpha-2 alpha-3  country-code     iso_3166-2   region  \\\n0     Afghanistan      AF     AFG             4  ISO 3166-2:AF     Asia   \n1   Åland Islands      AX     ALA           248  ISO 3166-2:AX   Europe   \n2         Albania      AL     ALB             8  ISO 3166-2:AL   Europe   \n3         Algeria      DZ     DZA            12  ISO 3166-2:DZ   Africa   \n4  American Samoa      AS     ASM            16  ISO 3166-2:AS  Oceania   \n\n        sub-region intermediate-region  region-code  sub-region-code  \\\n0    Southern Asia                 NaN        142.0             34.0   \n1  Northern Europe                 NaN        150.0            154.0   \n2  Southern Europe                 NaN        150.0             39.0   \n3  Northern Africa                 NaN          2.0             15.0   \n4        Polynesia                 NaN          9.0             61.0   \n\n   intermediate-region-code  \n0                       NaN  \n1                       NaN  \n2                       NaN  \n3                       NaN  \n4                       NaN  "
          },
          "execution_count": 26,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_iso.rename(columns = {'alpha-2':'Country'}, inplace=True)\ndf_map = df_map.merge(df_iso, how='left', on=['Country'])\ndf_map.head()",
      "execution_count": 27,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>lat</th>\n      <th>long</th>\n      <th>count</th>\n      <th>Country</th>\n      <th>AccentCity</th>\n      <th>Region</th>\n      <th>name</th>\n      <th>alpha-3</th>\n      <th>country-code</th>\n      <th>iso_3166-2</th>\n      <th>region</th>\n      <th>sub-region</th>\n      <th>intermediate-region</th>\n      <th>region-code</th>\n      <th>sub-region-code</th>\n      <th>intermediate-region-code</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-44</td>\n      <td>173</td>\n      <td>1</td>\n      <td>NZ</td>\n      <td>Christchurch</td>\n      <td>E9</td>\n      <td>New Zealand</td>\n      <td>NZL</td>\n      <td>554</td>\n      <td>ISO 3166-2:NZ</td>\n      <td>Oceania</td>\n      <td>Australia and New Zealand</td>\n      <td>NaN</td>\n      <td>9.0</td>\n      <td>53.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-43</td>\n      <td>147</td>\n      <td>1</td>\n      <td>AU</td>\n      <td>Hobart</td>\n      <td>06</td>\n      <td>Australia</td>\n      <td>AUS</td>\n      <td>36</td>\n      <td>ISO 3166-2:AU</td>\n      <td>Oceania</td>\n      <td>Australia and New Zealand</td>\n      <td>NaN</td>\n      <td>9.0</td>\n      <td>53.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-39</td>\n      <td>-73</td>\n      <td>1</td>\n      <td>CL</td>\n      <td>Temuco</td>\n      <td>04</td>\n      <td>Chile</td>\n      <td>CHL</td>\n      <td>152</td>\n      <td>ISO 3166-2:CL</td>\n      <td>Americas</td>\n      <td>Latin America and the Caribbean</td>\n      <td>South America</td>\n      <td>19.0</td>\n      <td>419.0</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>-38</td>\n      <td>145</td>\n      <td>29</td>\n      <td>AU</td>\n      <td>Melbourne</td>\n      <td>07</td>\n      <td>Australia</td>\n      <td>AUS</td>\n      <td>36</td>\n      <td>ISO 3166-2:AU</td>\n      <td>Oceania</td>\n      <td>Australia and New Zealand</td>\n      <td>NaN</td>\n      <td>9.0</td>\n      <td>53.0</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-35</td>\n      <td>-59</td>\n      <td>1</td>\n      <td>AR</td>\n      <td>Lujan</td>\n      <td>01</td>\n      <td>Argentina</td>\n      <td>ARG</td>\n      <td>32</td>\n      <td>ISO 3166-2:AR</td>\n      <td>Americas</td>\n      <td>Latin America and the Caribbean</td>\n      <td>South America</td>\n      <td>19.0</td>\n      <td>419.0</td>\n      <td>5.0</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "   lat  long  count Country    AccentCity Region         name alpha-3  \\\n0  -44   173      1      NZ  Christchurch     E9  New Zealand     NZL   \n1  -43   147      1      AU        Hobart     06    Australia     AUS   \n2  -39   -73      1      CL        Temuco     04        Chile     CHL   \n3  -38   145     29      AU     Melbourne     07    Australia     AUS   \n4  -35   -59      1      AR         Lujan     01    Argentina     ARG   \n\n   country-code     iso_3166-2    region                       sub-region  \\\n0           554  ISO 3166-2:NZ   Oceania        Australia and New Zealand   \n1            36  ISO 3166-2:AU   Oceania        Australia and New Zealand   \n2           152  ISO 3166-2:CL  Americas  Latin America and the Caribbean   \n3            36  ISO 3166-2:AU   Oceania        Australia and New Zealand   \n4            32  ISO 3166-2:AR  Americas  Latin America and the Caribbean   \n\n  intermediate-region  region-code  sub-region-code  intermediate-region-code  \n0                 NaN          9.0             53.0                       NaN  \n1                 NaN          9.0             53.0                       NaN  \n2       South America         19.0            419.0                       5.0  \n3                 NaN          9.0             53.0                       NaN  \n4       South America         19.0            419.0                       5.0  "
          },
          "execution_count": 27,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.groupby('sub-region').agg({'count':'sum'}).sort_values('count', ascending=False)",
      "execution_count": 28,
      "outputs": [
        {
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>count</th>\n    </tr>\n    <tr>\n      <th>sub-region</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Northern America</th>\n      <td>807</td>\n    </tr>\n    <tr>\n      <th>Southern Asia</th>\n      <td>730</td>\n    </tr>\n    <tr>\n      <th>Western Europe</th>\n      <td>223</td>\n    </tr>\n    <tr>\n      <th>Eastern Europe</th>\n      <td>168</td>\n    </tr>\n    <tr>\n      <th>Northern Europe</th>\n      <td>164</td>\n    </tr>\n    <tr>\n      <th>South-eastern Asia</th>\n      <td>126</td>\n    </tr>\n    <tr>\n      <th>Sub-Saharan Africa</th>\n      <td>93</td>\n    </tr>\n    <tr>\n      <th>Latin America and the Caribbean</th>\n      <td>84</td>\n    </tr>\n    <tr>\n      <th>Australia and New Zealand</th>\n      <td>81</td>\n    </tr>\n    <tr>\n      <th>Southern Europe</th>\n      <td>79</td>\n    </tr>\n    <tr>\n      <th>Eastern Asia</th>\n      <td>69</td>\n    </tr>\n    <tr>\n      <th>Northern Africa</th>\n      <td>47</td>\n    </tr>\n    <tr>\n      <th>Western Asia</th>\n      <td>45</td>\n    </tr>\n    <tr>\n      <th>Central Asia</th>\n      <td>5</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "                                 count\nsub-region                            \nNorthern America                   807\nSouthern Asia                      730\nWestern Europe                     223\nEastern Europe                     168\nNorthern Europe                    164\nSouth-eastern Asia                 126\nSub-Saharan Africa                  93\nLatin America and the Caribbean     84\nAustralia and New Zealand           81\nSouthern Europe                     79\nEastern Asia                        69\nNorthern Africa                     47\nWestern Asia                        45\nCentral Asia                         5"
          },
          "execution_count": 28,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.shape, df_map['count'].sum() #passes sanity check",
      "execution_count": 30,
      "outputs": [
        {
          "data": {
            "text/plain": "((491, 16), 2721)"
          },
          "execution_count": 30,
          "metadata": {},
          "output_type": "execute_result"
        }
      ]
    },
    {
      "metadata": {
        "collapsed": true,
        "trusted": false
      },
      "cell_type": "code",
      "source": "df_map.to_csv('map.csv')",
      "execution_count": 29,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Steps in Tableau\n\n1. ‘Connect’ to a ‘Text file’ map.csv\n2. Go to worksheet\n3. In ‘Measures’ click on # symbol left of ‘Long’, set ‘Geographic role’ to ‘Longitude’\n4. Drag ‘Lat’ and ‘Long’ to ‘Dimensions’\n5. Drag ‘Lat’ and ‘Long’ to ‘Columns’ and ‘Rows’\n6. Click on world map on the right\n7. Drag to ‘Marks’ : ‘Count’, ‘Accent City’, ‘Country’, ‘Country-Code’, ‘Name’, ‘Sub-Region’\n8. Drag ‘Sub-Region’ to ‘Color’ box\n9. Drag ‘SUM(Count)’ to ‘Size’ box, adjust with slider\n"
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.4",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "fast_ai_v1_p3_people_map.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}