Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save analyticsindiamagazine/2741d7d89cd49c3ffe4dda5ba9e46fcf to your computer and use it in GitHub Desktop.
Save analyticsindiamagazine/2741d7d89cd49c3ffe4dda5ba9e46fcf to your computer and use it in GitHub Desktop.
COVID-19 - TimeSeries - Sample Submissions.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"colab": {
"name": "COVID-19 - TimeSeries - Sample Submissions.ipynb",
"provenance": [],
"include_colab_link": true
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/analyticsindiamagazine/2741d7d89cd49c3ffe4dda5ba9e46fcf/covid-19-timeseries-sample-submissions.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "bX8THuepSX5f",
"colab_type": "code",
"colab": {}
},
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "2_wfNuWtSX5l",
"colab_type": "text"
},
"source": [
"# Read the Data\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "FeFJIdRESX5m",
"colab_type": "code",
"colab": {},
"outputId": "8b2d0542-236f-4259-e002-fb8cfa037cba"
},
"source": [
"confirm_data_df_old = pd.read_csv(r'/Users/anurag/Downloads/time_series_19-covid-Confirmed (2).txt')\n",
"confirm_data_df_old.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Province/State</th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>Thailand</td>\n",
" <td>15.0000</td>\n",
" <td>101.0000</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>...</td>\n",
" <td>43</td>\n",
" <td>43</td>\n",
" <td>43</td>\n",
" <td>47</td>\n",
" <td>48</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>53</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Japan</td>\n",
" <td>36.0000</td>\n",
" <td>138.0000</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>274</td>\n",
" <td>293</td>\n",
" <td>331</td>\n",
" <td>360</td>\n",
" <td>420</td>\n",
" <td>461</td>\n",
" <td>502</td>\n",
" <td>511</td>\n",
" <td>581</td>\n",
" <td>639</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>Singapore</td>\n",
" <td>1.2833</td>\n",
" <td>103.8333</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>108</td>\n",
" <td>110</td>\n",
" <td>110</td>\n",
" <td>117</td>\n",
" <td>130</td>\n",
" <td>138</td>\n",
" <td>150</td>\n",
" <td>150</td>\n",
" <td>160</td>\n",
" <td>178</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>Nepal</td>\n",
" <td>28.1667</td>\n",
" <td>84.2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>Malaysia</td>\n",
" <td>2.5000</td>\n",
" <td>112.5000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>29</td>\n",
" <td>36</td>\n",
" <td>50</td>\n",
" <td>50</td>\n",
" <td>83</td>\n",
" <td>93</td>\n",
" <td>99</td>\n",
" <td>117</td>\n",
" <td>129</td>\n",
" <td>149</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 54 columns</p>\n",
"</div>"
],
"text/plain": [
" Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 \\\n",
"0 NaN Thailand 15.0000 101.0000 2 3 5 \n",
"1 NaN Japan 36.0000 138.0000 2 1 2 \n",
"2 NaN Singapore 1.2833 103.8333 0 1 3 \n",
"3 NaN Nepal 28.1667 84.2500 0 0 0 \n",
"4 NaN Malaysia 2.5000 112.5000 0 0 0 \n",
"\n",
" 1/25/20 1/26/20 1/27/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 \\\n",
"0 7 8 8 ... 43 43 43 47 48 \n",
"1 2 4 4 ... 274 293 331 360 420 \n",
"2 3 4 5 ... 108 110 110 117 130 \n",
"3 1 1 1 ... 1 1 1 1 1 \n",
"4 3 4 4 ... 29 36 50 50 83 \n",
"\n",
" 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"0 50 50 50 53 59 \n",
"1 461 502 511 581 639 \n",
"2 138 150 150 160 178 \n",
"3 1 1 1 1 1 \n",
"4 93 99 117 129 149 \n",
"\n",
"[5 rows x 54 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "PV4TbEYTSX5p",
"colab_type": "code",
"colab": {},
"outputId": "648cbeee-f4f9-43a1-caeb-dacdf5e6a950"
},
"source": [
"confirm_data_df_old.shape"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(404, 54)"
]
},
"metadata": {
"tags": []
},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "MkwaHKmzSX5s",
"colab_type": "code",
"colab": {},
"outputId": "ed801a89-fc16-4ae3-dba9-4104cacb22a2"
},
"source": [
"confirm_data_df_old['Country/Region'].nunique(), confirm_data_df_old['Province/State'].nunique()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(114, 297)"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "L6VMxXxpSX5v",
"colab_type": "code",
"colab": {}
},
"source": [
"# confirm_data_df.replace({'China' : 'Mainland China'}, inplace=True)\n",
"# confirm_data_df.replace({'Taiwan*' : 'Taiwan'}, inplace=True)\n",
"\n",
"confirm_data_df_old.replace({'Taiwan*' : 'Taiwan'}, inplace=True)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Z4CFqvRDSX5x",
"colab_type": "code",
"colab": {},
"outputId": "b55552d3-9449-42a5-8f64-7fca8a2503f5"
},
"source": [
"reco_data_df = pd.read_csv(r'/Users/anurag/Downloads/time_series_19-covid-Recovered (2).txt').iloc[:-2, :-1]\n",
"reco_data_df.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Province/State</th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>Thailand</td>\n",
" <td>15.0000</td>\n",
" <td>101.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>31</td>\n",
" <td>33</td>\n",
" <td>34</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Japan</td>\n",
" <td>36.0000</td>\n",
" <td>138.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>32</td>\n",
" <td>43</td>\n",
" <td>43</td>\n",
" <td>43</td>\n",
" <td>46</td>\n",
" <td>76</td>\n",
" <td>76</td>\n",
" <td>76</td>\n",
" <td>101</td>\n",
" <td>118</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>Singapore</td>\n",
" <td>1.2833</td>\n",
" <td>103.8333</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>78</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>Nepal</td>\n",
" <td>28.1667</td>\n",
" <td>84.2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>Malaysia</td>\n",
" <td>2.5000</td>\n",
" <td>112.5000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>18</td>\n",
" <td>22</td>\n",
" <td>22</td>\n",
" <td>22</td>\n",
" <td>22</td>\n",
" <td>23</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>26</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 54 columns</p>\n",
"</div>"
],
"text/plain": [
" Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 \\\n",
"0 NaN Thailand 15.0000 101.0000 0 0 0 \n",
"1 NaN Japan 36.0000 138.0000 0 0 0 \n",
"2 NaN Singapore 1.2833 103.8333 0 0 0 \n",
"3 NaN Nepal 28.1667 84.2500 0 0 0 \n",
"4 NaN Malaysia 2.5000 112.5000 0 0 0 \n",
"\n",
" 1/25/20 1/26/20 1/27/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 \\\n",
"0 0 2 2 ... 31 31 31 31 31 \n",
"1 0 1 1 ... 32 43 43 43 46 \n",
"2 0 0 0 ... 78 78 78 78 78 \n",
"3 0 0 0 ... 1 1 1 1 1 \n",
"4 0 0 0 ... 18 22 22 22 22 \n",
"\n",
" 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"0 31 31 31 33 34 \n",
"1 76 76 76 101 118 \n",
"2 78 78 78 78 96 \n",
"3 1 1 1 1 1 \n",
"4 23 24 24 24 26 \n",
"\n",
"[5 rows x 54 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Rg5slp8jSX50",
"colab_type": "code",
"colab": {},
"outputId": "b3ada999-be2d-4177-88b2-582a601455d5"
},
"source": [
"reco_data_df.shape"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(404, 54)"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nqxbGS8FSX53",
"colab_type": "code",
"colab": {},
"outputId": "bc30a800-15cd-4801-b55b-8c7d2c835af5"
},
"source": [
"death_data_df = pd.read_csv(r'/Users/anurag/Downloads/time_series_19-covid-Deaths (2).txt').iloc[:-2, :-1]\n",
"death_data_df.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Province/State</th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>Thailand</td>\n",
" <td>15.0000</td>\n",
" <td>101.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>Japan</td>\n",
" <td>36.0000</td>\n",
" <td>138.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>10</td>\n",
" <td>10</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>Singapore</td>\n",
" <td>1.2833</td>\n",
" <td>103.8333</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>Nepal</td>\n",
" <td>28.1667</td>\n",
" <td>84.2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>Malaysia</td>\n",
" <td>2.5000</td>\n",
" <td>112.5000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 54 columns</p>\n",
"</div>"
],
"text/plain": [
" Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 \\\n",
"0 NaN Thailand 15.0000 101.0000 0 0 0 \n",
"1 NaN Japan 36.0000 138.0000 0 0 0 \n",
"2 NaN Singapore 1.2833 103.8333 0 0 0 \n",
"3 NaN Nepal 28.1667 84.2500 0 0 0 \n",
"4 NaN Malaysia 2.5000 112.5000 0 0 0 \n",
"\n",
" 1/25/20 1/26/20 1/27/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 \\\n",
"0 0 0 0 ... 1 1 1 1 1 \n",
"1 0 0 0 ... 6 6 6 6 6 \n",
"2 0 0 0 ... 0 0 0 0 0 \n",
"3 0 0 0 ... 0 0 0 0 0 \n",
"4 0 0 0 ... 0 0 0 0 0 \n",
"\n",
" 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"0 1 1 1 1 1 \n",
"1 6 6 10 10 15 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
"[5 rows x 54 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "5G7jZ5dPSX55",
"colab_type": "code",
"colab": {},
"outputId": "203b6f87-f61e-4248-ff05-50ed146b9b0d"
},
"source": [
"death_data_df.shape"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(404, 54)"
]
},
"metadata": {
"tags": []
},
"execution_count": 13
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "YDoLf_ouSX58",
"colab_type": "code",
"colab": {}
},
"source": [
"def preprocess(input_df):\n",
" \"\"\"Takes the raw dataframe and groups it by country, summing the values per country (the format needed for the hackathon).\"\"\"\n",
" \n",
" grp_confirm_data_df = input_df.groupby('Country/Region', as_index=False).sum()\n",
" grp_confirm_data_df.reset_index(inplace=True, drop=True)\n",
" return grp_confirm_data_df"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "IfyrizfQSX5_",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "n2EJLR49SX6E",
"colab_type": "text"
},
"source": [
"# Group based on the Country Name "
]
},
{
"cell_type": "code",
"metadata": {
"id": "WazacuXrSX6F",
"colab_type": "code",
"colab": {},
"outputId": "fedcb66b-0207-4a41-f727-af2fe8473ae6"
},
"source": [
"# Let's group by country and sum the daily counts.\n",
"\n",
"grp_confirm_data_df = confirm_data_df_old.groupby('Country/Region', as_index=False).sum()\n",
"grp_confirm_data_df.reset_index(inplace=True, drop=True)\n",
"grp_confirm_data_df.shape\n",
"# grp_fin_df.to_csv('Train_timeseries.csv', index=False)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(114, 53)"
]
},
"metadata": {
"tags": []
},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "PX2p0_zFSX6H",
"colab_type": "code",
"colab": {},
"outputId": "346efabc-e27b-4a61-f658-b20345acb8a9"
},
"source": [
"# Let's group by country and sum the daily counts.\n",
"\n",
"grp_reco_data_df = reco_data_df.groupby('Country/Region', as_index=False).sum()\n",
"grp_reco_data_df.reset_index(inplace=True, drop=True)\n",
"grp_reco_data_df.shape\n",
"# grp_fin_df.to_csv('Train_timeseries.csv', index=False)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(114, 53)"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": false,
"id": "Q88Ehh4cSX6J",
"colab_type": "code",
"colab": {},
"outputId": "aabae1d4-f672-4456-fb85-5e4c9db7c501"
},
"source": [
"# Let's group by country and sum the daily counts.\n",
"\n",
"grp_death_data_df = death_data_df.groupby('Country/Region', as_index=False).sum()\n",
"grp_death_data_df.reset_index(inplace=True, drop=True)\n",
"grp_death_data_df.shape\n",
"# grp_fin_df.to_csv('Train_timeseries.csv', index=False)"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(114, 53)"
]
},
"metadata": {
"tags": []
},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"metadata": {
"scrolled": true,
"id": "8XCj50FESX6M",
"colab_type": "code",
"colab": {},
"outputId": "47ceed29-68e0-40d5-9300-a434f61349aa"
},
"source": [
"grp_confirm_data_df.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>1/28/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Afghanistan</td>\n",
" <td>33.0000</td>\n",
" <td>65.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Albania</td>\n",
" <td>41.1533</td>\n",
" <td>20.1683</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>10</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Algeria</td>\n",
" <td>28.0339</td>\n",
" <td>1.6596</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>17</td>\n",
" <td>17</td>\n",
" <td>19</td>\n",
" <td>20</td>\n",
" <td>20</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Andorra</td>\n",
" <td>42.5063</td>\n",
" <td>1.5218</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Argentina</td>\n",
" <td>-38.4161</td>\n",
" <td>-63.6167</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>8</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>17</td>\n",
" <td>19</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 53 columns</p>\n",
"</div>"
],
"text/plain": [
" Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 \\\n",
"0 Afghanistan 33.0000 65.0000 0 0 0 0 \n",
"1 Albania 41.1533 20.1683 0 0 0 0 \n",
"2 Algeria 28.0339 1.6596 0 0 0 0 \n",
"3 Andorra 42.5063 1.5218 0 0 0 0 \n",
"4 Argentina -38.4161 -63.6167 0 0 0 0 \n",
"\n",
" 1/26/20 1/27/20 1/28/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 \\\n",
"0 0 0 0 ... 1 1 1 1 1 \n",
"1 0 0 0 ... 0 0 0 0 0 \n",
"2 0 0 0 ... 3 5 12 12 17 \n",
"3 0 0 0 ... 1 1 1 1 1 \n",
"4 0 0 0 ... 0 1 1 1 2 \n",
"\n",
" 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"0 1 4 4 5 7 \n",
"1 0 0 2 10 12 \n",
"2 17 19 20 20 20 \n",
"3 1 1 1 1 1 \n",
"4 8 12 12 17 19 \n",
"\n",
"[5 rows x 53 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "6Bgf4KkkSX6O",
"colab_type": "code",
"colab": {},
"outputId": "b076757d-ecfb-41ad-eea5-a12264085509"
},
"source": [
"grp_reco_data_df.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>1/28/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Afghanistan</td>\n",
" <td>33.0000</td>\n",
" <td>65.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Albania</td>\n",
" <td>41.1533</td>\n",
" <td>20.1683</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Algeria</td>\n",
" <td>28.0339</td>\n",
" <td>1.6596</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Andorra</td>\n",
" <td>42.5063</td>\n",
" <td>1.5218</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Argentina</td>\n",
" <td>-38.4161</td>\n",
" <td>-63.6167</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 53 columns</p>\n",
"</div>"
],
"text/plain": [
" Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 \\\n",
"0 Afghanistan 33.0000 65.0000 0 0 0 0 \n",
"1 Albania 41.1533 20.1683 0 0 0 0 \n",
"2 Algeria 28.0339 1.6596 0 0 0 0 \n",
"3 Andorra 42.5063 1.5218 0 0 0 0 \n",
"4 Argentina -38.4161 -63.6167 0 0 0 0 \n",
"\n",
" 1/26/20 1/27/20 1/28/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 \\\n",
"0 0 0 0 ... 0 0 0 0 0 \n",
"1 0 0 0 ... 0 0 0 0 0 \n",
"2 0 0 0 ... 0 0 0 0 0 \n",
"3 0 0 0 ... 0 0 0 0 0 \n",
"4 0 0 0 ... 0 0 0 0 0 \n",
"\n",
" 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
"[5 rows x 53 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "m36sYTiDSX6Q",
"colab_type": "code",
"colab": {},
"outputId": "c6798315-5f4b-41d0-8ffa-bba8b6b40ec4"
},
"source": [
"grp_death_data_df.head()"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>1/28/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>Afghanistan</td>\n",
" <td>33.0000</td>\n",
" <td>65.0000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>Albania</td>\n",
" <td>41.1533</td>\n",
" <td>20.1683</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>Algeria</td>\n",
" <td>28.0339</td>\n",
" <td>1.6596</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>Andorra</td>\n",
" <td>42.5063</td>\n",
" <td>1.5218</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>Argentina</td>\n",
" <td>-38.4161</td>\n",
" <td>-63.6167</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 53 columns</p>\n",
"</div>"
],
"text/plain": [
" Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 \\\n",
"0 Afghanistan 33.0000 65.0000 0 0 0 0 \n",
"1 Albania 41.1533 20.1683 0 0 0 0 \n",
"2 Algeria 28.0339 1.6596 0 0 0 0 \n",
"3 Andorra 42.5063 1.5218 0 0 0 0 \n",
"4 Argentina -38.4161 -63.6167 0 0 0 0 \n",
"\n",
" 1/26/20 1/27/20 1/28/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 \\\n",
"0 0 0 0 ... 0 0 0 0 0 \n",
"1 0 0 0 ... 0 0 0 0 0 \n",
"2 0 0 0 ... 0 0 0 0 0 \n",
"3 0 0 0 ... 0 0 0 0 0 \n",
"4 0 0 0 ... 0 0 0 0 0 \n",
"\n",
" 3/7/20 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 1 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 1 1 1 1 \n",
"\n",
"[5 rows x 53 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Z0hyPiTcSX6S",
"colab_type": "code",
"colab": {},
"outputId": "bc56c8df-ef97-4a63-8669-fb7a0a93ca03"
},
"source": [
"grp_confirm_data_df[grp_confirm_data_df['Country/Region'] == 'India']"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>1/28/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>48</td>\n",
" <td>India</td>\n",
" <td>21.0</td>\n",
" <td>78.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>28</td>\n",
" <td>30</td>\n",
" <td>31</td>\n",
" <td>34</td>\n",
" <td>39</td>\n",
" <td>43</td>\n",
" <td>56</td>\n",
" <td>62</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 53 columns</p>\n",
"</div>"
],
"text/plain": [
" Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 \\\n",
"48 India 21.0 78.0 0 0 0 0 0 \n",
"\n",
" 1/27/20 1/28/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20 \\\n",
"48 0 0 ... 5 5 28 30 31 34 \n",
"\n",
" 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"48 39 43 56 62 \n",
"\n",
"[1 rows x 53 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "VYLhBWadSX6U",
"colab_type": "code",
"colab": {},
"outputId": "6213777a-0bf2-410f-9afb-58764d8fdd35"
},
"source": [
"# Show the India row of grp_reco_data_df.\n",
"grp_reco_data_df.loc[grp_reco_data_df['Country/Region'].eq('India')]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>1/28/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>48</td>\n",
" <td>India</td>\n",
" <td>21.0</td>\n",
" <td>78.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 53 columns</p>\n",
"</div>"
],
"text/plain": [
" Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 \\\n",
"48 India 21.0 78.0 0 0 0 0 0 \n",
"\n",
" 1/27/20 1/28/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20 \\\n",
"48 0 0 ... 3 3 3 3 3 3 \n",
"\n",
" 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"48 3 3 4 4 \n",
"\n",
"[1 rows x 53 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "AedT-JycSX6W",
"colab_type": "code",
"colab": {},
"outputId": "3bc221b4-78e3-4f8c-a168-3529651e81e3"
},
"source": [
"# Show the India row of grp_death_data_df.\n",
"grp_death_data_df.loc[grp_death_data_df['Country/Region'].eq('India')]"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Country/Region</th>\n",
" <th>Lat</th>\n",
" <th>Long</th>\n",
" <th>1/22/20</th>\n",
" <th>1/23/20</th>\n",
" <th>1/24/20</th>\n",
" <th>1/25/20</th>\n",
" <th>1/26/20</th>\n",
" <th>1/27/20</th>\n",
" <th>1/28/20</th>\n",
" <th>...</th>\n",
" <th>3/2/20</th>\n",
" <th>3/3/20</th>\n",
" <th>3/4/20</th>\n",
" <th>3/5/20</th>\n",
" <th>3/6/20</th>\n",
" <th>3/7/20</th>\n",
" <th>3/8/20</th>\n",
" <th>3/9/20</th>\n",
" <th>3/10/20</th>\n",
" <th>3/11/20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>48</td>\n",
" <td>India</td>\n",
" <td>21.0</td>\n",
" <td>78.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 53 columns</p>\n",
"</div>"
],
"text/plain": [
" Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 \\\n",
"48 India 21.0 78.0 0 0 0 0 0 \n",
"\n",
" 1/27/20 1/28/20 ... 3/2/20 3/3/20 3/4/20 3/5/20 3/6/20 3/7/20 \\\n",
"48 0 0 ... 0 0 0 0 0 0 \n",
"\n",
" 3/8/20 3/9/20 3/10/20 3/11/20 \n",
"48 0 0 0 1 \n",
"\n",
"[1 rows x 53 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 25
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Jsep8iE5SX6Y",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "eyckoQ4ESX6a",
"colab_type": "text"
},
"source": [
"# Predict using Moving Median and Exponentially Weighted Moving Average"
]
},
{
"cell_type": "code",
"metadata": {
"id": "WoCQ0WpPSX6b",
"colab_type": "code",
"colab": {}
},
"source": [
"def cal_moving_avg(input_df, window=2, target_col='3/10/20',\n",
"                   meta_cols=('Country/Region', 'Lat', 'Long')):\n",
"    \"\"\"Rolling median over time, evaluated at a single date column.\n",
"\n",
"    Despite the historical name, the statistic computed is a median, not a mean.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    input_df : pd.DataFrame\n",
"        One row per series, with the date columns expected in chronological order.\n",
"    window : int\n",
"        Number of consecutive date columns in each rolling window.\n",
"    target_col : str\n",
"        Date column at which the window ends; its rolling median is returned.\n",
"    meta_cols : iterable of str\n",
"        Non time-series columns dropped before rolling (missing ones are ignored).\n",
"\n",
"    Returns\n",
"    -------\n",
"    pd.Series\n",
"        Named `target_col` and indexed like `input_df`; NaN when fewer than\n",
"        `window` values are available up to `target_col`.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If `target_col` is not a column of `input_df`.\n",
"    \"\"\"\n",
"    # Keep only the time series: text and coordinate columns must never enter a window.\n",
"    series_df = input_df.drop(columns=list(meta_cols), errors='ignore')\n",
"    # Put dates on the rows so the window runs along the default axis;\n",
"    # rolling(axis=1) is deprecated in recent pandas releases.\n",
"    rolled = series_df.T.rolling(window).median()\n",
"    return rolled.loc[target_col]\n",
"\n",
"def cal_ewma(input_df, comm=0.3, target_col='3/10/20',\n",
"             meta_cols=('Country/Region', 'Lat', 'Long')):\n",
"    \"\"\"Exponentially weighted moving average over time, at a single date column.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    input_df : pd.DataFrame\n",
"        One row per series, with the date columns expected in chronological order.\n",
"    comm : float\n",
"        Centre of mass passed to `ewm(com=...)`; the smoothing factor is\n",
"        1 / (1 + comm). There is no fixed window size.\n",
"    target_col : str\n",
"        Date column whose smoothed value is returned.\n",
"    meta_cols : iterable of str\n",
"        Non time-series columns dropped before smoothing (missing ones are ignored).\n",
"\n",
"    Returns\n",
"    -------\n",
"    pd.Series\n",
"        Named `target_col` and indexed like `input_df`.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If `target_col` is not a column of `input_df`.\n",
"    \"\"\"\n",
"    # Keep only the time series: text and coordinate columns cannot be averaged.\n",
"    series_df = input_df.drop(columns=list(meta_cols), errors='ignore')\n",
"    # ewm() smooths down the rows, so dates must be on the rows. Calling it on the\n",
"    # untransposed frame would blend neighbouring rows (different series) instead\n",
"    # of consecutive dates of the same series.\n",
"    smoothed = series_df.T.ewm(com=comm).mean()\n",
"    return smoothed.loc[target_col]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "fT9iU5z_SX6d",
"colab_type": "code",
"colab": {}
},
"source": [
"# Output column label -> grouped per-country frame it is predicted from.\n",
"# A single mapping keeps the labels and the frame order in one place.\n",
"grouped_frames = {\n",
"    'Confirmed': grp_confirm_data_df,\n",
"    'Recovered': grp_reco_data_df,\n",
"    'Death': grp_death_data_df,\n",
"}\n",
"\n",
"# Concatenate once per table with the mapping keys as column names, instead of\n",
"# growing an empty DataFrame inside a loop and renaming the columns afterwards.\n",
"all_pred_df = pd.concat(\n",
"    {label: cal_moving_avg(frame, window=2) for label, frame in grouped_frames.items()},\n",
"    axis=1,\n",
")\n",
"all_pred_df_ew = pd.concat(\n",
"    {label: cal_ewma(frame, comm=0.4) for label, frame in grouped_frames.items()},\n",
"    axis=1,\n",
")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "I4xbvKgTSX6f",
"colab_type": "code",
"colab": {},
"outputId": "c633a4c7-7032-40fd-a043-82b26d8db7c5"
},
"source": [
"all_pred_df"
],
"execution_count": 0,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Confirmed</th>\n",
" <th>Recovered</th>\n",
" <th>Death</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>4.5</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>20.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>14.5</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>109</td>\n",
" <td>1124.0</td>\n",
" <td>7.5</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>110</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>111</td>\n",
" <td>59.5</td>\n",
" <td>9.5</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>112</td>\n",
" <td>353.0</td>\n",
" <td>18.5</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>113</td>\n",
" <td>30.5</td>\n",
" <td>16.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>114 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Confirmed Recovered Death\n",
"0 4.5 0.0 0.0\n",
"1 6.0 0.0 0.0\n",
"2 20.0 0.0 0.0\n",
"3 1.0 0.0 0.0\n",
"4 14.5 0.0 1.0\n",
".. ... ... ...\n",
"109 1124.0 7.5 25.0\n",
"110 1.0 0.0 0.0\n",
"111 59.5 9.5 0.0\n",
"112 353.0 18.5 5.0\n",
"113 30.5 16.0 0.0\n",
"\n",
"[114 rows x 3 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "TMOVR2GsSX6h",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "lHdlZEplSX6j",
"colab_type": "code",
"colab": {}
},
"source": [
"# Write the moving-median predictions to an Excel workbook, omitting the index column.\n",
"all_pred_df.to_excel(excel_writer=\"Sub_Mov_Median.xlsx\", index=False)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "n3vhNGLcSX6k",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment