Caellwyn/facebook_prophet_prepare_data.ipynb

## facebook_prophet_prepare_data.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "facebook_prophet_prepare_data.ipynb",
      "private_outputs": true,
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyNKEbDRjh4s3Jd27aunoPx0",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/Caellwyn/cc0cd967d06952ca640f9e6b08d70fa9/facebook_prophet_prepare_data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zOPb6b-Tvo5u"
      },
      "source": [
        "import pandas as pd\r\n",
        "#!pip install prophet   (releases before 1.0 were published as fbprophet)\r\n",
        "#The library was renamed from `fbprophet` to `prophet` in release 1.0;\r\n",
        "#try the current name first and fall back to the legacy one.\r\n",
        "try:\r\n",
        "    from prophet import Prophet\r\n",
        "except ImportError:\r\n",
        "    from fbprophet import Prophet\r\n",
        "\r\n",
        "#Configuration: which series to extract for forecasting\r\n",
        "division = 'country'  #'country' or 'state'; regional data is available for some countries\r\n",
        "region = 'United States'  #compared against CountryName (country) or RegionName (state)\r\n",
        "prediction = 'ConfirmedCases' #ConfirmedDeaths is also available for forecasting.\r\n",
        "\r\n",
        "#get the latest data from OxCGRT\r\n",
        "DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'\r\n",
        "#Only the columns needed for filtering and forecasting are loaded.\r\n",
        "full_df = pd.read_csv(DATA_URL,\r\n",
        "                usecols=['Date','CountryName','RegionName','Jurisdiction',\r\n",
        "                           'ConfirmedCases','ConfirmedDeaths'],\r\n",
        "                parse_dates=['Date'],\r\n",
        "                encoding=\"ISO-8859-1\",\r\n",
        "                dtype={\"RegionName\": str,\r\n",
        "                        \"CountryName\":str})\r\n",
        "\r\n",
        "#Filter the region we want to predict.\r\n",
        "#The trailing [:-1] drops the last matching row (presumably the newest,\r\n",
        "#possibly incomplete, day -- TODO confirm).\r\n",
        "if division == 'country':\r\n",
        "    df = full_df[(full_df['Jurisdiction'] == 'NAT_TOTAL') & (full_df['CountryName'] == region)][:-1]\r\n",
        "elif division == 'state':\r\n",
        "    df = full_df[(full_df['Jurisdiction'] == 'STATE_TOTAL') & (full_df['RegionName'] == region)][:-1]\r\n",
        "else:\r\n",
        "    #Fail fast: otherwise `df` is never assigned and the rename below raises NameError.\r\n",
        "    raise ValueError(f\"division must be 'country' or 'state', got {division!r}\")\r\n",
        "\r\n",
        "#A misspelled or unavailable region would otherwise silently yield an empty frame.\r\n",
        "if df.empty:\r\n",
        "    raise ValueError(f'no {division}-level rows found for region {region!r}')\r\n",
        "\r\n",
        "#Since we are not using exogenous variables, we just keep the dates and endogenous data,\r\n",
        "#renamed to the ds/y column names that Prophet expects.\r\n",
        "df = df[['Date',prediction]].rename(columns = {'Date':'ds', prediction:'y'})"
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"name": "facebook_prophet_prepare_data.ipynb",
	"private_outputs": true,
	"provenance": [],
	"collapsed_sections": [],
	"authorship_tag": "ABX9TyNKEbDRjh4s3Jd27aunoPx0",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/Caellwyn/cc0cd967d06952ca640f9e6b08d70fa9/facebook_prophet_prepare_data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"metadata": {
	"id": "zOPb6b-Tvo5u"
	},
	"source": [
	"import pandas as pd\r\n",
	"#!pip install prophet   (releases before 1.0 were published as fbprophet)\r\n",
	"#The library was renamed from `fbprophet` to `prophet` in release 1.0;\r\n",
	"#try the current name first and fall back to the legacy one.\r\n",
	"try:\r\n",
	"    from prophet import Prophet\r\n",
	"except ImportError:\r\n",
	"    from fbprophet import Prophet\r\n",
	"\r\n",
	"#Configuration: which series to extract for forecasting\r\n",
	"division = 'country'  #'country' or 'state'; regional data is available for some countries\r\n",
	"region = 'United States'  #compared against CountryName (country) or RegionName (state)\r\n",
	"prediction = 'ConfirmedCases' #ConfirmedDeaths is also available for forecasting.\r\n",
	"\r\n",
	"#get the latest data from OxCGRT\r\n",
	"DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'\r\n",
	"#Only the columns needed for filtering and forecasting are loaded.\r\n",
	"full_df = pd.read_csv(DATA_URL,\r\n",
	"                usecols=['Date','CountryName','RegionName','Jurisdiction',\r\n",
	"                           'ConfirmedCases','ConfirmedDeaths'],\r\n",
	"                parse_dates=['Date'],\r\n",
	"                encoding=\"ISO-8859-1\",\r\n",
	"                dtype={\"RegionName\": str,\r\n",
	"                        \"CountryName\":str})\r\n",
	"\r\n",
	"#Filter the region we want to predict.\r\n",
	"#The trailing [:-1] drops the last matching row (presumably the newest,\r\n",
	"#possibly incomplete, day -- TODO confirm).\r\n",
	"if division == 'country':\r\n",
	"    df = full_df[(full_df['Jurisdiction'] == 'NAT_TOTAL') & (full_df['CountryName'] == region)][:-1]\r\n",
	"elif division == 'state':\r\n",
	"    df = full_df[(full_df['Jurisdiction'] == 'STATE_TOTAL') & (full_df['RegionName'] == region)][:-1]\r\n",
	"else:\r\n",
	"    #Fail fast: otherwise `df` is never assigned and the rename below raises NameError.\r\n",
	"    raise ValueError(f\"division must be 'country' or 'state', got {division!r}\")\r\n",
	"\r\n",
	"#A misspelled or unavailable region would otherwise silently yield an empty frame.\r\n",
	"if df.empty:\r\n",
	"    raise ValueError(f'no {division}-level rows found for region {region!r}')\r\n",
	"\r\n",
	"#Since we are not using exogenous variables, we just keep the dates and endogenous data,\r\n",
	"#renamed to the ds/y column names that Prophet expects.\r\n",
	"df = df[['Date',prediction]].rename(columns = {'Date':'ds', prediction:'y'})"
	],
	"execution_count": null,
	"outputs": []
	}
	]
	}