Skip to content

Instantly share code, notes, and snippets.

@romeokienzler
Created April 16, 2020 17:27
Show Gist options
  • Save romeokienzler/badcd9a08afc8e360d8053d9c0ca157a to your computer and use it in GitHub Desktop.
Save romeokienzler/badcd9a08afc8e360d8053d9c0ca157a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": "# The code was removed by Watson Studio for sharing."
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": "import types\nimport pandas as pd\nfrom botocore.client import Config\nimport ibm_boto3\n\ndef __iter__(self): return 0\n\n\nclient = ibm_boto3.client(service_name='s3',\n ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],\n ibm_auth_endpoint=credentials_1['IBM_AUTH_ENDPOINT'],\n config=Config(signature_version='oauth'),\n endpoint_url=credentials_1['ENDPOINT'])\nbucket=credentials_1['BUCKET']"
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": "client.download_file(Bucket=bucket,Key='cases.csv',Filename='cases.csv')"
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": "df_raw = pd.read_csv('cases.csv')"
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": "['Austria',\n 'Belgium',\n 'Canada',\n 'Czechia',\n 'Denmark',\n 'Dominican_Republic',\n 'Ecuador',\n 'France',\n 'Germany',\n 'Hungary',\n 'Iran',\n 'Ireland',\n 'Israel',\n 'Italy',\n 'Netherlands',\n 'Norway',\n 'Portugal',\n 'Romania',\n 'Spain',\n 'Sweden',\n 'Switzerland',\n 'Turkey',\n 'United_Kingdom',\n 'United_States_of_America']"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": "window_size = 10\ncountry_col = 'countriesAndTerritories'\n\ndf = df_raw\n\n#df = df[df['countriesAndTerritories']=='Italy']\n\n# Rows of all countries stay interleaved in date order (sort_values returns a new frame).\ndf = df.sort_values(['year', 'month', 'day'], ascending=[1, 1, 1])\n\ndf['ts'] = pd.to_datetime(df[['year', 'month', 'day']])\n\n\ndef by_country(column):\n    # Group one column of df by country so shift/cumsum never cross a country boundary.\n    return df[column].groupby(df[country_col])\n\n\n# Lags are taken per country: a plain df.cases.shift(1) on the date-sorted frame\n# would pair each row with whichever country sits on the previous line.\n# NOTE(review): shift(i) steps back i rows, which is i days only if every country\n# has exactly one row per consecutive day - confirm against cases.csv.\nprevious_cases = by_country('cases').shift(1)\n\ndf['increase_cases'] = df.cases - previous_cases\n\ndf['increase_ratio'] = df.cases / previous_cases\n\n# NOTE(review): the factor 100 presumably encodes an assumed ~1% fatality rate - confirm.\ndf['cases_estimated'] = df.deaths*100\n\n# Running total per country, not across the whole frame.\ndf['total_cases_estimated'] = by_country('cases_estimated').cumsum()\n\n# NOTE(review): column '2018' is presumably the 2018 population - confirm.\ndf['percentage_infected'] = 100/df['2018']*df['total_cases_estimated']\n\ndf['percentage_died'] = 100/df['2018']*df['deaths']\n\n\nlag_columns = ['cases', 'percentage_died', 'deaths']\nlagged = {}\n\n# First all <column>_<i>_days_before features, in the same column order as before.\nfor column in lag_columns:\n    grouped = by_country(column)\n    lagged[column] = [grouped.shift(i) for i in range(1, window_size+1)]\n    for i, lag in enumerate(lagged[column], start=1):\n        df[column+'_'+str(i)+'_days_before'] = lag\n\n# Mean of the previous window_size rows (current row excluded); NaN until a\n# country has a full window of history.\nfor column in lag_columns:\n    df[column+'_'+str(window_size)+'_day_average'] = sum(lagged[column])/window_size\n\n#for i in range(1,window_size+1):\n#    df = df[df['deaths_'+str(i)+'_days_before']>0]\n\n#df = df[df['Cases']>100]\n\n\n#df = df[df['ts']>'2020-04-01']\n\n# Select the column before summing so non-numeric columns (e.g. ts) are never aggregated.\ngroup_by_deaths = by_country('percentage_died').sum()\nbad_countries = list(group_by_deaths[group_by_deaths>0.001].index)\n\n\n\n\n#df = df[df['countriesAndTerritories'].isin(bad_countries)]\n#df\nbad_countries\n\n\n#Germany 1607\n#Italy 15253\n#Netherlands 1538\n#Spain 11570\n#United_Kingdom 4972\n#United_States_of_America 10973"
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": "df.to_csv('cases_features.csv')"
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": "client.upload_file(Filename='cases_features.csv',Bucket=bucket,Key='cases_features.csv')"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@wouche2
Copy link

wouche2 commented Apr 16, 2020

Hi Romeo,
I was wondering about section 5: since you first sort the CSV by date and then use the shift function to calculate the increase_cases column, aren't you comparing cases from different countries instead of different days? As I read it, I would assume we want to compare the cases for the same country.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment