Skip to content

Instantly share code, notes, and snippets.

@MJacobs1985
Last active March 13, 2023 23:28
Show Gist options
  • Save MJacobs1985/cda7342434e371bba58ddcfa58106124 to your computer and use it in GitHub Desktop.
Save MJacobs1985/cda7342434e371bba58ddcfa58106124 to your computer and use it in GitHub Desktop.
/* LETS LOOK AT THE WORLD DATA */
FILENAME REFFILE '/Data/owid-covid-data.xlsx';
PROC IMPORT DATAFILE=REFFILE DBMS=XLSX OUT=WORK.IMPORT;GETNAMES=YES;RUN;
proc freq data=import nlevels;table date;run;
proc freq data=import nlevels;table location;run;
data import; set import;
new = input(date,yymmdd10.);
drop date;
rename new=date;
run;
data import;set import;format date date9.;run;
data selection; set import;
where location in ('Belgium', 'France', 'Germany', 'Netherlands', 'Sweden', 'Norway',
'Denmark', 'United Kingdom', 'Spain', 'Italy', 'Israel', 'South Africa', 'Brazil', 'Portugal');run;
/* Lets keep only the columns that really matter */
data selection
(keep= date date total_cases_per_million location hosp_patients hosp_patients_per_million icu_patients new_cases new_cases_per_million new_deaths new_tests new_vaccinations positive_rate
reproduction_rate tests_per_case total_cases total_deaths total_tests total_vaccinations population stringency_index
people_vaccinated people_fully_vaccinated new_cases_smoothed_per_million icu_patients_per_million new_tests_smoothed_per_thousand new_deaths_per_million);
set selection;
run;
/* To run any of the time-series analysis there CANNOT be or MAYNOT be any missing values
However, instead of to replace the missings by zero or a mean, it is better to just use a more narrow window of analysis */
proc means data=WORK.selection chartype mean std min max n vardef=df nmiss;
var total_cases new_cases total_deaths new_deaths reproduction_rate new_tests_smoothed_per_thousand
icu_patients hosp_patients total_tests new_tests positive_rate tests_per_case
total_vaccinations new_vaccinations people_vaccinated people_fully_vaccinated population icu_patients_per_million new_deaths_per_million;run;
data selection; set selection;
if new_cases <0 then new_cases =0; else new_cases = new_cases;
if new_tests <0 then new_tests =0; else new_cases = new_cases;
if total_vaccinations=. then total_vaccinations =0; else total_vaccinations = total_vaccinations;
if people_vaccinated=. then people_vaccinated =0; else people_vaccinated = people_vaccinated;
if people_fully_vaccinated=. then people_fully_vaccinated =0; else people_fully_vaccinated = people_fully_vaccinated;
if positive_rate=. then positive_rate =0; else positive_rate = positive_rate;
if tests_per_case=. then tests_per_case =0; else tests_per_case = tests_per_case;
if total_cases_per_million=. then total_cases_per_million =0; else total_cases_per_million = total_cases_per_million;
percentage_new_cases = (new_cases / population) *100;
percentage_total_cases = (total_cases / population) *100;
percentage_hosp_patients = (hosp_patients / population) *100;
percentage_icu_patients = (icu_patients / population) *100;
percentage_new_deaths = (new_deaths / population) *100;
percentage_total_deaths = (total_deaths / population) *100;
percentage_total_vaccinations = (total_vaccinations / population) *100;
percentage_people_vaccinated = (people_vaccinated / population) *100;
perc_people_fully_vaccinated = (people_fully_vaccinated / population) *100;
run;
/* RIVM DATA */
FILENAME REFFILE 'Data/RR_NL.xlsx';
PROC IMPORT DATAFILE=REFFILE
DBMS=XLSX
OUT=WORK.RR;
GETNAMES=YES;
RUN;
data netherlands;set selection; where location in ('Netherlands');run;
data RR_NL; set netherlands (keep=date stringency_index reproduction_rate new_tests_smoothed_per_thousand);run;
proc sort data=RR; by date;run;
proc sort data=RR_NL; by date;run;
data curfew; merge RR_NL RR; by date; run;
data curfew;set curfew;
CIdist=Rt_up-Rt_low;
changeRR=((Rt_avg - lag14(Rt_avg))/lag14(Rt_avg))*100;
changeRR2=((Reproduction_rate - lag14(Reproduction_rate))/lag14(Reproduction_rate))*100;
changeRR_low=((Rt_low - lag14(Rt_low))/lag14(Rt_low))*100;
changeRR_up=((Rt_up - lag14(Rt_up))/lag14(Rt_up))*100;
changeRR_7=((Rt_avg - lag7(Rt_avg))/lag7(Rt_avg))*100;
changeRR2_7=((Reproduction_rate - lag7(Reproduction_rate))/lag7(Reproduction_rate))*100;
changeRR_low_7=((Rt_low - lag7(Rt_low))/lag7(Rt_low))*100;
changeRR_up_7=((Rt_up - lag7(Rt_up))/lag7(Rt_up))*100;
run;
data netherlands;set netherlands;if percentage_hosp_patients=. then percentage_hosp_patients=0.0017725; else percentage_hosp_patients=percentage_hosp_patients;run;
data netherlands;set netherlands;if perc_people_fully_vaccinated=0 then perc_people_fully_vaccinated2=.; else perc_people_fully_vaccinated2=perc_people_fully_vaccinated;run;
ods graphics / imagefmt=svg width=10in height=5in;
proc sgplot data=NETHERLANDS;
where '27FEB2020'd <= date <= '11DEC2021'd;
needle x=date y=perc_people_fully_vaccinated2 / lineattrs=(color=grey) markers markerattrs=(color=grey) transparency=0.7 legendlabel='Complete Vaccination' name='CV' y2axis;
series x=date y=stringency_index / lineattrs=(color=red pattern=2) legendlabel='Stringency Index' name='SI' y2axis;
series x=date y=icu_patients_per_million / lineattrs=(color=orange) legendlabel='ICU Patients (per million)' name='ICU' y2axis;
series x=date y=hosp_patients_per_million / lineattrs=(color=green pattern=3) legendlabel='Hospital Patients (per million)' name='HP';
xaxis type=time label='Time (days)';
yaxis label='Hopsital Patients (per million)';
y2axis label='ICU Patients (per million), Lockdown Stringency Index & Percentage Complete Vax ';
title 'Covid-19 ICU & Hospital Patients in relationship to Complete Vaccinations and the Lockdown Stringency for the Netherlands';
keylegend 'SI' 'CV' 'ICU' 'HP';
run;
proc sgplot data=NETHERLANDS;
where '27FEB2020'd <= date <= '11DEC2021'd;
needle x=date y=perc_people_fully_vaccinated2 / lineattrs=(color=grey) markers markerattrs=(color=grey) transparency=0.7 legendlabel='Complete Vaccination' name='CV' y2axis;
series x=date y=stringency_index / lineattrs=(color=red pattern=2) legendlabel='Stringency Index' name='SI' y2axis;
series x=date y=new_deaths_per_million / lineattrs=(color=orange) legendlabel='Deaths (per million)' name='Death' y2axis;
series x=date y=new_cases_per_million / lineattrs=(color=green) legendlabel='Cases (per million)' name='Case';
xaxis type=time label='Time (days)';
yaxis label='New Cases (per million)';
y2axis label='Lockdown Stringency Index & New Deaths (per million)';
title 'Covid-19 New Deaths & New Cases (per million) in relationship to Complete Vaccinations and the Lockdown Stringency for the Netherlands';
keylegend 'SI' 'CV' 'Death' 'Case';
run;
/* Extend the data to let the model do forecasts on exogenous data
I will keep the rise in reproduction rate, perc_people_fully_vaccinatied and play with */
data netherlands;set netherlands;
if date => '31JAN2021'd and perc_people_fully_vaccinated=0 then perc_people_fully_vaccinated2=.; else perc_people_fully_vaccinated2=perc_people_fully_vaccinated;run;
proc expand data=netherlands out=try plots=(input output) ;
id date;
convert perc_people_fully_vaccinated2 / observed=(beginning,average);
convert hosp_patients_per_million / observed=(beginning,average);
convert hosp_patients / observed=(beginning,average);
run;
proc expand data=try out=try2 plots=(input output) extrapolate ;
id date;
convert reproduction_rate;
convert stringency_index;
convert perc_people_fully_vaccinated2;
run;
proc sort data=try2 out=Work.preProcessedData;by date;run;
proc varmax data=Work.preProcessedData plots(only)=(forecasts impulse model residual)
outest=work.outest covout outstat=work.outstat;
id date interval=day;
where '22MAR2020'd <= date <= '31DEC2021'd;
model hosp_patients_per_million icu_patients_per_million new_deaths_per_million=
reproduction_rate
stringency_index
perc_people_fully_vaccinated2
/ cointtest=(johansen=(type=max))
dftest p=14 q=1 trend=quad nseason=7
print=(corrb covb diagnose impulse impulsex roots yw);
cointeg rank=2;
nloptions maxiter=500 tech=nrridg;
output out=work.out_increase lead=60 back=22 alpha=0.50 ;
causal group1=(icu_patients_per_million) group2=(reproduction_rate );
causal group1=(icu_patients_per_million) group2=(stringency_index );
causal group1=(icu_patients_per_million) group2=(perc_people_fully_vaccinated2 );
causal group1=(icu_patients_per_million ) group2=(reproduction_rate stringency_index perc_people_fully_vaccinated2);
causal group1=(hosp_patients_per_million) group2=(reproduction_rate );
causal group1=(hosp_patients_per_million) group2=(stringency_index );
causal group1=(hosp_patients_per_million) group2=(perc_people_fully_vaccinated2 );
causal group1=(hosp_patients_per_million ) group2=(reproduction_rate stringency_index perc_people_fully_vaccinated2);
causal group1=(new_deaths_per_million) group2=(reproduction_rate );
causal group1=(new_deaths_per_million) group2=(stringency_index );
causal group1=(new_deaths_per_million) group2=(perc_people_fully_vaccinated2 );
causal group1=(new_deaths_per_million ) group2=(reproduction_rate stringency_index perc_people_fully_vaccinated2);
run;
/* Plot ICU Predictions */
ods graphics / imagefmt=svg width=10in height=6in;
proc sgplot data=work.out_increase noautolegend;
band x=date lower=LCI2 upper=UCI2 / transparency=0.8 fillattrs=(color=blue);
series x=date y=icu_patients_per_million / lineattrs=(color=black) legendlabel="ICU Patients" name="ICU";
series x=date y=FOR2 / legendlabel="Forecasted" name="f" lineattrs=(color=blue pattern=2);
xaxis type=time label='Time (days)' grid;
yaxis label='ICU Patients' grid;
title 'VECMX(14,0) model to predict # NL ICU Patients per Million over Time';
run;
/* Plot Hospital Predictions */
ods graphics / imagefmt=svg width=10in height=6in;
proc sgplot data=work.out_increase noautolegend;
band x=date lower=LCI1 upper=UCI1 / transparency=0.8 fillattrs=(color=blue);
series x=date y=hosp_patients_per_million / lineattrs=(color=black) legendlabel="Hospital Patients" name="HP";
series x=date y=FOR1 / legendlabel="Forecasted" name="f" lineattrs=(color=blue pattern=2);
xaxis type=time label='Time (days)' grid;
yaxis label='Hospital Patients' grid;
title 'VECMX(14,0) model to predict # NL Hospital Patients per Million over Time';
run;
/* Plot New Deaths */
ods graphics / imagefmt=svg width=10in height=6in;
proc sgplot data=work.out_increase noautolegend;
band x=date lower=LCI3 upper=UCI3 / transparency=0.8 fillattrs=(color=blue);
series x=date y=new_deaths_per_million / lineattrs=(color=black) legendlabel="New Deaths" name="HP";
series x=date y=FOR3/ legendlabel="Forecasted" name="f" lineattrs=(color=blue pattern=2);
xaxis type=time label='Time (days)' grid;
yaxis label='New Deaths' grid;
title 'VECMX(14,0) model to predict # NL New Deaths per Million over Time';
run;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment