Skip to content

Instantly share code, notes, and snippets.

@thoughtfulbloke
Created May 21, 2022 08:49
Show Gist options
  • Save thoughtfulbloke/6ec3f75a55a262e0392dade3114d4cb2 to your computer and use it in GitHub Desktop.
Save thoughtfulbloke/6ec3f75a55a262e0392dade3114d4cb2 to your computer and use it in GitHub Desktop.
library(rvest) # process web pages
library(dplyr) # general data pipeline tools
library(lubridate) # date functions
# The demographic pages are saved locally as HTML files in a folder
# named case_demografics_html_asof, each file named with a datestamp
# for its contents. Collect the names of those saved pages.
Demographs <- list.files(
  path = "../case_demografics_html_asof",
  pattern = ".*html$"
)
# Read one saved demographics page and return its vaccination table.
#
# Arguments:
#   x           File name of a saved HTML page inside `dir`. The name is
#               passed to ymd() to obtain the report date, so it must carry
#               a year-month-day datestamp.
#   dir         Folder holding the saved pages. Defaults to the folder this
#               script reads from.
#   table_index Position of the wanted table among the page's <table>
#               elements. Defaults to 4, where the vaccine data sits for
#               the entire period covered.
#
# Returns a data frame holding that table, with an added `asOf` column set
# to the report date.
#
# Stops with an error when the file name yields no date or when the page
# has fewer than `table_index` tables.
extract_vax <- function(x,
                        dir = "../case_demografics_html_asof",
                        table_index = 4) {
  # Resolve the date first so a badly named file fails before any parsing,
  # rather than letting an NA date flow into the combined table.
  report_date <- ymd(x)
  if (anyNA(report_date)) {
    stop("Could not read a report date from file name: ", x, call. = FALSE)
  }

  # extract every table on the page with rvest
  tbls <- file.path(dir, x) %>%
    read_html() %>%
    html_nodes("table") %>%
    html_table(header = TRUE)

  if (length(tbls) < table_index) {
    stop(
      "Expected at least ", table_index, " tables in ", x,
      " but found ", length(tbls),
      call. = FALSE
    )
  }

  new_hos <- as.data.frame(tbls[[table_index]])
  new_hos$asOf <- report_date
  new_hos
}
# Without any saved pages the steps below would fail inside rename() with
# an unrelated message, so stop early with a clear one.
if (length(Demographs) == 0) {
  stop("No saved HTML reports were found, so there is nothing to summarise.",
       call. = FALSE)
}

# extract the table for each webpage
list_of_reports <- lapply(Demographs, extract_vax)

# Turn the list of tables into one table, keep only the row describing
# cases not eligible for vaccination (two wordings of that label are
# matched), and compute the change in the cumulative count between
# consecutive reports.
df_reports <- bind_rows(list_of_reports) %>%
  # The three table columns are renamed by position; the date column added
  # by extract_vax() is renamed by name so it does not depend on how many
  # columns the scraped table has.
  rename(
    Vax = 1,
    under12cases = 2,
    under12hosptalised = 3,
    ReportDate = asOf
  ) %>%
  filter(
    Vax %in% c(
      "Not eligible for vaccination at the time they were reported as a case",
      "Not eligible for vaccine due to being less than 12 years old"
    )
  ) %>%
  select(ReportDate, under12cases) %>%
  arrange(ReportDate) %>%
  # new_cases is the difference from the previous report present in the
  # folder; where a day's page is missing it covers more than one day.
  mutate(new_cases = under12cases - lag(under12cases))

write.csv(df_reports, file = "~/Desktop/under12.csv", row.names = FALSE)
ReportDate under12cases new_cases
2021-12-03 2083 NA
2021-12-04 2105 22
2021-12-05 2131 26
2021-12-06 2167 36
2021-12-09 2245 78
2021-12-10 2267 22
2021-12-11 2287 20
2021-12-12 2318 31
2021-12-13 2352 34
2021-12-14 2379 27
2021-12-15 2397 18
2021-12-16 2423 26
2021-12-17 2450 27
2021-12-18 2457 7
2021-12-19 2468 11
2021-12-20 2485 17
2021-12-21 2498 13
2021-12-22 2512 14
2021-12-23 2528 16
2021-12-26 2590 62
2021-12-27 2598 8
2021-12-28 2603 5
2021-12-29 2620 17
2021-12-30 2645 25
2021-12-31 2660 15
2022-01-02 2697 37
2022-01-03 2702 5
2022-01-04 2710 8
2022-01-05 2714 4
2022-01-06 2719 5
2022-01-09 2745 26
2022-01-10 2754 9
2022-01-11 2755 1
2022-01-12 2762 7
2022-01-13 2766 4
2022-01-14 2769 3
2022-01-15 2771 2
2022-01-16 2778 7
2022-01-17 2781 3
2022-01-18 2786 5
2022-01-19 2791 5
2022-01-20 2797 6
2022-01-21 2802 5
2022-01-22 2814 12
2022-01-23 2816 2
2022-01-24 2822 6
2022-01-25 2828 6
2022-01-26 2828 0
2022-01-27 2839 11
2022-01-28 2852 13
2022-01-29 2858 6
2022-01-30 2875 17
2022-01-31 2885 10
2022-02-01 2901 16
2022-02-02 2920 19
2022-02-03 2946 26
2022-02-04 2979 33
2022-02-05 3013 34
2022-02-06 3049 36
2022-02-07 3080 31
2022-02-08 3117 37
2022-02-09 3154 37
2022-02-10 3193 39
2022-02-11 3268 75
2022-02-12 3343 75
2022-02-14 3419 76
2022-02-15 3509 90
2022-02-16 3895 386
2022-02-17 4133 238
2022-02-18 4437 304
2022-02-19 4645 208
2022-02-20 5022 377
2022-02-21 5337 315
2022-02-22 5726 389
2022-02-23 6150 424
2022-02-24 6607 457
2022-02-25 7716 1109
2022-02-26 9329 1613
2022-02-27 11328 1999
2022-02-28 13227 1899
2022-03-01 15563 2336
2022-03-02 18860 3297
2022-03-03 21754 2894
2022-03-04 24834 3080
2022-03-05 27480 2646
2022-03-06 29875 2395
2022-03-07 32399 2524
2022-03-08 35666 3267
2022-03-09 38800 3134
2022-03-10 41503 2703
2022-03-11 44877 3374
2022-03-12 48324 3447
2022-03-13 50892 2568
2022-03-14 53654 2762
2022-03-15 57488 3834
2022-03-16 60866 3378
2022-03-17 64005 3139
2022-03-18 66428 2423
2022-03-19 70193 3765
2022-03-20 72581 2388
2022-03-21 75317 2736
2022-03-22 79415 4098
2022-03-23 83107 3692
2022-03-24 86284 3177
2022-03-25 89226 2942
2022-03-26 91991 2765
2022-03-27 93916 1925
2022-03-28 96571 2655
2022-03-29 100478 3907
2022-03-30 103404 2926
2022-03-31 105691 2287
2022-04-01 108222 2531
2022-04-02 110074 1852
2022-04-03 112232 2158
2022-04-04 114128 1896
2022-04-05 116888 2760
2022-04-06 119312 2424
2022-04-07 121287 1975
2022-04-08 122962 1675
2022-04-09 124586 1624
2022-04-10 125892 1306
2022-04-11 127234 1342
2022-04-12 129269 2035
2022-04-13 130878 1609
2022-04-14 132367 1489
2022-04-15 133512 1145
2022-04-16 134460 948
2022-04-17 135450 990
2022-04-18 136451 1001
2022-04-19 137564 1113
2022-04-20 139084 1520
2022-04-21 140325 1241
2022-04-22 141531 1206
2022-04-23 142650 1119
2022-04-24 143382 732
2022-04-25 144044 662
2022-04-26 144685 641
2022-04-27 145704 1019
2022-04-28 146547 843
2022-04-29 147381 834
2022-04-30 148204 823
2022-05-01 148875 671
2022-05-02 149618 743
2022-05-03 150591 973
2022-05-04 151362 771
2022-05-05 152120 758
2022-05-06 152886 766
2022-05-07 153662 776
2022-05-08 154306 644
2022-05-09 155032 726
2022-05-10 156216 1184
2022-05-11 157148 932
2022-05-12 158112 964
2022-05-13 158992 880
2022-05-14 159868 876
2022-05-15 160555 687
2022-05-16 161388 833
2022-05-17 162582 1194
2022-05-18 163645 1063
2022-05-19 164528 883
2022-05-20 165402 874
2022-05-21 166203 801
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment