This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###### Choose the countries to analyse and locate the date columns
# To list every available country: sort(unique(series_df$Country.Region))
selected_countries <- c('China', 'Italy', 'US', 'Iran')
## Positions of the columns whose name contains 'X'; the pipeline below selects
## them next to Country.Region and Status (original note: sum by country by status)
date_col_idx <- grep('X', colnames(series_df))
country_data = series_df %>% | |
filter(Country.Region %in% selected_countries) %>% | |
select(c(Country.Region, Status, colnames(.)[date_col_idx])) %>% |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Check whether the column names of all datasets match up
columns = sapply(series_data_, colnames)
### Compare every dataset's column names against the first dataset's in a
### single pass. This scales to any number of datasets or columns, so no
### hand-written pairwise comparisons are needed when the data source grows.
### identical() returns FALSE (rather than erroring or recycling) when two
### datasets have a different number of columns.
column_names_ = lapply(series_data_, colnames)
all(vapply(column_names_, identical, logical(1), column_names_[[1]]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Stack the list of datasets row-wise into one single data frame
series_df <- do.call(rbind, series_data_)
##### (optional) Recode long country names: old value -> new value
country_recodes <- c('United Kingdom' = 'UK', 'Korea, South' = 'Korea S')
for (old_name in names(country_recodes)) {
  rows_to_recode <- which(series_df$Country.Region == old_name)
  series_df[rows_to_recode, 'Country.Region'] <- country_recodes[[old_name]]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### helper function for trajectory line chart: | |
#### note: this function depends on a global variable!!!
helper_vis_continuous <- function(dat = country_data, country = 'US') { | |
## vertical structure for ggplot | |
current_dat = dat %>% | |
filter(Country.Region == country) %>% | |
reshape2::melt(.) %>% | |
set_colnames(c('Country', 'Status', 'Date', 'Total')) | |
## starts with the date when 1st case was confirmed for this country |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Restrict to the countries being plotted, then reshape wide -> long
plot_subset <- filter(country_data, Country.Region %in% selected_countries)
current_dat <- reshape2::melt(plot_subset)
## one row per country / status / date with its running total
colnames(current_dat) <- c('Country', 'Status', 'Date', 'Total')
## starts with the date when 1st case confirmed | |
find_case1_onwards <- function(country_name) { | |
case1_idx = which(current_dat[current_dat$Country == country_name, 'Total'] > 0)[1] | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Plotly with ggplot facet_wrap | |
country_plot = ggplot(dat = current_dat, | |
aes_string(x = 'Date', y = 'Total', color = 'Status', group = 'Status', linetype = 'Status')) + | |
geom_line(lwd = 1.2) + | |
facet_wrap(~ Country, scales = "free") + | |
labs(title = sprintf('Trajectories of the Status of Coronavirus \n')) + | |
xlab('Date') + ylab('Total Numbers') + | |
theme_bw() + |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
####### Heatmap from 2/22 to 3/28
library(gplots)
## 1000-step colour ramp running light blue -> black -> red
my_palette <- colorRampPalette(c("light blue", "black", "red"))(1000)
## confirmed cases only
heatmap_dat <- filter(country_data, Status == 'confirmed')
## keep the first two columns plus every column from '2_22_20' onwards
col_idx <- which(colnames(heatmap_dat) == '2_22_20')
heatmap_dat <- heatmap_dat[, c(1:2, col_idx:ncol(heatmap_dat))]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#####======= static and animated plots
## Every layout value below is derived from the largest Total in the data
max_total <- max(confirmed_formatted$Total)
total_text_y <- 0.87 * max_total
panel_size_y <- max_total * 1.15
## reference positions at multiples of floor(max / 8), up to the maximum
vline_step <- floor(max_total / 8)
vline_original_y <- seq(vline_step, max_total, by = vline_step)
## text sizes
country_font_size <- 10
bar_end_num_size <- 11
staticplot = ggplot(confirmed_formatted, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### Animation timing: the transition length (together with ease_aes below)
#### is what gives the animation a smoother transition between states
current_transition_len <- 3
current_state_len <- 0
anim = staticplot + | |
transition_states(Date, transition_length = current_transition_len, state_length = current_state_len) + | |
ease_aes('cubic-in-out') + | |
view_follow(fixed_x = TRUE, fixed_y = c(-10, NA)) + | |
labs(title = 'Spead of Confirmed Cases per day: {closest_state}', | |
subtitle = 'Top 10 Countries/Regions', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######## Point to your data directory
series_all_files = list.files(series_data_dir)
## Keep only files whose name ends in ".csv". The dot is escaped and the
## pattern is anchored at the end of the name, so files that merely contain
## "csv" somewhere (e.g. "notes_csv.txt" or "data.csv.bak") are not picked up.
series_data_files = series_all_files[grepl('\\.csv$', series_all_files)]
print(sprintf('Total data files = %s', length(series_data_files)))
series_data_ = lapply(series_data_files, | |
function(i) { | |
dat = read.csv(paste0(series_data_dir, '/', i), stringsAsFactors = FALSE) | |
file_ = gsub('.csv', '', i) |