Skip to content

Instantly share code, notes, and snippets.

@YiLi225
YiLi225 / process_data.R
Last active March 29, 2020 22:30
line_chart_preparedata
###### Process data and subset on countries
# sort(unique(series_df$Country.Region))
# Countries kept for the line-chart dataset.
selected_countries <- c('China', 'Italy', 'US', 'Iran')
## sum group by country by status
# Positions of every column whose name contains an 'X'
# (presumably the date columns, going by the variable name -- confirm).
date_col_idx <- grep('X', colnames(series_df))
country_data = series_df %>%
filter(Country.Region %in% selected_countries) %>%
select(c(Country.Region, Status, colnames(.)[date_col_idx])) %>%
@YiLi225
YiLi225 / check_cols.R
Last active March 29, 2020 21:53
Check the columns of the datasets
### check whether the column names of all datasets match up
### `columns` keeps its original definition: a character matrix whenever every
### dataset has the same number of columns.
columns <- sapply(series_data_, colnames)
### Compare every dataset against the first one instead of hard-coding each
### pair. This scales to any number of datasets, and it yields FALSE (rather
### than an indexing error) when the datasets differ in their number of columns.
column_names_ <- lapply(series_data_, colnames)
reference_names_ <- if (length(column_names_) > 0) column_names_[[1]] else NULL
### Single TRUE/FALSE: TRUE only when every dataset has exactly the same
### column names, in the same order, as the first dataset.
all(vapply(column_names_, identical, logical(1), reference_names_))
@YiLi225
YiLi225 / combine.R
Last active April 1, 2020 03:38
Combining the list into a dataframe
#### Stack every dataset in the list into one single data frame
series_df <- do.call(rbind, series_data_)
##### (optional) Recode the values: long country label -> short label
country_recodes <- c('United Kingdom' = 'UK', 'Korea, South' = 'Korea S')
for (old_label in names(country_recodes)) {
  # Rows currently carrying the long label (which() drops any NA comparisons).
  recode_rows <- which(series_df$Country.Region == old_label)
  series_df[recode_rows, 'Country.Region'] <- country_recodes[[old_label]]
}
@YiLi225
YiLi225 / helper.R
Created March 29, 2020 18:02
ggplot line chart helper function
#### helper function for trajectory line chart:
#### note: this function uses a global variable!!!
helper_vis_continuous <- function(dat = country_data, country = 'US') {
## vertical structure for ggplot
current_dat = dat %>%
filter(Country.Region == country) %>%
reshape2::melt(.) %>%
set_colnames(c('Country', 'Status', 'Date', 'Total'))
## starts with the date when 1st case was confirmed for this country
@YiLi225
YiLi225 / wide_to_long.R
Created March 29, 2020 18:12
Create plotting dataset for plotly
#### Keep only the countries chosen for plotting, then reshape wide -> long
current_dat <- country_data %>%
  filter(Country.Region %in% selected_countries) %>%
  # melt() is called without explicit id variables, so it picks them itself
  reshape2::melt() %>%
  # rename the four resulting columns for plotting
  set_colnames(c('Country', 'Status', 'Date', 'Total'))
## starts with the date when the 1st case was confirmed
find_case1_onwards <- function(country_name) {
case1_idx = which(current_dat[current_dat$Country == country_name, 'Total'] > 0)[1]
@YiLi225
YiLi225 / ploty_facet_wrap.R
Last active March 31, 2020 04:31
Plotly using facet wrap
#### Plotly with ggplot facet_wrap
country_plot = ggplot(dat = current_dat,
aes_string(x = 'Date', y = 'Total', color = 'Status', group = 'Status', linetype = 'Status')) +
geom_line(lwd = 1.2) +
facet_wrap(~ Country, scales = "free") +
labs(title = sprintf('Trajectories of the Status of Coronavirus \n')) +
xlab('Date') + ylab('Total Numbers') +
theme_bw() +
@YiLi225
YiLi225 / heatmap.R
Last active March 29, 2020 22:43
heatmap for selected countries
####### Heatmap from 2/22 to 3/28
library(gplots)
# Colour ramp for the heatmap: 1000 colours interpolated through
# light blue -> black -> red.
my_palette <- colorRampPalette(c("light blue", "black", "red"))(n = 1000)
# Keep only the rows whose Status is 'confirmed'.
heatmap_dat <- country_data %>%
  filter(Status == 'confirmed')
## subset: after Feb 22nd
# Name of the first column to keep; everything to its right is kept as well.
heatmap_start_col <- '2_22_20'
col_idx <- match(heatmap_start_col, colnames(heatmap_dat))
# Fail with a clear message when the start column is missing, instead of the
# opaque "argument of length 0" error from `:` on an empty index.
if (is.na(col_idx)) {
  stop(
    sprintf("Column '%s' not found in heatmap_dat", heatmap_start_col),
    call. = FALSE
  )
}
# Columns 1 and 2 are kept alongside the start column and all later columns.
heatmap_dat <- heatmap_dat[, c(1, 2, col_idx:ncol(heatmap_dat))]
@YiLi225
YiLi225 / animate_1.R
Last active March 29, 2020 23:26
animated bar chart
#####======= static and animated plots
# Largest Total in the plotting data; every size below is derived from it, so
# compute it once instead of on each line.
max_total <- max(confirmed_formatted$Total)
total_text_y <- 0.87 * max_total
panel_size_y <- max_total * 1.15
# Spacing of the vertical guide lines: one eighth of the maximum, rounded down.
vline_step <- floor(max_total / 8)
# For maxima between 1 and 7 the rounded step is 0, which makes seq() fail
# with an invalid `by`; fall back to a step of 1 in that case.
if (isTRUE(vline_step == 0 && max_total >= 1)) {
  vline_step <- 1
}
vline_original_y <- seq(vline_step, max_total, by = vline_step)
country_font_size <- 10
bar_end_num_size <- 11
staticplot = ggplot(confirmed_formatted,
@YiLi225
YiLi225 / animate_2.R
Created March 29, 2020 23:27
animated bar chart -- animate part
#### Specify the transition length and ease_aes to give it a smoother transition
# Passed as `state_length` to transition_states() below.
current_state_len <- 0
# Passed as `transition_length` to transition_states() below.
current_transition_len <- 3
anim = staticplot +
transition_states(Date, transition_length = current_transition_len, state_length = current_state_len) +
ease_aes('cubic-in-out') +
view_follow(fixed_x = TRUE, fixed_y = c(-10, NA)) +
labs(title = 'Spead of Confirmed Cases per day: {closest_state}',
subtitle = 'Top 10 Countries/Regions',
@YiLi225
YiLi225 / read_in_data.R
Created April 1, 2020 03:37
Read in the series datasets
######## Point to your data directory
# Every entry found in the data directory.
series_all_files <- list.files(series_data_dir)
# Keep only names that END in ".csv". The dot is escaped and the pattern is
# anchored, so names such as "notes_csv.txt" or "data.csv.bak" are no longer
# picked up by accident (an unescaped '.' matches any character).
series_data_files <- series_all_files[grepl('\\.csv$', series_all_files)]
print(sprintf('Total data files = %s', length(series_data_files)))
series_data_ = lapply(series_data_files,
function(i) {
dat = read.csv(paste0(series_data_dir, '/', i), stringsAsFactors = FALSE)
file_ = gsub('.csv', '', i)