Last active
March 20, 2022 11:29
-
-
Save benmarwick/9a54cbd325149a8ff405 to your computer and use it in GitHub Desktop.
Using R for particle size and dN/dlogDp time series plots for aerosol pollution, now a package at https://github.com/benmarwick/smps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# from my Q&A here http://stackoverflow.com/questions/36014676/use-r-to-recreate-contour-plot-made-in-igor | |
# I assnme the working directory is set to the folder that contains the data. | |
# from my Q&A here http://stackoverflow.com/questions/36014676/use-r-to-recreate-contour-plot-made-in-igor | |
# I assnme the working directory is set to the folder that contains the data. | |
# read in the data | |
dat <- read.csv("contour_plot_data.csv") | |
# focus on the untransformed values | |
dat <- dat[, 1:108] | |
# get Diameter value from col names | |
Diameter <- as.numeric(gsub("X", "", names(dat)[-1])) | |
# melt the wide data into long format | |
# see http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/ | |
library(tidyr) | |
dat_long <- gather(dat, "Diameter", "dN_dlogDp", 2:108) | |
# we want diameter as a numeric | |
dat_long$Diameter <- as.numeric(gsub("X", "", dat_long$Diameter )) | |
# we want time as a date-formatted variable | |
x <- as.character(dat_long$Time) | |
date_ <- as.Date(x, format = "%d/%m/%Y") | |
time_ <- gsub(" ", "", substr(x, nchar(x) - 4, nchar(x))) | |
dat_long$Time <- as.POSIXct(paste0(date_, " ", time_)) | |
# The Igor plot seems to use log dN_dlogDp values, so let's get those | |
dat_long$dN_dlogDp_log <- log10(dat_long$dN_dlogDp) | |
dat_long$dN_dlogDp_log <- ifelse(dat_long$dN_dlogDp_log == "NaN" | | |
dat_long$dN_dlogDp_log == "-Inf" , 0.01, dat_long$dN_dlogDp_log) | |
# interpolate between the values for a smoother contour | |
# this takes a moment or two... | |
# increase these values to make it quicker, trial and error! | |
interval_in_seconds <- 200 | |
interval_diameter <- 0.75 | |
# interpolate the log values | |
library(akima) | |
xo <- with(dat_long, seq(min(Time), max(Time), interval_in_seconds)) | |
yo <- with(dat_long, seq(min(Diameter), max(Diameter), interval_diameter)) | |
dat_interp_log <- with(dat_long, interp(Time, Diameter, dN_dlogDp_log, xo = xo, yo = yo) ) | |
# interpolate the raw values | |
library(akima) | |
xo <- with(dat_long, seq(min(Time), max(Time), interval_in_seconds)) | |
yo <- with(dat_long, seq(min(Diameter), max(Diameter), interval_diameter)) | |
dat_interp_raw <- with(dat_long, interp(Time, Diameter, dN_dlogDp, xo = xo, yo = yo) ) | |
# get on with plotting... | |
# make log data into a data frame for ggplot | |
dat_interp_log_df <- data.frame(matrix(data = dat_interp_log$z, | |
ncol = length(dat_interp_log$y), | |
nrow = length(dat_interp_log$x))) | |
names(dat_interp_log_df) <- dat_interp_log$y | |
dat_interp_log_df$Time <- as.POSIXct(dat_interp_log$x, origin = "1970-01-01") | |
# wide to long | |
dat_interp_log_df_long <- gather(dat_interp_log_df, "Diameter", "dN_dlogDp_log", 1:(ncol(dat_interp_log_df)-1)) | |
dat_interp_log_df_long$Diameter <- as.numeric(as.character(dat_interp_log_df_long$Diameter)) | |
# make raw data into a data frame for ggplot | |
dat_interp_raw_df <- data.frame(matrix(data = dat_interp_raw$z, | |
ncol = length(dat_interp_raw$y), | |
nrow = length(dat_interp_raw$x))) | |
names(dat_interp_raw_df) <- dat_interp_raw$y | |
dat_interp_raw_df$Time <- as.POSIXct(dat_interp_raw$x, origin = "1970-01-01") | |
# wide to long | |
dat_interp_raw_df_long <- gather(dat_interp_raw_df, "Diameter", "dN_dlogDp_raw", 1:(ncol(dat_interp_raw_df)-1)) | |
dat_interp_raw_df_long$Diameter <- as.numeric(as.character(dat_interp_raw_df_long$Diameter)) | |
dat_interp_df_long <- cbind(dat_interp_raw_df_long, dN_dlogDp_log = dat_interp_log_df_long$dN_dlogDp_log) | |
######################## plots ################################### | |
library(ggplot2) | |
library(scales) # for date_breaks | |
library(viridis) | |
# function for minor tick marks | |
every_nth <- function(x, nth, empty = TRUE, inverse = FALSE) | |
{ | |
if (!inverse) { | |
if(empty) { | |
x[1:nth == 1] <- "" | |
x | |
} else { | |
x[1:nth != 1] | |
} | |
} else { | |
if(empty) { | |
x[1:nth != 1] <- "" | |
x | |
} else { | |
x[1:nth == 1] | |
} | |
} | |
} | |
# get visually diminishing axis ticks | |
base_breaks <- function(n = 10){ | |
function(x) { | |
axisTicks(log10(range(x, na.rm = TRUE)), log = TRUE, n = n) | |
} | |
} | |
insert_minor <- function(major_labs, n_minor) {labs <- | |
c( sapply( major_labs, function(x) c(x, rep("", multiple) ) ) ) | |
labs[1:(length(labs)-n_minor)]} | |
# adjust the colour ramp to match the Igor plot, please experiment with the numbers here! | |
colfunc <- colorRampPalette(c( rep("red", 3), | |
rep("yellow", 1), | |
rep("green", 1), | |
"cyan", | |
rep("blue", 3), | |
"purple")) | |
# function to set the legend labels | |
fill_scale_labels <- function(x) { | |
parse(text = paste0("10^",x)) | |
} | |
y_labels_breaks <- seq(0, max(Diameter), 100) | |
mytheme <- theme_bw(base_size = 14) + theme(aspect.ratio = 1.6/5) | |
###################### plot the values ###################################### | |
# add tick marks every two hours | |
start_date <- min(dat_interp_df_long$Time) | |
end_date <- max(dat_interp_df_long$Time) | |
date_breaks_2h <- seq(from = start_date, to = end_date, by = "2 hours") | |
date_breaks_1_day <- seq(from = start_date, to = end_date, by = "1 day") | |
multiple <- length(date_breaks_2h) / length(date_breaks_1_day) | |
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, fill = dN_dlogDp_log)) + | |
geom_raster(interpolate = TRUE) + | |
scale_fill_gradientn(name=expression(dN/dlogD[p](cm^-3)), | |
colours = rev(colfunc(100)), | |
labels = fill_scale_labels) + | |
scale_y_continuous(expand = c(0,0), | |
labels = every_nth(y_labels_breaks, 2, inverse = TRUE), | |
breaks = y_labels_breaks) + | |
scale_x_datetime(expand = c(0,0), | |
breaks=date_breaks_2h, | |
labels=insert_minor(format(date_breaks_1_day, "%d %b"), | |
length(date_breaks_1_day))) + | |
xlab("Day and time") + | |
ylab("Diameter (nm)") + | |
mytheme | |
################## plot the log values: y-axis with log scale ########################### | |
# Now with log axis, we need to replace the ymin and ymax | |
distance <- diff((unique(dat_interp_df_long$Diameter)))/2 | |
upper <- (unique(dat_interp_df_long$Diameter)) + c(distance, distance[length(distance)]) | |
lower <- (unique(dat_interp_df_long$Diameter)) - c(distance[1], distance) | |
# Create xmin, xmax, ymin, ymax | |
dat_interp_df_long$xmin <- dat_interp_df_long$Time - 1000 # default of geom_raster is 0.5 | |
dat_interp_df_long$xmax <- dat_interp_df_long$Time + 1000 | |
idx <- rle(dat_interp_df_long$Diameter)$lengths[1] | |
dat_interp_df_long$ymin <- unlist(lapply(lower, function(i) rep(i, idx))) | |
dat_interp_df_long$ymax <- unlist(lapply(upper, function(i) rep(i, idx))) | |
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, | |
xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, | |
fill = dN_dlogDp_log)) + | |
geom_rect() + | |
scale_fill_gradientn(name=expression(dN/dlogD[p](cm^-3)), | |
colours = rev(colfunc(100)), | |
labels = fill_scale_labels) + | |
scale_y_continuous(expand = c(0,0), | |
trans = log_trans(), breaks = base_breaks()) + | |
scale_x_datetime(expand = c(0,0), | |
breaks=date_breaks_2h, | |
labels=insert_minor(format(date_breaks_1_day, "%d %b"), | |
length(date_breaks_1_day))) + | |
xlab("Day and time") + | |
ylab("Diameter (nm)") + | |
mytheme | |
################## plot one day ########################### | |
# subset one day from the dat | |
# see which method is faster for you | |
one_day <- dat_interp_df_long[grepl("2013-01-26", dat_interp_df_long$Time), ] | |
library(dplyr) | |
one_day <- dat_interp_df_long %>% | |
filter(grepl("2013-01-26", Time)) | |
# for custom time breaks | |
start_date <- min(one_day$Time) | |
end_date <- max(one_day$Time) | |
time_labels_breaks <- seq(start_date, end_date, "3 hours") | |
ggplot(dat_interp_df_long, aes(y = Diameter, x = Time, | |
xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, | |
fill = dN_dlogDp_log)) + | |
geom_rect() + | |
scale_fill_gradientn(name=expression(dN/dlogD[p](cm^-3)), | |
colours = rev(colfunc(100)), | |
labels = fill_scale_labels) + | |
scale_y_continuous(expand = c(0,0), | |
trans = log_trans(), breaks = base_breaks()) + | |
scale_x_datetime(expand = c(0,0), | |
limits = c(as.POSIXct(start_date), as.POSIXct(end_date)), | |
breaks = time_labels_breaks, | |
labels = strftime(time_labels_breaks, "%H:%S") ) + | |
xlab("Time") + | |
ylab("Diameter (nm)") + | |
mytheme | |
Great script. Someone have the file contour_plot_data.csv in order to recreate the plot.
Thanks a lot
Manuel
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is the plot we are trying to reproduce, made with Igor:
Here is a very close reproduction using the R code in the above gist:
And here is the same, but with a log y-axis, made with R:
And here is the plot for one day: