# Code for blog post: http://matthewdharris.com/2016/03/29/boxplot-or-not-to-boxplot-woe-ful-example/
# The post compares a range of alternative visualizations against the boxplot.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("data.table") | |
library("rowr") | |
library("dplyr") | |
library("ggplot2") | |
library("Information") | |
library("knitr") | |
library("ggrepel") | |
library("ggthemes") | |
library("ggalt") | |
library("xtable") | |
# "Not in" operator: TRUE for every element of `x` that does not occur in
# `table` (the element-wise negation of %in%).
`%ni%` <- function(x, table) {
  !(x %in% table)
}
# Read the input CSV with fread() and convert the result to a plain data.frame.
dat <- data.frame(fread("YOUR DATA LOCATION.CSV"))
### Data is an n x p data frame where the p columns contain:
# [presence], a 0/1 binary presence/absence indicator
# [SITENO], a character field holding the group-level name for presence rows and "background" for background samples
# any number of variables that you want to model as indicators of presence/absence
# The n rows are individual measurements of the variables at sampling locations within SITENO spatial groups.
# To replicate these results, any data frame with a 0/1 [presence] column, a character group column [SITENO], and numeric variables should work.
# Remove columns that are not needed for the analysis.
# `drop = FALSE` keeps the result a data.frame even when only a single column
# survives the filter (otherwise `[` would collapse it to a bare vector).
dat_trimmed <- dat[, colnames(dat) %ni% c("V1", "tpi_sd250c", "tpi_cls250c",
                                          "e_trail_dist"),
                   drop = FALSE]
# Settings for the train/test partitioning and the resampling loop.
# Fraction of sites assigned to the training set (0 to 1).
train_v_test_fraction <- 0.8
# Number of sites kept in the training set: an integer, or "all".
sites_per_train <- "all"
# Number of sites kept in the test set: an integer, or "all".
sites_per_test <- "all"
# Background rows sampled per presence row (1 for balanced; typically 3).
absence_presence_balance <- 3
# Fraction of each assembled data set that is retained (0 to 1).
data_reduction_fraction <- 1
# Number of resample repeats.
runs <- 10
# Build one random train/test split of presence sites plus background samples.
#
# Args:
#   dat: data.frame with a 0/1 `presence` column and a `SITENO` group column
#     in which the label "background" marks background samples.
#   train_v_test_fraction: fraction (0 to 1) of sites assigned to training.
#   sites_per_train, sites_per_test: number of sites to keep in each set, or
#     "all" to keep every site assigned to that set.
#   absence_presence_balance: background rows drawn per presence row.
#   data_reduction_fraction: fraction (0 to 1) of each assembled set to keep.
#
# Returns:
#   list(train = <data frame>, test = <data frame>).
get.train.test.sets <- function(dat, train_v_test_fraction, sites_per_train, sites_per_test,
                                absence_presence_balance, data_reduction_fraction) {
  # Site labels; "background" is excluded by name instead of assuming it is
  # the last unique value.
  site_names <- setdiff(unique(dat$SITENO), "background")
  n_train <- floor(length(site_names) * train_v_test_fraction)
  train_sites <- site_names[sample(seq_along(site_names), n_train)]
  test_sites <- site_names[!(site_names %in% train_sites)]

  # Reduce train and test site data volume, still sampling within each set.
  if (identical(sites_per_train, "all")) sites_per_train <- length(train_sites)
  if (identical(sites_per_test, "all")) sites_per_test <- length(test_sites)
  tr_sites <- filter(dat, presence == 1, SITENO %in% sample(train_sites, sites_per_train))
  te_sites <- filter(dat, presence == 1, SITENO %in% sample(test_sites, sites_per_test))

  # Background sample sized as a multiple of the presence rows in each set.
  # NOTE(review): both background samples are drawn independently from the
  # same presence == 0 pool, so train and test can share background rows --
  # confirm this is intended.
  background <- filter(dat, presence == 0)
  tr_background <- sample_n(background, nrow(tr_sites) * absence_presence_balance)
  te_background <- sample_n(background, nrow(te_sites) * absence_presence_balance)

  # Combine presence and background rows, then optionally thin each set.
  # bind_rows() replaces the defunct rbind_all().
  train <- bind_rows(tr_sites, tr_background) %>%
    sample_frac(data_reduction_fraction)
  test <- bind_rows(te_sites, te_background) %>%
    sample_frac(data_reduction_fraction)

  list(train = train, test = test)
}
# Container for per-run results. It is preallocated and filled by index so the
# results data.frame is not re-copied on every iteration.
iv_runs <- vector("list", runs)
# loop through resamples to get results
for (i in seq_len(runs)) {
  message(i)
  # retrieve a train/test set as list
  test_train <- get.train.test.sets(dat_trimmed, train_v_test_fraction,
                                    sites_per_train, sites_per_test,
                                    absence_presence_balance,
                                    data_reduction_fraction)
  # split train/test list into individual train and test objects
  train <- test_train[["train"]]
  test <- test_train[["test"]]
  ### variable importance with Information::create_infotables
  IV <- create_infotables(data = train,
                          valid = test,
                          y = "presence",
                          parallel = FALSE)
  # to see single resample results, use these functions
  # knitr::kable(IV$Summary)
  # variable <- "e_hyd_min"
  # knitr::kable(IV$Tables[[variable]])
  # plot_infotables(IV, variable, show_values=TRUE)
  iv_runs[[i]] <- data.frame(IV$Summary, run = i)
}
# stack the per-run summaries into one long data.frame
IV_results <- do.call(rbind, iv_runs)
# Table representation: per-variable distribution of AdjIV across resamples.
IV_summary <- group_by(IV_results, Variable) %>%
  summarise(Minimum = min(AdjIV),
            p5 = quantile(AdjIV, probs = 0.05),
            Mean = mean(AdjIV),
            Median = median(AdjIV),
            p95 = quantile(AdjIV, probs = 0.95),
            Maximum = max(AdjIV)) %>%
  data.frame()
# Round every statistic column (everything except the Variable name) to two
# decimals. The original third index (`[, -1, 0]`) was silently taken as the
# `drop` argument and has been removed.
IV_summary[, -1] <- round(IV_summary[, -1], 2)
kable(IV_summary)
# for making LaTeX and HTML tables for the blog post
# x_IV_summary <- xtable(IV_summary) | |
# print(x_IV_summary) | |
# print(xtable(x_IV_summary), type = "html") | |
### PLOTS
# Line chart: AdjIV of each variable across resamples, with a repelled label
# for every variable placed at its final run.
p1 <- ggplot(data = IV_results,
             aes(x = run, y = AdjIV, group = Variable, color = Variable)) +
  geom_line() +
  theme_bw() +
  geom_label_repel(data = IV_results %>% filter(run == runs),
                   aes(label = Variable, fill = Variable),
                   nudge_x = 2.5, nudge_y = 0, size = 2, color = "white",
                   force = 5, segment.color = "#bbbbbb", max.iter = 3000) +
  # leave room to the right of the last run for the labels
  expand_limits(x = (runs + 2)) +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples"),
       x = "Resample",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  scale_x_continuous(breaks = seq(0, runs, 1)) +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 0, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p1)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_lines.png", plot = p1, width = 6, height = 4)
# Spline version of the line chart: one smoothed curve per variable with the
# observed AdjIV values overplotted as points.
p2 <- ggplot(data = IV_results,
             aes(x = run, y = AdjIV, group = Variable, color = Variable)) +
  geom_xspline(spline_shape = -0.4, size = 0.5) +
  geom_point() +
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples"),
       x = "Resample",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  scale_x_continuous(breaks = seq(0, runs, 1)) +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 0, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p2)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_splines.png", plot = p2, width = 6, height = 4)
# Ordered boxplot: variables on the x axis sorted by decreasing mean AdjIV.
mean_adjIV <- group_by(IV_results, Variable) %>%
  summarise(mean = mean(AdjIV)) %>%
  data.frame()
# factor whose level order encodes the ranking by mean AdjIV
IV_results$Variable_ordered <- factor(
  IV_results$Variable,
  levels = mean_adjIV[order(mean_adjIV$mean, decreasing = TRUE), "Variable"]
)
p3 <- ggplot(data = IV_results, aes(x = Variable_ordered, y = AdjIV)) +
  geom_hline(yintercept = 0, color = "gray70", linetype = "dashed") +
  geom_boxplot(fill = "skyblue", outlier.colour = "gray20", outlier.shape = 1) +
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples Ordered by Mean IV"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 90, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p3)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_boxplot.png", plot = p3, width = 6, height = 4)
# Bar plot of mean AdjIV per variable with +/- one standard error bars.
# standard error of the mean: sqrt(var(x) / n)
se <- function(x) sqrt(var(x) / length(x))
IV_bar_summary <- group_by(IV_results, Variable) %>%
  summarise(SE = se(AdjIV),
            Mean = mean(AdjIV),
            Upper = Mean + SE,
            Lower = Mean - SE) %>%
  data.frame()
# factor whose level order encodes the ranking by mean AdjIV
IV_bar_summary$Variable_ordered <- factor(
  IV_bar_summary$Variable,
  levels = IV_bar_summary[order(IV_bar_summary$Mean, decreasing = TRUE), "Variable"]
)
p4 <- ggplot(data = IV_bar_summary, aes(x = Variable_ordered, y = Mean)) +
  geom_hline(yintercept = 0, color = "gray70", linetype = "dashed") +
  geom_bar(stat = "identity", fill = "skyblue") +
  geom_errorbar(aes(ymax = Upper, ymin = Lower), width = 0.25) +
  # geom_boxplot(data = IV_results, aes(x = Variable_ordered, y = AdjIV), fill = "skyblue", outlier.colour = "gray20", outlier.shape = 1)
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs,
                        "Resamples Ordered by Mean IV, Showing Standard Error"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  # scale_y_continuous(breaks = seq(-1,1,0.25)) +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 90, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p4)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_Bar.png", plot = p4, width = 6, height = 4)
# Overlapping semi-transparent AdjIV densities, one per variable.
p5 <- ggplot(data = IV_results, aes(x = AdjIV, group = Variable, fill = Variable)) +
  geom_density(alpha = 0.33) +
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples"),
       x = "Adjusted Information Value (IV)",
       y = "Density") +
  scale_x_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 0, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p5)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_density.png", plot = p5, width = 6, height = 4)
# Faceted densities: one row per variable (facet_grid), ordered by
# Variable_ordered, with a dashed reference line at AdjIV = 0.
p6 <- ggplot(data = IV_results, aes(x = AdjIV, group = Variable_ordered)) +
  geom_density(fill = "skyblue") +
  geom_vline(xintercept = 0, color = "gray50", linetype = "dashed") +
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples"),
       x = "Adjusted Information Value (IV)",
       y = "Density") +
  scale_x_continuous(breaks = seq(-1, 1, 0.25)) +
  facet_grid(Variable_ordered ~ .) +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    strip.background = element_rect(colour = "gray50", fill = "white"),
    strip.text.y = element_text(colour = "black", size = 6, face = "bold", family = "Trebuchet MS"),
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 0, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_blank(),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic"),
    axis.ticks.y = element_blank()
  )
plot(p6)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_density_facet.png", plot = p6, width = 8, height = 10)
# Facet-wrapped densities: one panel per variable in a four-column grid, with
# a dashed reference line at AdjIV = 0.
p7 <- ggplot(data = IV_results, aes(x = AdjIV, group = Variable_ordered)) +
  geom_density(fill = "skyblue") +
  geom_vline(xintercept = 0, color = "gray50", linetype = "dashed") +
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples"),
       x = "Adjusted Information Value (IV)",
       y = "Density") +
  scale_x_continuous(breaks = seq(-1, 1, 0.5)) +
  facet_wrap(~ Variable_ordered, ncol = 4) +
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    strip.background = element_rect(colour = "gray50", fill = "white"),
    strip.text = element_text(colour = "black", size = 9, face = "bold", family = "Trebuchet MS"),
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 0, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_blank(),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic"),
    axis.ticks.y = element_blank()
  )
plot(p7)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_density_facet_wrap.png", plot = p7, width = 6, height = 6)
# Bivariate view: raw IV against the CV penalty, one panel per variable with a
# linear fit per panel.
# Panels are ordered by AdjIV because that is what we care about in the end;
# not raw IV.
# NOTE: this assignment reuses the name `p7`, replacing the facet-wrapped
# density plot object created earlier in the script.
p7 <- ggplot(IV_results, aes(x = IV, y = PENALTY,
                             group = Variable_ordered, color = Variable_ordered)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "gray50") +
  theme_bw() +
  labs(title = "Variable Information vs. CV Penalty for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples, Ordered by Mean AdjIV "),
       x = "Information Value (IV)",
       y = "CV Penalty") +
  facet_wrap(~ Variable_ordered, nrow = 4) +
  theme(
    legend.position = "none",
    # panel.grid = element_blank(),
    strip.background = element_rect(colour = "gray50", fill = "white"),
    # facet_wrap draws its strips horizontally, so `strip.text.y` never
    # applied here; `strip.text` styles the strips regardless of orientation
    strip.text = element_text(colour = "black", size = 8, face = "bold", family = "Trebuchet MS"),
    axis.text.x = element_text(angle = 0, size = 8, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p7)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_ bivariate.png", plot = p7, width = 8, height = 6)
# Rank-change plot: each variable's rank by mean raw IV (left) connected to
# its rank by mean AdjIV (right).
sort_adjIV <- group_by(IV_results, Variable) %>%
  summarise(mean_AdjIV = mean(AdjIV),
            mean_IV = mean(IV)) %>%
  data.frame()
IV_ord <- sort_adjIV[order(sort_adjIV$mean_IV, decreasing = TRUE), "Variable"]
AdjIV_ord <- sort_adjIV[order(sort_adjIV$mean_AdjIV, decreasing = TRUE), "Variable"]
# One row per variable in IV order; AdjIV_rank is that same variable's
# position in the AdjIV ordering.
var_rank <- data.frame(IV = IV_ord, AdjIV = AdjIV_ord,
                       IV_rank = seq_along(IV_ord),
                       AdjIV_rank = match(IV_ord, AdjIV_ord))
# Ranks are flipped so rank 1 is drawn at the top. The offset is derived from
# the number of variables (it was a hard-coded 19, i.e. 18 variables).
rank_offset <- nrow(var_rank) + 1
p8 <- ggplot(var_rank) +
  geom_text(aes(x = "IV", y = rank_offset - IV_rank, label = IV),
            size = 3.2, hjust = 1, family = "TrebuchetMS-Bold") +
  geom_text(aes(x = "AdjIV", y = rank_offset - AdjIV_rank, label = IV),
            size = 3.2, hjust = 0, family = "TrebuchetMS-Bold") +
  geom_segment(aes(x = "AdjIV", y = rank_offset - AdjIV_rank,
                   xend = "IV", yend = rank_offset - IV_rank), alpha = .5) +
  geom_hline(yintercept = -1) +
  theme_void() +
  # scale_y_continuous(breaks=c(-1,19), labels=c("","")) +
  scale_x_discrete(limits = c("IV", "AdjIV")) +
  labs(title = "Rank Change Between IV and AdjIV",
       x = "Metric",
       y = "Relative Rank") +
  theme(
    axis.text.x = element_text(angle = 0, size = 12, hjust = 1, family = "Trebuchet MS", face = "bold"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic"),
    axis.title.x = element_text(),
    axis.title.y = element_text(angle = 90)
  )
plot(p8)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_rank_change.png", plot = p8, width = 6, height = 4)
# Violin and boxplot combination: a narrow boxplot drawn over a width-scaled
# violin for each variable.
p9 <- ggplot(data = IV_results,
             aes(x = Variable_ordered, y = AdjIV, group = Variable_ordered)) +
  geom_hline(yintercept = 0, color = "gray70", linetype = "dashed") +
  geom_violin(color = "white", fill = "skyblue", scale = "width", alpha = 0.65) +
  geom_boxplot(color = "skyblue3", fill = "dodgerblue4", width = 0.3,
               outlier.colour = "dodgerblue4", outlier.shape = 19, size = 0.25) +
  # coord_flip() +
  theme_bw() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # This plot shows no standard error; the original subtitle carried a
       # "Showing Standard Error" suffix copied from the bar plot. The count
       # now follows `runs`.
       subtitle = paste("Values for", runs, "Resamples Ordered by Mean IV"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    legend.position = "none",
    panel.border = element_rect(colour = "gray90"),
    axis.text.x = element_text(angle = 90, size = 8, hjust = 1, vjust = 0, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 8, family = "Trebuchet MS"),
    axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p9)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_violin_boxplot.png", plot = p9, width = 6, height = 4)
# Stem-and-leaf text placed beside boxplots. The stem positions are recovered
# by parsing the console output of stem(), which is admittedly hacky.
# strip leading whitespace from each string
trim.leading <- function(x) sub("^\\s+", "", x)
# simulate data: four value columns of 30 normal draws each, plus an index `g`
cc <- data.frame(value = replicate(4, rnorm(30, sample(3:7, 1), 1)), g = 1:30)
# loop over the value columns (all but the last) to capture and format
# stem() output
value_cols <- seq_len(ncol(cc) - 1)
leaf_list <- vector("list", length(value_cols))
for (i in value_cols) {
  # captured stem() printout with its second line removed
  tmp <- capture.output(stem(cc[, i], width = 100, scale = 1, atom = 2))[-2]
  # first two characters of each trimmed line, read as a number
  ss <- as.numeric(substring(trim.leading(tmp), 1, 2))
  # the first two and the last entries are overwritten with values
  # extrapolated from their neighbours
  ss[1] <- ss[3] - 2
  ss[2] <- ss[3] - 1
  ss[length(ss)] <- ss[length(ss) - 1] + 1
  # formatted stem-and-leaf lines for this group
  leaf_list[[i]] <- data.frame(tmp, digi = ss, rr = seq_along(tmp), g = i)
}
leaf_bind <- do.call(rbind, leaf_list)
# hasty clean up: keep the value columns and name them 1..k
ccm <- cc[, -(ncol(cc))]
colnames(ccm) <- seq_len(ncol(ccm))
# Reshape to long format for plotting. Built with base R because melt() on a
# data.frame needs reshape2, which this script never loads.
ccm <- data.frame(
  g = factor(rep(colnames(ccm), each = nrow(ccm)), levels = colnames(ccm)),
  value = unlist(ccm, use.names = FALSE)
)
p10 <- ggplot() +
  geom_boxplot(data = ccm, aes(y = value, x = g), color = "skyblue3",
               fill = "skyblue", width = 0.2, size = 0.25, alpha = 0.5) +
  geom_text(data = leaf_bind, aes(y = digi, x = g + 0.1, label = tmp),
            hjust = 0, fontface = "bold", size = 3, family = "TrebuchetMS") +
  theme_bw() +
  expand_limits(x = 5.5) +
  labs(title = "Boxplots & Stem and Leaf",
       subtitle = "because, it's what Tukey would do",
       x = "Class",
       y = "Authentic Value") +
  theme(axis.ticks = element_blank(),
        panel.grid = element_blank(),
        plot.title = element_text(family = "TrebuchetMS-Bold", size = 20),
        plot.subtitle = element_text(family = "TrebuchetMS-Italic", size = 15),
        axis.title = element_text(size = 10, family = "Trebuchet MS", face = "bold")
  )
plot(p10)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_bp_stemleaf.png", plot = p10, width = 6, height = 4)
# Tufte box plots
# Variant 1: median drawn as a line, whiskers drawn as points, no offset.
p11 <- ggplot(data = IV_results, aes(x = Variable_ordered, y = AdjIV)) +
  geom_tufteboxplot(median.type = "line", whisker.type = "point", hoffset = 0) +
  theme_tufte(ticks = FALSE) +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples Ordered by Mean IV"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    axis.text.x = element_text(angle = 90, size = 6, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
    axis.title = element_text(size = 8, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p11)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_tufte_boxplot1.png", plot = p11, width = 6, height = 4)
# Tufte box plot, variant 2: geom_tufteboxplot() with its default settings.
p12 <- ggplot(data = IV_results, aes(x = Variable_ordered, y = AdjIV)) +
  geom_tufteboxplot() +
  theme_tufte(ticks = FALSE) +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples Ordered by Mean IV"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    axis.text.x = element_text(angle = 90, size = 6, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
    axis.title = element_text(size = 8, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p12)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_tufte_boxplot2.png", plot = p12, width = 6, height = 4)
# Simple dot plot with Tufte sparsity: every resample's AdjIV as a small point.
p13 <- ggplot(data = IV_results, aes(x = Variable_ordered, y = AdjIV)) +
  geom_point(size = 0.75) +
  theme_tufte() +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples Ordered by Mean IV"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    axis.text.x = element_text(angle = 90, size = 6, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
    axis.title = element_text(size = 8, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p13)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_tufte_point.png", plot = p13, width = 6, height = 4)
# Tufte box plot, variant 3: median drawn as a line, other settings default.
p14 <- ggplot(data = IV_results, aes(x = Variable_ordered, y = AdjIV)) +
  geom_tufteboxplot(median.type = "line") +
  theme_tufte(ticks = FALSE) +
  labs(title = "Variable Information Value for Site Presence & Absence",
       # subtitle follows `runs` instead of hard-coding the resample count
       subtitle = paste("Values for", runs, "Resamples Ordered by Mean IV"),
       x = "Variable",
       y = "Adjusted Information Value (IV)") +
  scale_y_continuous(breaks = seq(-1, 1, 0.25)) +
  theme(
    axis.text.x = element_text(angle = 90, size = 6, hjust = 1, family = "Trebuchet MS"),
    axis.text.y = element_text(size = 6, family = "Trebuchet MS"),
    axis.title = element_text(size = 8, family = "Trebuchet MS", face = "bold"),
    plot.title = element_text(family = "TrebuchetMS-Bold"),
    plot.subtitle = element_text(family = "TrebuchetMS-Italic")
  )
plot(p14)
# pass the plot explicitly rather than relying on last_plot()
ggsave(filename = "IV_tufte_boxplot3.png", plot = p14, width = 6, height = 4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great post. Small observation - I was confused with %ni% in line 17
dat_trimmed <- dat[,colnames(dat) %ni% c("V1", "tpi_sd250c", "tpi_cls250c",
"e_trail_dist")]
Till I saw the definition for the same in line 28
Should not the definition be moved before its first usage ...