Skip to content

Instantly share code, notes, and snippets.

@MarinaGolivets
Last active July 13, 2022 06:37
Show Gist options
  • Save MarinaGolivets/1eaac550a1161f212f8ff93fe5eb2645 to your computer and use it in GitHub Desktop.
Save MarinaGolivets/1eaac550a1161f212f8ff93fe5eb2645 to your computer and use it in GitHub Desktop.
# GP Oekologie 2022
# Ingolf Kühn & Marina Golivets
# install & load R packages ------------------------------------------------------------------------
rm(list = ls())
# packages that we need for analyses
pkgs <- c(
"here", # for handling working directory
"googlesheets4", # for loading data from Google spreadsheets
"tidyverse", # for data manipulation
"magrittr", # for piping
"skimr", # for summarizing data
"gtsummary", # for summarizing data
"taxize", # for checking taxonomic names
"TR8", # for retrieving trait data from databases
"performance", # for inspecting regression models,
"rstatix", # for statistical analyses, e.g. ANOVA
"DescTools", # for miscellaneous basic stats
"ggpubr", # for plotting
"ggrepel", # for plotting
"viridis", # for plotting
"cowplot", # for plotting
"vegan", # for multivariate analyses
"FactoMineR", # for PCA
"ks" # for kernel smoothers
)
# install (if not installed) and load all the packages
# # run only once!
# install.packages("devtools")
# devtools::install_github("ddsjoberg/gtsummary")
# devtools::install_github("ropensci/skimr")
for (p in pkgs) {
if (!require(p, character.only = TRUE)) install.packages(p)
library(p, character.only = TRUE)
}
# choose an option to not treat warnings as errors
options(warn = 1)
# check what packages are being used on your machine
sessionInfo()
# check your working (home) directory
here()
# load data ----------------------------------------------------------------------------------------
if (length(list.files(here(), "veg_data_2022.csv")) == 0) {
# load the data directly from the Google sheet
veg <- googlesheets4::read_sheet(
"https://docs.google.com/spreadsheets/d/1X9DM-uaaQ9MP5KnSNUTonx67BygXYH_7xd1G_vXOIqE",
sheet = "Daten", range = "A1:S142", na = c(NULL, "-", ""), col_types = "iicccidddddddcdccdc"
)
# save the data to your home directory
readr::write_csv2(veg, here("veg_data_2022.csv"))
} else {
# load the data from your home directory
veg <- readr::read_csv2(here("veg_data_2022.csv"))
}
# take a quick look at the data
glimpse(veg)
# rename columns -----------------------------------------------------------------------------------
veg %<>%
rename(
Fortlaufendenr = `...1`,
Aufnahmenr = Aufnahmenr.,
WissName = `Wissenschaftlicher Name`,
Hoehe.avg = `Höhe Ø (cm)`,
Hoehe.var = `Höhe σ²`,
Blattflaeche = `Blatt-Fläche (mm²)`,
Frischmasse = `Frischmasse (g)`,
Trockenmasse = `Trockenmasse (mg)`,
Germinulenmasse = `Germinulenmasse (mg)`,
Feuchte = `Ellenberg-Feuchte`,
Bodenfeuchte = `Bodenfeuchte (%vol)`
)
# take a more detailed look at the data
skimr::skim(veg)
select(veg, -Fortlaufendenr, -Aufnahmenr, -WissName, -Anmerkungen) %>%
gtsummary::tbl_summary()
# check data for errors ----------------------------------------------------------------------------
# reassign expert water availability measures
veg %<>% rows_update(tibble(Aufnahmenr = 1:8, WasserHH = c(1, 2, 3, 5, 6, 7, 8, 4)))
# make sure plot-level data don't have duplicates
select(veg, Aufnahmenr, Ort, Vegetation, WasserHH, Bodenfeuchte) %>%
n_distinct() # should equal the number of plots (n = 8)
# make sure species are not duplicated within plots
nrow(veg) # 141
select(veg, Aufnahmenr, WissName) %>%
n_distinct() # should equal the number of rows (n = 141)
# make sure species-level data (from databases) are unique
select(veg, WissName) %>%
n_distinct() # 79
select(veg, WissName, Lebensform, Germinulenmasse, Bestaeubung, Feuchte) %>%
n_distinct() # should equal the number of species (n = 79)
# check life form
table(veg$Lebensform)
filter(veg, Lebensform == "G") %>%
pull(WissName) %>%
unique()
veg %<>% mutate(Lebensform = case_when(
WissName == "Phalaris arundinacea" ~ "mL",
TRUE ~ Lebensform
))
# check % cover per species
hist(veg$Deckung)
# check total % cover per plot
group_by(veg, Aufnahmenr) %>%
summarise(Deckung.tot = sum(Deckung))
# check vegetation types at each site
table(veg$Ort, veg$Vegetation)
# check if Ellenberg humidity indicator values are integers
unique(veg$Feuchte)
unique(as.integer(veg$Feuchte))
veg %<>%
mutate(
Feuchte = stringr::str_remove_all(Feuchte, pattern = "~|x|=") %>% # remove non-numeric symbols
as.integer() # convert to integer
)
veg[veg$Fortlaufendenr == 41, "Feuchte"] <- NA
# check taxon names
tax <- taxize::gnr_resolve(veg$WissName, data_source_id = 1, fields = "all", best_match_only = TRUE)
View(tax)
veg %<>%
mutate(
WissName = recode(
WissName,
`Taraxacum officinalis` = "Taraxacum officinale",
`Viola arvense` = "Viola arvensis"
)
)
veg[veg$Fortlaufendenr == 41, "WissName"] <- "Agrostis capillaris"
# check leaf area
summary(veg$Blattflaeche)
veg %<>% mutate(Blattflaeche = na_if(Blattflaeche, 0))
# check seed mass
summary(veg$Germinulenmasse)
veg %>%
filter(Germinulenmasse == .001) %>%
pull(WissName) %>%
unique()
View(available_tr8)
gmasse_tr8 <- TR8::tr8(
species_list = c("Agrostis stolonifera", "Cerastium semidecandrum"),
download_list = c("seed_wght", "SeedMass", "seed_mass"),
allow_persistent = TRUE
)
gmasse_tr8
veg %<>%
mutate(Germinulenmasse = case_when(
WissName == "Cerastium semidecandrum" ~ .04,
WissName == "Agrostis stolonifera" ~ .07,
Fortlaufendenr == 41 ~ .1,
TRUE ~ Germinulenmasse
))
# take a look at the data again
skim(veg)
# impute missing trait data ------------------------------------------------------------------------
# life form (data from FloraWeb)
select(veg, WissName, Lebensform) %>%
distinct() %>%
filter(is.na(Lebensform))
veg %<>%
mutate(Lebensform = case_when(
WissName %in% c("Cerastium glomeratum", "Erophila verna") ~ "mL",
TRUE ~ Lebensform
))
# pollination type (data from FloraWeb)
select(veg, WissName, Bestaeubung) %>%
distinct() %>%
filter(is.na(Bestaeubung))
veg %<>%
mutate(Bestaeubung = case_when(
WissName %in% c(
"Cerastium glomeratum", "Erophila verna", "Valerianella spec."
) ~ "se",
TRUE ~ Bestaeubung
))
# seed mass (data from Seed Information Database)
select(veg, WissName, Germinulenmasse) %>%
distinct() %>%
filter(is.na(Germinulenmasse))
veg %<>%
mutate(Germinulenmasse = case_when(
WissName == "Allium vineale" ~ 7,
WissName == "Carex ovalis" ~ .52,
WissName == "Cerastium glomeratum" ~ .05,
WissName == "Cerastium glutinosum" ~ .0654,
WissName == "Cruciata laevipes" ~ 3.5856,
WissName == "Erodium cicutarium" ~ 2,
WissName == "Erophila verna" ~ .024,
WissName == "Senecio erucifolius" ~ .3935,
WissName == "Saxifraga granulata" ~ .03,
WissName == "Prunus mahaleb" ~ 79.9,
WissName == "Polygonum persicaria" ~ 2.1,
TRUE ~ Germinulenmasse
))
summary(veg$Germinulenmasse)
# convert life form and pollination to factor
veg %<>% mutate(across(c(Lebensform, Bestaeubung), ~ as.factor(.)))
# calculate SLA & LDMC -----------------------------------------------------------------------------
veg %<>%
mutate(
SLA = Blattflaeche / Trockenmasse,
LDMC = Trockenmasse / Frischmasse
)
# check SLA
hist(veg$SLA)
# check LDMC
hist(veg$LDMC)
filter(veg, LDMC > 500) %>%
select(Fortlaufendenr, WissName, Deckung, Blattflaeche, Frischmasse, Trockenmasse, LDMC)
# make corrections in the data
veg %<>%
mutate(
Frischmasse = if_else(Fortlaufendenr == 127, .2063, Frischmasse),
Trockenmasse = if_else(Fortlaufendenr == 79, 85, Trockenmasse) # it's a guess!
)
# recalculate SLA & LDMC
veg %<>%
mutate(
SLA = Blattflaeche / Trockenmasse,
LDMC = Trockenmasse / Frischmasse
)
# analyse species numbers --------------------------------------------------------------------------
# check the number of species per plot
artz1 <- table(veg$Aufnahmenr)
artz1
# check the number of species at each water availability level
artz2 <- table(veg$WasserHH)
artz2
# test whether the observed species numbers differ from the expected
chisq.test(artz2)
chisq.test(artz2)$expected
barplot(artz2,
xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = "Artenzahl"
)
# analyse water availability -----------------------------------------------------------------------
# check the correspondence between the Ellenberg values and estimated water availability
boxplot(veg$Feuchte ~ veg$WasserHH)
# calculate the mean plot-level Ellenberg humidity indicator value
WasserHH.Feuchte <- group_by(veg, Aufnahmenr, WasserHH, Bodenfeuchte) %>%
summarise(
# community mean
Feuchte_CM = mean(Feuchte, na.rm = TRUE) %>%
round(., 1),
# community weighted mean
Feuchte_CWM = weighted.mean(x = Feuchte, w = Deckung, na.rm = TRUE) %>%
round(., 1)
)
# plot the plot-level Ellenberg humidity
op <- par(mfcol = c(1, 2), mar = c(4, 4, 2, 2))
plot(Feuchte_CWM ~ WasserHH,
data = WasserHH.Feuchte, col = "red",
pch = 20, ylab = "mittlere Feuchte nach Ellenberg"
)
points(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20)
legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20)
plot(Feuchte_CWM ~ Bodenfeuchte, data = WasserHH.Feuchte, col = "red", pch = 20, ylab = "")
points(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CM, col = "green", pch = 20)
legend(2, 7, c("CWM", "CM"), col = c("red", "green"), pch = 20)
par(op)
# calculate correlation between the water availability and plot-level Ellenberg humidity
cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CM, method = "kendall", exact = FALSE)
cor.test(WasserHH.Feuchte$WasserHH, WasserHH.Feuchte$Feuchte_CWM, method = "kendall", exact = FALSE)
cor.test(WasserHH.Feuchte$Bodenfeuchte, WasserHH.Feuchte$Feuchte_CWM,
method = "kendall", exact = FALSE
)
# add the calculated mean Ellenberg humidity to the main data table
veg %<>% left_join(WasserHH.Feuchte)
glimpse(veg)
# analyse LDMC -------------------------------------------------------------------------------------
# LDMC = leaf dry matter content
# inspect the distribution of LDMC visually
hist(veg$LDMC)
# plot LDMC across water availability levels
boxplot(
LDMC ~ WasserHH,
dat = veg, ylab = "LDMC", xlab = "geschaetzte Wasserverfuegbarkeit (ordinal)",
col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
)
boxplot(LDMC ~ Feuchte_CWM,
dat = veg, ylab = "LDMC", xlab = "mittlere Feuchte nach Ellenberg",
x.axis = 1:8, col = rgb(r = 0, g = 0, b = 1, alpha = seq(.1, .9, length.out = 7)), log = "y"
)
ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
geom_point(na.rm = TRUE) +
stat_smooth(method = "lm", col = "#E69F00", se = FALSE, na.rm = TRUE) +
labs(x = "Bodenfeuchte [%]") +
theme_minimal()
# perform regression analyses
# using WasserHH as explanatory variable
fm0.LDMC <- lm(LDMC ~ WasserHH, data = veg)
summary(fm0.LDMC)
performance::check_model(fm0.LDMC)
fm1.LDMC <- lm(LDMC ~ WasserHH, weight = Deckung, data = veg) # weighted by % cover
summary(fm1.LDMC)
check_model(fm1.LDMC)
# compare the two models
tbl_merge(
tbls = list(tbl_regression(fm0.LDMC), tbl_regression(fm1.LDMC)),
tab_spanner = c("**Non-weighted**", "**Weigted**")
)
# using soil moisture (Bodenfeuchte) as explanatory variable
fm0.LDMC.b <- lm(LDMC ~ Bodenfeuchte, data = veg)
summary(fm0.LDMC.b)
check_model(fm0.LDMC.b)
fm1.LDMC.b <- lm(LDMC ~ Bodenfeuchte, weight = Deckung, data = veg)
summary(fm1.LDMC.b)
check_model(fm1.LDMC.b)
# compare the two models
tbl_merge(
tbls = list(tbl_regression(fm0.LDMC.b), tbl_regression(fm1.LDMC.b)),
tab_spanner = c("**Non-weighted**", "**Weigted**")
)
plot1 <- ggplot(veg, aes(x = Bodenfeuchte, y = LDMC)) +
geom_point(na.rm = TRUE) +
geom_abline(
intercept = fm0.LDMC.b$coefficients[1], slope = fm0.LDMC.b$coefficients[2],
col = "#E69F00", size = 1, na.rm = TRUE
) +
geom_abline(
intercept = fm1.LDMC.b$coefficients[1], slope = fm1.LDMC.b$coefficients[2],
col = "#56B4E9", size = 1, na.rm = TRUE
) +
labs(x = "Bodenfeuchte [%]") +
theme_minimal() +
annotate("text", x = 20, y = 270, label = "Non-weighted", col = "#E69F00") +
annotate("text", x = 25, y = 320, label = "Weighted", col = "#56B4E9")
plot1
# analyse SLA --------------------------------------------------------------------------------------
# SLA = specific leaf area
# inspect the distribution of SLA visually
hist(veg$SLA)
hist(log(veg$SLA))
# test the normality of distribution
shapiro_test(veg$SLA)
shapiro_test(log(veg$SLA))
# add log-transformed values to the main data table
veg %<>% mutate(SLA.log = log(SLA))
# plot the distribution of SLA across water availability levels
op <- par(mar = c(4, 5, 2, 3))
boxplot(SLA ~ WasserHH,
dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = "SLA", col = "blue", log = "y"
)
filter(veg, WasserHH == 2 & SLA > 25) %>%
select(WissName, Aufnahmenr, SLA)
filter(veg, WissName == "Euphorbia cyparissias") %>%
select(WissName, Aufnahmenr, SLA)
boxplot(SLA.log ~ WasserHH,
dat = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = expression(log("SLA")), col = "blue"
)
par(op)
filter(veg, WasserHH == 7 & SLA.log < 3) %>%
select(WissName, Aufnahmenr, SLA)
# perform regression analyses
fm1.SLA <- lm(SLA ~ WasserHH, weight = Deckung, data = veg)
summary(fm1.SLA)
check_model(fm1.SLA)
fm1.SLA.log <- lm(SLA.log ~ WasserHH, weight = Deckung, data = veg)
summary(fm1.SLA.log)
check_model(fm1.SLA.log)
fm1.SLA.log.b <- lm(SLA.log ~ Bodenfeuchte, weight = Deckung, data = veg)
summary(fm1.SLA.log.b)
check_model(fm1.SLA.log.b)
# plot the regression line
plot2 <- ggplot(veg, aes(x = Bodenfeuchte, y = SLA.log)) +
geom_point(na.rm = TRUE) +
geom_abline(
intercept = fm1.SLA.log.b$coefficients[1], slope = fm1.SLA.log.b$coefficients[2],
col = "#56B4E9", size = 1, na.rm = TRUE
) +
labs(x = "Bodenfeuchte [%]", y = expression(log("SLA"))) +
theme_minimal()
plot2
# look at the relationship between SLA and LDMC
plot(SLA ~ LDMC, data = veg)
cor.test(veg$SLA, veg$LDMC, use = "complete.obs")
# analyse height -----------------------------------------------------------------------------------
# inspect distributions visually
hist(veg$Hoehe.avg)
hist(log(veg$Hoehe.avg))
# test the normality of distribution
shapiro_test(veg$Hoehe.avg)
shapiro_test(log(veg$Hoehe.avg))
# add log-transformed values to the main data table
veg %<>% mutate(Hoehe.avg.log = log(Hoehe.avg))
# plot the distribution of height across water availability levels
op <- par(mar = c(4, 5, 2, 3))
boxplot(Hoehe.avg ~ WasserHH,
data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = "Wuchshöhe [cm]", col = rainbow(7, start = 1, end = 5 / 7),
log = "y"
)
filter(veg, WasserHH == 6 & Hoehe.avg > 50) %>%
select(WissName, Aufnahmenr, Hoehe.avg)
filter(veg, WasserHH == 8 & Hoehe.avg < 20) %>%
select(WissName, Aufnahmenr, Hoehe.avg)
boxplot(Hoehe.avg.log ~ WasserHH,
data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = expression(log("Wuchshöhe [cm]")), col = rainbow(7, start = 1, end = 5 / 7)
)
par(op)
# perform regression analysis
fm1.hoehe.avg.log <- lm(Hoehe.avg.log ~ WasserHH, weight = Deckung, data = veg)
summary(fm1.hoehe.avg.log)
check_model(fm1.hoehe.avg.log)
fm1.hoehe.avg.log.b <- lm(Hoehe.avg.log ~ Bodenfeuchte, weight = Deckung, data = veg)
summary(fm1.hoehe.avg.log.b)
check_model(fm1.hoehe.avg.log.b)
# plot the regression line
plot3 <- ggplot(veg, aes(x = Bodenfeuchte, y = Hoehe.avg.log)) +
geom_point(na.rm = TRUE) +
geom_abline(
intercept = fm1.hoehe.avg.log.b$coefficients[1],
slope = fm1.hoehe.avg.log.b$coefficients[2],
col = "#56B4E9", size = 1, na.rm = TRUE
) +
geom_smooth(col = "red", se = FALSE, na.rm = TRUE) +
labs(x = "Bodenfeuchte [%]", y = expression(log("Wuchshöhe [cm]"))) +
theme_minimal()
plot3
# analyse height variance --------------------------------------------------------------------------
# plot height variance
hist(veg$Hoehe.var)
hist(log(veg$Hoehe.var))
# add log-transformed values to the main data table
veg %<>% mutate(Hoehe.var.log = log(Hoehe.var))
# check the relationship between height and height variance
boxplot(Hoehe.var.log ~ WasserHH, data = veg)
plot(Hoehe.var.log ~ Hoehe.avg, data = veg)
cor.test(veg$Hoehe.var.log, veg$Hoehe.avg)
# calculate the coefficient of variation
veg %<>% mutate(Hoehe.cv = sqrt(Hoehe.var) / Hoehe.avg * 100)
hist(veg$Hoehe.cv)
hist(log(veg$Hoehe.cv))
veg %<>% mutate(Hoehe.cv.log = log(Hoehe.cv))
plot(Hoehe.cv.log ~ Hoehe.avg, data = veg)
cor.test(veg$Hoehe.cv.log, veg$Hoehe.avg)
boxplot(Hoehe.cv ~ WasserHH,
data = veg, xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = "Variationskoeffizient der Wuchshöhe", log = "y", col = topo.colors(7)[7:1]
)
filter(veg, WasserHH == 4 & Hoehe.cv > 100) %>%
select(WissName, Aufnahmenr, Hoehe.avg, Hoehe.var, Hoehe.cv)
# perform regression analysis
fm1.Hoehe.cv.log <- lm(Hoehe.cv.log ~ WasserHH, data = veg, weight = Deckung)
summary(fm1.Hoehe.cv.log)
check_model(fm1.Hoehe.cv.log)
fm1.Hoehe.cv.log.b <- lm(Hoehe.cv.log ~ Bodenfeuchte, data = veg, weight = Deckung)
summary(fm1.Hoehe.cv.log.b)
check_model(fm1.Hoehe.cv.log.b)
# analyse seed mass --------------------------------------------------------------------------------
hist(veg$Germinulenmasse)
hist(log(veg$Germinulenmasse))
veg %<>% mutate(Germinulenmasse.log = log(Germinulenmasse))
op <- par(mar = c(4, 5, 2, 3))
boxplot(Germinulenmasse ~ WasserHH, data = veg, log = "y")
boxplot(Germinulenmasse.log ~ WasserHH, data = veg, ylab = expression(log("Germinulenmasse")))
filter(veg, Germinulenmasse.log > 4) %>%
select(WissName, Germinulenmasse)
par(op)
# perform regression analysis
fm1.gmasse.log <- lm(Germinulenmasse.log ~ WasserHH, data = veg, weight = Deckung)
summary(fm1.gmasse.log)
check_model(fm1.gmasse.log)
fm1.gmasse.log.b <- lm(Germinulenmasse.log ~ Bodenfeuchte, data = veg, weight = Deckung)
summary(fm1.gmasse.log.b)
check_model(fm1.gmasse.log.b)
# analyse life form --------------------------------------------------------------------------------
# compare the distributions of life forms across vegetation types
select(veg, Lebensform, Vegetation) %>%
gtsummary::tbl_summary(by = Vegetation) %>%
add_p()
# calculate total % cover per life form across humidity levels
lft.1 <- group_by(veg, Aufnahmenr, WasserHH, Lebensform) %>%
summarise(Deckung = sum(Deckung))
head(lft.1)
tail(lft.1)
lft.2 <- xtabs(Deckung ~ Lebensform + WasserHH, data = lft.1)
lft.2 <- DescTools::as.matrix.xtabs(lft.3)
lft.2
chisq_test(lft.2, simulate.p.value = TRUE)
# plot the distribution of life forms across humidity levels
# using base R
op <- par(mar = c(4, 5, 2, 3))
barplot(lft.3,
xlab = "geschätzte Wasserverfügbarkeit (ordinal)",
ylab = expression(sum("Deckung")),
main = "Lebensformenspektren", ylim = c(0, 160)
)
legend(8.5, 150, legend = row.names(lft.3), pch = 15, col = grey.colors(ncol(lft.3)))
par(op)
# using ggplot2
ggplot(data = lft.1, aes(fill = Lebensform, y = Deckung, x = ordered(WasserHH))) +
geom_bar(position = "stack", stat = "identity") +
viridis::scale_fill_viridis(discrete = TRUE) +
labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
ggtitle("Lebensformenspektren") +
theme_minimal()
# analyse pollination type -------------------------------------------------------------------------
# compare the distributions of pollination syndromes across vegetation types
select(veg, Bestaeubung, Vegetation) %>%
gtsummary::tbl_summary(by = Vegetation) %>%
add_p()
# calculate total % cover per pollination type across humidity levels
bst.1 <- group_by(veg, Aufnahmenr, WasserHH, Bestaeubung) %>%
summarise(Deckung = sum(Deckung))
head(bst.1)
tail(bst.1)
bst.2 <- xtabs(Deckung ~ Bestaeubung + WasserHH, data = bst.1)
bst.2 <- DescTools::as.matrix.xtabs(bst.2)
bst.2
chisq_test(bst.2, simulate.p.value = TRUE)
# plot the distribution of pollination types
# using base R
op <- par(mar = c(4, 5, 2, 3))
cols <- c("yellow", "brown", "grey", "lightblue")
barplot(bst.3,
xlab = "geschätzte Wasserverfügbarkeit (ordinal)", ylab = expression(sum("Deckung")),
main = "Bestäubungstypenspektren",
sub = expression(Chi^2 * "= 243, p < 0.001"), cex.sub = 0.8, col = cols
)
legend(0, 140,
legend = c("Wind", "Selbst", "multiple", "Insekten"),
pch = 15, col = cols[4:1], cex = 0.7
)
# using ggplot2
plot4 <- ggplot(data = bst.1, aes(fill = Bestaeubung, y = Deckung, x = ordered(WasserHH))) +
geom_bar(position = "stack", stat = "identity") +
viridis::scale_fill_viridis(
discrete = TRUE, labels = c("Insekten", "multiple", "Selbst", "Wind")
) +
labs(x = "geschätzte Wasserverfügbarkeit (ordinal)", y = expression(sum("Deckung"))) +
ggtitle("Bestäubungstypenspektren") +
theme_minimal()
plot4
# analyse the relationships between traits ---------------------------------------------------------
# test if height varies across life forms
# plot height by life form
ggpubr::ggboxplot(veg, x = "Lebensform", y = "Hoehe.avg.log", select = c("H", "G", "T", "mL"))
# check the ANOVA assumptions
# outliers
group_by(veg, Lebensform) %>%
identify_outliers(Hoehe.avg.log)
# normality
lm.hoe.lf <- lm(Hoehe.avg.log ~ Lebensform, data = veg)
ggpubr::ggqqplot(residuals(lm.hoe.lf))
rstatix::shapiro_test(residuals(lm.hoe.lf))
# homogeneity of variances
levene_test(veg, Hoehe.avg.log ~ Lebensform)
# perform ANOVA and the Kruskal-Wallis test (non-parametric alternative)
anova_test(veg, Hoehe.avg.log ~ Lebensform)
# because the normality of residuals assumption was violated
# the Kruskal Wallis test should be used instead of a standard ANOVA
kruskal_test(veg, Hoehe.avg.log ~ Lebensform)
# test if the height varies across pollination types
veg$Bestaeubung <- as.factor(veg$Bestaeubung)
ggboxplot(veg, x = "Bestaeubung", y = "Hoehe.avg.log")
group_by(veg, Bestaeubung) %>%
identify_outliers(Hoehe.avg.log) %>%
select(Fortlaufendenr, WissName, Hoehe.avg, Hoehe.avg.log, is.outlier, is.extreme)
lm.hoe.best <- lm(Hoehe.avg.log ~ Bestaeubung, data = veg)
ggqqplot(residuals(lm.hoe.best))
shapiro_test(residuals(lm.hoe.best))
levene_test(veg, Hoehe.avg.log ~ Bestaeubung)
# because the normality of residuals assumption was violated
# we use the Kruskal Wallis test instead of a standard ANOVA
# and the Dunn's post hoc test for pairwise comparisons
hoe.best.kw <- kruskal_test(veg, Hoehe.avg.log ~ Bestaeubung)
hoe.best.kw
hoe.best.pwc <- dunn_test(veg, Hoehe.avg.log ~ Bestaeubung)
hoe.best.pwc
# plot the results
hoe.best.pwc %<>% add_xy_position(x = "Bestaeubung")
plot6 <- ggboxplot(veg, x = "Bestaeubung", y = "Germinulenmasse.log") +
stat_pvalue_manual(hoe.best.pwc, hide.ns = TRUE) +
labs(
subtitle = get_test_label(hoe.best.kw, detailed = TRUE),
caption = get_pwc_label(hoe.best.pwc),
y = expression(log("Wuchshöhe")),
x = "Bestäubungstyp"
)
plot6
# test if the SLA varies across life forms
ggboxplot(veg, x = "Lebensform", y = "SLA.log", select = c("H", "G", "T", "mL"))
group_by(veg, Lebensform) %>%
identify_outliers(SLA.log)
lm.sla.lf <- lm(SLA.log ~ Lebensform, data = veg)
ggqqplot(residuals(lm.sla.lf))
shapiro_test(residuals(lm.sla.lf))
levene_test(veg, SLA.log ~ Lebensform)
anova_test(veg, SLA.log ~ Lebensform)
# test if the seed mass varies across life forms
gmasse <- select(veg, WissName, Germinulenmasse.log, Lebensform, Bestaeubung) %>%
distinct()
ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log")
group_by(gmasse, Lebensform) %>%
identify_outliers(Germinulenmasse.log)
lm.gmasse.lf <- lm(Germinulenmasse.log ~ Lebensform, data = gmasse)
ggqqplot(residuals(lm.gmasse.lf))
shapiro_test(residuals(lm.gmasse.lf))
levene_test(gmasse, Germinulenmasse.log ~ Lebensform)
gmasse.lf.aov <- anova_test(gmasse, Germinulenmasse.log ~ Lebensform)
gmasse.lf.aov
gmasse.lf.pwc <- tukey_hsd(gmasse, Germinulenmasse.log ~ Lebensform)
gmasse.lf.pwc
gmasse.lf.pwc %<>% add_xy_position(x = "Lebensform")
plot7 <- ggboxplot(gmasse, x = "Lebensform", y = "Germinulenmasse.log") +
stat_pvalue_manual(gmasse.lf.pwc, hide.ns = TRUE) +
labs(
subtitle = get_test_label(gmasse.lf.aov, detailed = TRUE),
caption = get_pwc_label(gmasse.lf.pwc),
y = expression(log("Germinulenmasse")),
x = "Lebensform"
)
plot7
# test if the seed mass varies across pollination types
ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log")
group_by(gmasse, Bestaeubung) %>%
identify_outliers(Germinulenmasse.log)
fm.gmasse.best <- lm(Germinulenmasse.log ~ Bestaeubung, data = gmasse)
ggpubr::ggqqplot(residuals(fm.gmasse.best))
rstatix::shapiro_test(residuals(fm.gmasse.best))
ggqqplot(gmasse, "Germinulenmasse.log", facet.by = "Bestaeubung")
levene_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
# because the homogeneity of variance assumption was violated
# we use the Welch test instead of a standard ANOVA
# and the Games-Howell post hoc test for pairwise comparisons
gmasse.best.aov <- welch_anova_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
gmasse.best.aov
gmasse.best.pwc <- games_howell_test(gmasse, Germinulenmasse.log ~ Bestaeubung)
gmasse.best.pwc
gmasse.best.pwc %<>% add_xy_position(x = "Bestaeubung")
ggboxplot(gmasse, x = "Bestaeubung", y = "Germinulenmasse.log") +
stat_pvalue_manual(gmasse.best.pwc, hide.ns = TRUE) +
labs(
subtitle = get_test_label(gmasse.best.aov, detailed = TRUE),
caption = get_pwc_label(gmasse.best.pwc),
y = expression(log("Germinulenmasse")),
x = "Bestäubungstyp"
)
# save the edited data as a separate file
write_csv2(veg, here("veg_data_2020_edited.csv"))
# analyse species composition ----------------------------------------------------------------------
veg <- read_csv2(here("veg_data_2022_edited.csv"))
mat <- xtabs(Deckung ~ Aufnahmenr + WissName, data = veg)
str(mat)
# perform correspondence analysis
decorana(mat)
fm.ca <- cca(mat)
fm.ca
summary(fm.ca)
plot(fm.ca, display = "sites")
plot(fm.ca)
# shorten taxon names
wname <- str_remove(veg$WissName, pattern = " cf\\.| x") %>%
str_split(pattern = " ", simplify = TRUE) %>%
substr(., 1, 3)
wname <- paste(wname[, 1], wname[, 2], sep = ".")
veg %<>% mutate(wname = wname)
# repeat the same analysis with shortened taxon names provided
mat <- xtabs(Deckung ~ Aufnahmenr + wname, data = veg)
fm.ca <- cca(mat)
summary(fm.ca)
# plot the results
summary(fm.ca)$species %>%
as_tibble() %>%
mutate(wname = rownames(summary(fm.ca)$species)) %>%
ggplot(aes(x = CA1, y = CA2, label = wname)) +
geom_point() +
ggrepel::geom_text_repel(size = 1.5, force = 15, max.overlaps = 30) +
theme_bw()
# prepare data for trait ordination ----------------------------------------------------------------
# calculate the proportion of each life form per plot
lf.kt <- select(veg, Aufnahmenr, Deckung, Lebensform) %>%
pivot_wider(
names_from = Lebensform, values_from = Deckung,
values_fn = sum, values_fill = 0, names_prefix = "lf."
) %>%
mutate(total = rowSums(.[, -1])) %>%
mutate(across(lf.H:lf.M, ~ . / total), .keep = "unused")
# calculate the proportion of each pollination type per plot
best.kt <- select(veg, Aufnahmenr, Deckung, Bestaeubung) %>%
pivot_wider(
names_from = Bestaeubung, values_from = Deckung,
values_fn = sum, values_fill = 0, names_prefix = "b."
) %>%
mutate(total = rowSums(.[, -1])) %>%
mutate(across(b.wi:b.mb, ~ . / total), .keep = "unused")
# compute community weighted means for numerical traits
cwm <- group_by(veg, Aufnahmenr) %>%
summarise(across(
c(Hoehe.avg.log, Hoehe.cv.log, LDMC, SLA.log, Germinulenmasse.log),
~ weighted.mean(., w = Deckung, na.rm = TRUE)
)) %>%
rename(
Ho.av = Hoehe.avg.log,
Ho.cv = Hoehe.cv.log,
SLA = SLA.log,
GM = Germinulenmasse.log
)
# combine the three tables
cwm %<>%
left_join(lf.kt) %>%
left_join(best.kt)
cwm
# perform principal components analysis (PCA) of CWMs ----------------------------------------------
# analyse all traits
pca1 <- FactoMineR::PCA(cwm[, -c(1:2)])
summary(pca1)
cowplot::plot_grid(
plot.PCA(pca1),
plot.PCA(pca1, c(1, 3))
)
# analyse numerical traits only
colnames(cwm)
pca2 <- PCA(select(cwm, Ho.av:GM))
cowplot::plot_grid(
plot.PCA(pca2),
plot.PCA(pca2, c(1, 3))
)
# perform PCA of species traits --------------------------------------------------------------------
# create a separate table containing numerical traits
dat <- select(
veg, WissName, Vegetation, Blattflaeche, Frischmasse, Trockenmasse,
SLA, LDMC, Hoehe.avg, Hoehe.var, Germinulenmasse
) %>%
rename(
BlF = Blattflaeche,
FrM = Frischmasse,
TrM = Trockenmasse,
Ho.av = Hoehe.avg,
Ho.var = Hoehe.var,
GM = Germinulenmasse
) %>%
na.omit()
glimpse(dat)
# perform PCA
pca3 <- FactoMineR::PCA(dat[, -c(1:2)])
summary(pca3)
plot.PCA(pca3)
# add PC to the trait table
dat$pc1 <- pca3$ind$coord[, 1]
dat$pc2 <- pca3$ind$coord[, 2]
pc12 <- pca3$ind$coord[, 1:2]
# plot the PCA results
ggplot(data = dat, aes(x = pc1, y = pc2, color = Vegetation, shape = Vegetation)) +
geom_hline(yintercept = 0, lty = 2) +
geom_vline(xintercept = 0, lty = 2) +
geom_point(alpha = .8) +
stat_ellipse(
geom = "polygon", aes(fill = Vegetation), alpha = .2,
show.legend = FALSE, level = .95
) +
guides(
color = guide_legend(title = "Vegetation"),
shape = guide_legend(title = "Vegetation")
) +
labs(x = "PC 1 (45.24%)", y = "PC 2 (20.27%)") +
theme_minimal() +
theme(
panel.grid = element_blank(),
panel.border = element_rect(fill = "transparent"),
legend.position = "bottom"
)
# plot PCA results using kernels
# Code from Björn Reu
# Used to produce Fig. 2 in Diaz et al. 2016, Nature
H <- Hpi(x = pc12) # optimal bandwidth estimation
est <- kde(x = pc12, H = H, compute.cont = TRUE) # kernel density estimation
# set contour probabilities for drawing contour levels
cl <- contourLevels(est, prob = c(.5, .05, .001), approx = TRUE)
# use envfit for drawing arrows, can be also done using trait loadings
fit <- envfit(pc12, select(dat, BlF:GM))
# create a plot
par(mar = c(4, 4, 2, 2))
cols <- ifelse(dat$Vegetation == "TR", "darkred", "darkblue")
plot(est,
cont = seq(1, 100, by = 1), display = "filled.contour2", add = FALSE, ylab = "", xlab = "",
cex.axis = .75, ylim = c(-5, 4), xlim = c(-3, 7), las = 1
)
plot(est, abs.cont = cl[1], labels = .5, labcex = .75, add = TRUE, lwd = .75, col = "grey30")
plot(est, abs.cont = cl[2], labels = .95, labcex = .75, add = TRUE, lwd = .5, col = "grey60")
plot(est, abs.cont = cl[3], labels = .99, labcex = .75, add = TRUE, lwd = .5, col = "grey60")
points(pc12, pch = 16, cex = .5, col = cols)
plot(fit, cex = .90, col = 1)
mtext("PC 1 (45.24%)", cex = .75, side = 1, line = 1.8)
mtext("PC 2 (20.27%)", cex = .75, side = 2, line = 1.8)
dat[dat$pc1 > 4, ]
dat[dat$pc2 < -3, ]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment