Skip to content

Instantly share code, notes, and snippets.

@eduardofox2
Last active April 20, 2020 23:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eduardofox2/6dd03280d1838e482330a4a0ff8093be to your computer and use it in GitHub Desktop.
Save eduardofox2/6dd03280d1838e482330a4a0ff8093be to your computer and use it in GitHub Desktop.
R script containing raw data, analysis and plotting of Figures in the scientific publication 'The psyllid Diaphorina citri as a vector of Citrus tristeza virus' by Wu et. al [in press]
#Written by Eduardo G. P. Fox in Rio de Janeiro, Brazil, 2018.
#Data pertains infection rates and relative viral DNA titers in host plants and insect vector of CTV citrus 'tristeza' virus
#Useful functions applied
#List preserving object names, from https://stackoverflow.com/questions/16951080/can-lists-be-created-that-name-themselves-based-on-input-object-names, credit: @BenBolker
namedList <- function(...) {
L <- list(...)
snm <- sapply(substitute(list(...)),deparse)[-1]
if (is.null(nm <- names(L))) nm <- snm
if (any(nonames <- nm=="")) nm[nonames] <- snm[nonames]
setNames(L,nm)
}
# Multiple plot function obtained from R CookBook Website
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
#Mind you can Plot images with aesthetics corrections!
#***************************************************************************************
#Relevant packages
require(reshape2)
require(ggplot2)
require(dplyr)
require(tidyr)
#Raw Data input & formatting
#Figure 2. Persistence of Citrus tristeza virus (CTV) infection by host plant species.
#Fig 2(A) CTV infection rates in D. citri
Fig2A<-list(
rates.AAP.0d_reticulata =c(100, 100, 100, 90, 70, 70, 80, 70),
rates.AAP.5d_reticulata =c(70, 50, 50, 80, 80, 80, 90, 70, 80, 70, 80, 70, 80, 70, 80, 70),
rates.AAP.15d_reticulata=c(90, 50, 40, 80, 40, 70, 90, 60),
rates.AAP.5d_paniculata =c(50, 70, 40, 10, 20, 30, 20, 20),
rates.AAP.15d_paniculata=c(10, 0, 20, 0, 0, 10, 0, 0)
)
#preparing long format
long.Fig2A<-melt(Fig2A)
#Fig 2(B) CTV viral DNA titers in infected D. citri as obtained by qPCR
titers.AAP.0d_reticulata<-c(24045.8015267176, 33964.3347050754, 10459.1104734577, 31956.8822553897, 4246.03174603175, 25221.2389380531,
3841.02564102564, 9272.95918367347, 22259.8870056497, 15584.2185128983, 7183.79446640316, 2896.44432909421, 3171.75094024237,
1329.72108722686, 1878.72662961091, 491.974442886084, 7070.33751028995, 817.520080321285, 701.823050777629, 2443.15941648048,
966.047226491547, 226.481751824818, 23748.0559875583, 848.081264108352, 2338.36477987421, 7380.71065989848, 17184.8739495798,
17503.5868005739, 5708.84146341463, 27704.5908183633, 9771.80456390872, 29233.2038555604, 875.034984606773, 59184.2620249253,
496.512292738708, 774.275362318841, 10550.0593589236, 8152.64445699228, 23046.9074304691, 7654.7433903577, 48752.0201113306,
34409.2623454083, 2300.13550135501, 5713.9750652363, 8808.32735104092, 7690.66937119675, 4144.64534075104, 7794.03905447071,
3132.11845102506, 1949.76076555024, 47733.860342556, 7506.57894736842, 2866.2109375, 12545.12, 9877.25, 9994.01, 14497.58,
6584.25, 21145.35, 15583.35, 8654.22, 11254.35, 18931.01, 14751.24, 8856.25, 9889.25, 5848.47, 14458.22)
titers.AAP.5d_reticulata<-c(2900.79641148325, 4424.62926829268, 6979.94923857868, 9778.84615384615, 5480.92783505155, 2918.21815286624,
13690.3223880597, 12977.3365231259, 2219.64245810056, 9166.0705882353, 10051.5703971119, 4413.39590443686, 4456.45627376426,
3607.13716108453, 9970.33333333333, 6514.28571428571, 2526.09438202247, 1803.8209432454, 8258.48806366048, 5992.18827415359,
9979.98308906426, 9504.24151696607, 7426.82519280206, 16240.3315649867, 20101.8426966292, 5568.25, 10255.25, 6877.22, 4987.99,
3658.58, 8225.36, 8744.15, 6856, 6445.08, 6899.36, 5785.14, 9875.36, 12336.08, 10545.25, 9845.12, 4588.25, 7896.35, 8975.056,
10254.92, 6584.55, 9786.54, 6458.25, 8952.25, 13578.06, 8854.56, 7754.25, 7532.75, 5644.8, 6658.25, 5554.24, 7555.01, 8810.73,
5734.89, 3911.04, 4617.80, 5221.06, 4414.15, 5785.21, 4745.08, 2849.93, 3582.04, 10327.93, 1032.38, 8515.22, 6951.12, 6138.83,
7816.05, 10915.60, 8254.62, 6481.55, 5886.23, 5895.05, 11912.86, 11508.16, 9854.06, 6751.15, 6831.05, 6644.80, 7798.15, 6553.15,
6355.51, 7824.80)
titers.AAP.15d_reticulata<-c(1951.14523875878, 4888.59022594451, 3879.20863437825, 8037.70242256264, 4724.91690673443, 6827.843541764,
1500.21435077089, 7126.26719914701, 1739.224414668, 966.841297338958, 2376.84666585316, 36616.3433559159, 2790.38916847681,
15464.9263565565, 4169.26300711224, 42201.9787198769, 684.94613206142, 4673.17550742545, 4413.8560019269, 9266.46204313372,
960.864671792237, 1422.33572084286, 1280.7693303125, 10638.2524516624, 306.094477687627, 5966.86308520645, 4342.43675847564,
507.175874472451, 1542.70627800266, 1020.4762614807, 2804.60842701087, 1383.78180095318, 4242.93573651035, 685.802139068739,
2250.1253124631, 3368.91345811865, 2286.31770542194, 16252.882869076, 3387.9933469011, 846.557089640947, 15934.374453214,
4813.15980994366, 254.537277570523, 5745.02912912273, 904.39145504319, 15162.2192239936, 20832.7129290786, 43506.3795543985,
11524.9631359661, 26007.5333317263, 2555.62754182112, 4410.08026617345)
titers.AAP.5d_paniculata<-c(216.244359597362, 2256.87751883652, 543.870566272558, 591.108328115216, 474.821286735504, 1208.96084337349,
993.61417241735, 2591.66776186095, 132.932793069722, 1795.6204379562, 2664.07465007776, 47.1723902272339, 534.00503778337,
837.09016393443, 1524.78952291862, 85.0223413762288, 1862.85714285714, 1898.34024896266, 610.64453125, 774.350282485876,
785.08875739645, 2943.92523364486, 1025.36, 1955.11, 788.36, 986.32)
titers.AAP.15d_paniculata<-c(156.354, 98.57, 287.559, 103.655)
#using conservative list-melt approach to generate long format with names
long.Fig2B<-melt(namedList(titers.AAP.0d_reticulata,
titers.AAP.5d_reticulata,
titers.AAP.15d_reticulata,
titers.AAP.5d_paniculata,
titers.AAP.15d_paniculata))
#reordering factors for plotting
long.Fig2B$L1<-factor(long.Fig2B$L1,
levels=c("titers.AAP.0d_reticulata",
"titers.AAP.5d_reticulata",
"titers.AAP.15d_reticulata",
"titers.AAP.5d_paniculata",
"titers.AAP.15d_paniculata"),ordered = TRUE)
#Figure 3. Citrus tristeza virus (CTV) infected proportion (A) and relative titer (B) of plants after transmitting by Diaphorina citri.
#Fig 3(A) CTV Proportion of infected citrus Shatangju (%)
months_post_feeding_CTV_citri<-c(0, 15, 30, 45, 60, 75, 90, 120, 210)
transmission_reticulata<-c(0, 0, 33.33, 41.67, 50, 50, 58.33, 58.33, 58.33)
transmission_limon<-c(0, 0, 0, 33.33, 50, 66.67, 83.33, 83.33, 83.33)
#making a table
Fig3A<-data.frame(transmission_reticulata, transmission_limon,
row.names= months_post_feeding_CTV_citri)
long.Fig3A<-melt(as.matrix(Fig3A)) #long format; note necessity to use as matrix to retrieve rows names
#Fig 3(B) CTV DNA titers in infected citrus Shatangju
#4 d from CTV transmission
#Investigation time after feeding by CTV+ D. citri (month)
copyn.reticulata.0<-c(0)
copyn.reticulata.15<-c(0)
copyn.reticulata.30<-c(159.61, 55.56, 40.20, 70.42)
copyn.reticulata.45<-c(139.06, 223.93, 310.93, 510.91, 24.02)
copyn.reticulata.60<-c(295.79, 54.88, 517.50, 935.56, 130.22, 1137.11)
copyn.reticulata.75<-c(587.85, 254.50, 919.25, 856.54, 518.69, 1668.56)
copyn.reticulata.90<-c(965.54, 1544.23, 31.58, 54.62, 158.46, 3159.37, 3812.60)
copyn.reticulata.120<-c(1233.21, 1435.24, 2445.33, 876.30, 1322.45, 5443.20, 3321.23)
copyn.reticulata.210<-c(3324.24, 2454.35, 4213.23, 2432.34, 1455.53, 2345.51, 2190.23)
#14 d from CTV transmission
#Investigation time after feeding by CTV+ D. citri (month)
copyn.limon.0<-c(0)
copyn.limon.15<-c(0)
copyn.limon.30<-c(0)
copyn.limon.45<-c(21.57, 13.00)
copyn.limon.60<-c(15.90, 13.72, 5.30)
copyn.limon.75<-c(15.30, 12.29, 9.68, 9.68)
copyn.limon.90<-c(21.57, 18.37, 6.89, 14.00, 57.01)
copyn.limon.120<-c(35.11, 28.73, 12.21, 23.94, 77.35)
copyn.limon.210<-c(57.11, 32.12, 36.23, 39.43, 79.04)
Fig3B.reticulata<-list(
copyn.reticulata.0,
copyn.reticulata.15,
copyn.reticulata.30,
copyn.reticulata.45,
copyn.reticulata.60,
copyn.reticulata.75,
copyn.reticulata.90,
copyn.reticulata.120,
copyn.reticulata.210
)
names(Fig3B.reticulata)<-months_post_feeding_CTV_citri
Fig3B.limon<-list(
copyn.limon.0,
copyn.limon.15,
copyn.limon.30,
copyn.limon.45,
copyn.limon.60,
copyn.limon.75,
copyn.limon.90,
copyn.limon.120,
copyn.limon.210
)
names(Fig3B.limon)<-months_post_feeding_CTV_citri
Fig3B<-list(Fig3B.reticulata, Fig3B.limon)
names(Fig3B)<-c("reticulata", "limon")
long.Fig3B<-melt(Fig3B)
#Reordering ggplot factors
long.Fig3B$L1 <- factor(long.Fig3B$L1,
levels = c('reticulata','limon'),ordered = TRUE)
#Producing column for colour legends as requested by Wu Fen
long.Fig2A_<-separate(data = long.Fig2A, col = L1, into = c("treatment", "species"), sep = "_", remove=FALSE)
long.Fig2B_<-separate(data = long.Fig2B, col = L1, into = c("treatment", "species"), sep = "_", remove=FALSE)
#Plotting Figures
##Figure 2 improved version
#setting up labels -- find out why italics expression isn't working??
Fig2.labels<-c(expression(paste(italic("C. ret"), "-0d")),
expression(paste(italic("C. ret"), "-5d")),
expression(paste(italic("C. ret"), "-15d")),
expression(paste(italic("M. pan"), "-5d")),
expression(paste(italic("M. pan"), "-15d")))
#coding for Fig2A
Fig2A.plot<-ggplot(long.Fig2A_, aes(x=L1, y=value))+
stat_summary(aes(col=species), fun.ymin = function(x) mean(x) - sd(x),
fun.ymax = function(x) mean(x) + sd(x),
geom = "errorbar", position=position_dodge(width=0.95), width=0.2)+
geom_jitter(width=0.02, col="gray", size=0.5)+
stat_summary(fun.y = median, geom = "point",
shape= 15, size=3, position=position_dodge(width=0.95))+
geom_vline(xintercept = 1.5, linetype="dotted")+
scale_x_discrete("Days after AAP on CTV+ citrus", labels=Fig2.labels)+
scale_y_continuous("% of CTV+ citrus", labels=seq(20,100,20))+
theme_classic()+
theme(legend.position=c(.9, .9))
#coding for Fig2B
Fig2B.plot<-ggplot(long.Fig2B_, aes(x=treatment, y=log(value), fill=factor(L1, , levels = rev(levels(L1)))))+
geom_boxplot(aes(col=species), outlier.shape = NA, width=0.2, , position = position_dodge2(preserve="single"))+
scale_fill_manual(values = rep(NA, 5)) +
geom_point(shape=16,size=0.5, col= "grey", position=position_jitterdodge(jitter.width = 0.02, dodge.width = 0.2), show.legend=F)+
geom_vline(xintercept = 1.5, linetype="dotted")+
scale_x_discrete("Days after AAP on CTV+ citrus", labels=c(expression(paste(italic("C. ret"), "-0d")), "5d", "15d"))+
scale_y_continuous("Amount CTV+ / total citrus DNA (ng)")+
theme_classic()+
guides(fill=FALSE)+
theme(legend.position=c(.9, .9))
#Generating Figure 2
multiplot(Fig2A.plot, Fig2B.plot)
#Figure 3 improved version
#coding for Fig3A
Fig3A.plot<-ggplot(long.Fig3A, aes(x=Var1, y=value, group = Var2))+
geom_line(aes(colour=Var2), size=1) +
stat_summary(fun.y = mean, geom = "point")+
theme (legend.position=c(0.15,0.85)) +
scale_colour_manual(values=c("grey", "black"), name="Days since CTV transmission", labels=c("reticulata", "limon"))+
scale_x_continuous("Months after exposure to CTV+ D. citri", breaks=seq(0.5,3,0.5))+
scale_y_continuous("% of CTV+ citrus", limits=c(0,100))+
theme_classic()
#coding for Fig.3B
Fig3B.plot<-ggplot(long.Fig3B, aes(x=L2, y=log(value), group=interaction(L2,L1)))+
geom_boxplot(width=0.4, aes(fill=L1), position=position_dodge(0.7))+
scale_fill_manual(values=c("grey", "white"), name="Days since CTV transmission", labels=c("reticulata", "limon"))+
labs(x="Months after exposure to CTV+ D. citri", y="Amount CTV+ / total citrus DNA (ng)")+
theme_classic()
#Generating Figure 3
multiplot(Fig3A.plot, Fig3B.plot)
###Statistical Analyses *****************************
##Table 1. Proportion of Citrus tristeza virus (CTV) genome variation through Diaphorina citri transmission (%)
#Perhaps make it a Figure? Not sure what's the point of these results
#Identity of nucleotide of the original CTV+ citrus
High_ID_original<-c(98.24, 96.58, 95.02)
Low_ID_original<-c(97.46, 99.04, 98.67)
#Identity of nucleotide of the final infected citrus,
High_ID_final<-c(99.98, 99.8, 99.97)
Low_ID_final<-c(99.89, 99.66, 99.91)
#SNPs of D. citri during transmission,
High_ID_SNPs_during<-c(3.87, 3.69, 3.27)
Low_ID_SNPs_during<-c(0.45, 0.72, 0.57)
#SNPs of final infected citrus after D. citri transmission,
High_ID_SNPs_after<-c(1.92, 1.75, 1.52)
Low_ID_SNPs_after<-c(0.28, 0.13, 0.05)
#non-parametric pair comparison using wilcoxon
wilcox.test(High_ID_original, Low_ID_original)
wilcox.test(High_ID_final, Low_ID_final)
wilcox.test(High_ID_SNPs_during, Low_ID_SNPs_during)
wilcox.test(High_ID_SNPs_after, Low_ID_SNPs_after)
dunn.test(long.Fig2A$value, long.Fig2A$variable, list=TRUE)
@eduardofox2
Copy link
Author

Once the paper comes out, I will update this code with better syntax.
Please, any suggestions or comments, find me on Twitter account @ofoxofox

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment