Last active
April 20, 2020 23:14
-
-
Save eduardofox2/6dd03280d1838e482330a4a0ff8093be to your computer and use it in GitHub Desktop.
R script containing raw data, analysis and plotting of Figures in the scientific publication 'The psyllid Diaphorina citri as a vector of Citrus tristeza virus' by Wu et. al [in press]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Written by Eduardo G. P. Fox in Rio de Janeiro, Brazil, 2018. | |
#Data pertains infection rates and relative viral DNA titers in host plants and insect vector of CTV citrus 'tristeza' virus | |
#Useful functions applied | |
#List preserving object names, from https://stackoverflow.com/questions/16951080/can-lists-be-created-that-name-themselves-based-on-input-object-names, credit: @BenBolker | |
namedList <- function(...) { | |
L <- list(...) | |
snm <- sapply(substitute(list(...)),deparse)[-1] | |
if (is.null(nm <- names(L))) nm <- snm | |
if (any(nonames <- nm=="")) nm[nonames] <- snm[nonames] | |
setNames(L,nm) | |
} | |
# Multiple plot function obtained from R CookBook Website | |
# | |
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) | |
# - cols: Number of columns in layout | |
# - layout: A matrix specifying the layout. If present, 'cols' is ignored. | |
# | |
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), | |
# then plot 1 will go in the upper left, 2 will go in the upper right, and | |
# 3 will go all the way across the bottom. | |
# | |
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { | |
library(grid) | |
# Make a list from the ... arguments and plotlist | |
plots <- c(list(...), plotlist) | |
numPlots = length(plots) | |
# If layout is NULL, then use 'cols' to determine layout | |
if (is.null(layout)) { | |
# Make the panel | |
# ncol: Number of columns of plots | |
# nrow: Number of rows needed, calculated from # of cols | |
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), | |
ncol = cols, nrow = ceiling(numPlots/cols)) | |
} | |
if (numPlots==1) { | |
print(plots[[1]]) | |
} else { | |
# Set up the page | |
grid.newpage() | |
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) | |
# Make each plot, in the correct location | |
for (i in 1:numPlots) { | |
# Get the i,j matrix positions of the regions that contain this subplot | |
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) | |
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, | |
layout.pos.col = matchidx$col)) | |
} | |
} | |
} | |
#Mind you can Plot images with aesthetics corrections! | |
#*************************************************************************************** | |
#Relevant packages | |
require(reshape2) | |
require(ggplot2) | |
require(dplyr) | |
require(tidyr) | |
#Raw Data input & formatting | |
#Figure 2. Persistence of Citrus tristeza virus (CTV) infection by host plant species. | |
#Fig 2(A) CTV infection rates in D. citri | |
Fig2A<-list( | |
rates.AAP.0d_reticulata =c(100, 100, 100, 90, 70, 70, 80, 70), | |
rates.AAP.5d_reticulata =c(70, 50, 50, 80, 80, 80, 90, 70, 80, 70, 80, 70, 80, 70, 80, 70), | |
rates.AAP.15d_reticulata=c(90, 50, 40, 80, 40, 70, 90, 60), | |
rates.AAP.5d_paniculata =c(50, 70, 40, 10, 20, 30, 20, 20), | |
rates.AAP.15d_paniculata=c(10, 0, 20, 0, 0, 10, 0, 0) | |
) | |
#preparing long format | |
long.Fig2A<-melt(Fig2A) | |
#Fig 2(B) CTV viral DNA titers in infected D. citri as obtained by qPCR | |
titers.AAP.0d_reticulata<-c(24045.8015267176, 33964.3347050754, 10459.1104734577, 31956.8822553897, 4246.03174603175, 25221.2389380531, | |
3841.02564102564, 9272.95918367347, 22259.8870056497, 15584.2185128983, 7183.79446640316, 2896.44432909421, 3171.75094024237, | |
1329.72108722686, 1878.72662961091, 491.974442886084, 7070.33751028995, 817.520080321285, 701.823050777629, 2443.15941648048, | |
966.047226491547, 226.481751824818, 23748.0559875583, 848.081264108352, 2338.36477987421, 7380.71065989848, 17184.8739495798, | |
17503.5868005739, 5708.84146341463, 27704.5908183633, 9771.80456390872, 29233.2038555604, 875.034984606773, 59184.2620249253, | |
496.512292738708, 774.275362318841, 10550.0593589236, 8152.64445699228, 23046.9074304691, 7654.7433903577, 48752.0201113306, | |
34409.2623454083, 2300.13550135501, 5713.9750652363, 8808.32735104092, 7690.66937119675, 4144.64534075104, 7794.03905447071, | |
3132.11845102506, 1949.76076555024, 47733.860342556, 7506.57894736842, 2866.2109375, 12545.12, 9877.25, 9994.01, 14497.58, | |
6584.25, 21145.35, 15583.35, 8654.22, 11254.35, 18931.01, 14751.24, 8856.25, 9889.25, 5848.47, 14458.22) | |
titers.AAP.5d_reticulata<-c(2900.79641148325, 4424.62926829268, 6979.94923857868, 9778.84615384615, 5480.92783505155, 2918.21815286624, | |
13690.3223880597, 12977.3365231259, 2219.64245810056, 9166.0705882353, 10051.5703971119, 4413.39590443686, 4456.45627376426, | |
3607.13716108453, 9970.33333333333, 6514.28571428571, 2526.09438202247, 1803.8209432454, 8258.48806366048, 5992.18827415359, | |
9979.98308906426, 9504.24151696607, 7426.82519280206, 16240.3315649867, 20101.8426966292, 5568.25, 10255.25, 6877.22, 4987.99, | |
3658.58, 8225.36, 8744.15, 6856, 6445.08, 6899.36, 5785.14, 9875.36, 12336.08, 10545.25, 9845.12, 4588.25, 7896.35, 8975.056, | |
10254.92, 6584.55, 9786.54, 6458.25, 8952.25, 13578.06, 8854.56, 7754.25, 7532.75, 5644.8, 6658.25, 5554.24, 7555.01, 8810.73, | |
5734.89, 3911.04, 4617.80, 5221.06, 4414.15, 5785.21, 4745.08, 2849.93, 3582.04, 10327.93, 1032.38, 8515.22, 6951.12, 6138.83, | |
7816.05, 10915.60, 8254.62, 6481.55, 5886.23, 5895.05, 11912.86, 11508.16, 9854.06, 6751.15, 6831.05, 6644.80, 7798.15, 6553.15, | |
6355.51, 7824.80) | |
titers.AAP.15d_reticulata<-c(1951.14523875878, 4888.59022594451, 3879.20863437825, 8037.70242256264, 4724.91690673443, 6827.843541764, | |
1500.21435077089, 7126.26719914701, 1739.224414668, 966.841297338958, 2376.84666585316, 36616.3433559159, 2790.38916847681, | |
15464.9263565565, 4169.26300711224, 42201.9787198769, 684.94613206142, 4673.17550742545, 4413.8560019269, 9266.46204313372, | |
960.864671792237, 1422.33572084286, 1280.7693303125, 10638.2524516624, 306.094477687627, 5966.86308520645, 4342.43675847564, | |
507.175874472451, 1542.70627800266, 1020.4762614807, 2804.60842701087, 1383.78180095318, 4242.93573651035, 685.802139068739, | |
2250.1253124631, 3368.91345811865, 2286.31770542194, 16252.882869076, 3387.9933469011, 846.557089640947, 15934.374453214, | |
4813.15980994366, 254.537277570523, 5745.02912912273, 904.39145504319, 15162.2192239936, 20832.7129290786, 43506.3795543985, | |
11524.9631359661, 26007.5333317263, 2555.62754182112, 4410.08026617345) | |
titers.AAP.5d_paniculata<-c(216.244359597362, 2256.87751883652, 543.870566272558, 591.108328115216, 474.821286735504, 1208.96084337349, | |
993.61417241735, 2591.66776186095, 132.932793069722, 1795.6204379562, 2664.07465007776, 47.1723902272339, 534.00503778337, | |
837.09016393443, 1524.78952291862, 85.0223413762288, 1862.85714285714, 1898.34024896266, 610.64453125, 774.350282485876, | |
785.08875739645, 2943.92523364486, 1025.36, 1955.11, 788.36, 986.32) | |
titers.AAP.15d_paniculata<-c(156.354, 98.57, 287.559, 103.655) | |
#using conservative list-melt approach to generate long format with names | |
long.Fig2B<-melt(namedList(titers.AAP.0d_reticulata, | |
titers.AAP.5d_reticulata, | |
titers.AAP.15d_reticulata, | |
titers.AAP.5d_paniculata, | |
titers.AAP.15d_paniculata)) | |
#reordering factors for plotting | |
long.Fig2B$L1<-factor(long.Fig2B$L1, | |
levels=c("titers.AAP.0d_reticulata", | |
"titers.AAP.5d_reticulata", | |
"titers.AAP.15d_reticulata", | |
"titers.AAP.5d_paniculata", | |
"titers.AAP.15d_paniculata"),ordered = TRUE) | |
#Figure 3. Citrus tristeza virus (CTV) infected proportion (A) and relative titer (B) of plants after transmitting by Diaphorina citri. | |
#Fig 3(A) CTV Proportion of infected citrus Shatangju (%) | |
months_post_feeding_CTV_citri<-c(0, 15, 30, 45, 60, 75, 90, 120, 210) | |
transmission_reticulata<-c(0, 0, 33.33, 41.67, 50, 50, 58.33, 58.33, 58.33) | |
transmission_limon<-c(0, 0, 0, 33.33, 50, 66.67, 83.33, 83.33, 83.33) | |
#making a table | |
Fig3A<-data.frame(transmission_reticulata, transmission_limon, | |
row.names= months_post_feeding_CTV_citri) | |
long.Fig3A<-melt(as.matrix(Fig3A)) #long format; note necessity to use as matrix to retrieve rows names | |
#Fig 3(B) CTV DNA titers in infected citrus Shatangju | |
#4 d from CTV transmission | |
#Investigation time after feeding by CTV+ D. citri (month) | |
copyn.reticulata.0<-c(0) | |
copyn.reticulata.15<-c(0) | |
copyn.reticulata.30<-c(159.61, 55.56, 40.20, 70.42) | |
copyn.reticulata.45<-c(139.06, 223.93, 310.93, 510.91, 24.02) | |
copyn.reticulata.60<-c(295.79, 54.88, 517.50, 935.56, 130.22, 1137.11) | |
copyn.reticulata.75<-c(587.85, 254.50, 919.25, 856.54, 518.69, 1668.56) | |
copyn.reticulata.90<-c(965.54, 1544.23, 31.58, 54.62, 158.46, 3159.37, 3812.60) | |
copyn.reticulata.120<-c(1233.21, 1435.24, 2445.33, 876.30, 1322.45, 5443.20, 3321.23) | |
copyn.reticulata.210<-c(3324.24, 2454.35, 4213.23, 2432.34, 1455.53, 2345.51, 2190.23) | |
#14 d from CTV transmission | |
#Investigation time after feeding by CTV+ D. citri (month) | |
copyn.limon.0<-c(0) | |
copyn.limon.15<-c(0) | |
copyn.limon.30<-c(0) | |
copyn.limon.45<-c(21.57, 13.00) | |
copyn.limon.60<-c(15.90, 13.72, 5.30) | |
copyn.limon.75<-c(15.30, 12.29, 9.68, 9.68) | |
copyn.limon.90<-c(21.57, 18.37, 6.89, 14.00, 57.01) | |
copyn.limon.120<-c(35.11, 28.73, 12.21, 23.94, 77.35) | |
copyn.limon.210<-c(57.11, 32.12, 36.23, 39.43, 79.04) | |
Fig3B.reticulata<-list( | |
copyn.reticulata.0, | |
copyn.reticulata.15, | |
copyn.reticulata.30, | |
copyn.reticulata.45, | |
copyn.reticulata.60, | |
copyn.reticulata.75, | |
copyn.reticulata.90, | |
copyn.reticulata.120, | |
copyn.reticulata.210 | |
) | |
names(Fig3B.reticulata)<-months_post_feeding_CTV_citri | |
Fig3B.limon<-list( | |
copyn.limon.0, | |
copyn.limon.15, | |
copyn.limon.30, | |
copyn.limon.45, | |
copyn.limon.60, | |
copyn.limon.75, | |
copyn.limon.90, | |
copyn.limon.120, | |
copyn.limon.210 | |
) | |
names(Fig3B.limon)<-months_post_feeding_CTV_citri | |
Fig3B<-list(Fig3B.reticulata, Fig3B.limon) | |
names(Fig3B)<-c("reticulata", "limon") | |
long.Fig3B<-melt(Fig3B) | |
#Reordering ggplot factors | |
long.Fig3B$L1 <- factor(long.Fig3B$L1, | |
levels = c('reticulata','limon'),ordered = TRUE) | |
#Producing column for colour legends as requested by Wu Fen | |
long.Fig2A_<-separate(data = long.Fig2A, col = L1, into = c("treatment", "species"), sep = "_", remove=FALSE) | |
long.Fig2B_<-separate(data = long.Fig2B, col = L1, into = c("treatment", "species"), sep = "_", remove=FALSE) | |
#Plotting Figures | |
##Figure 2 improved version | |
#setting up labels -- find out why italics expression isn't working?? | |
Fig2.labels<-c(expression(paste(italic("C. ret"), "-0d")), | |
expression(paste(italic("C. ret"), "-5d")), | |
expression(paste(italic("C. ret"), "-15d")), | |
expression(paste(italic("M. pan"), "-5d")), | |
expression(paste(italic("M. pan"), "-15d"))) | |
#coding for Fig2A | |
Fig2A.plot<-ggplot(long.Fig2A_, aes(x=L1, y=value))+ | |
stat_summary(aes(col=species), fun.ymin = function(x) mean(x) - sd(x), | |
fun.ymax = function(x) mean(x) + sd(x), | |
geom = "errorbar", position=position_dodge(width=0.95), width=0.2)+ | |
geom_jitter(width=0.02, col="gray", size=0.5)+ | |
stat_summary(fun.y = median, geom = "point", | |
shape= 15, size=3, position=position_dodge(width=0.95))+ | |
geom_vline(xintercept = 1.5, linetype="dotted")+ | |
scale_x_discrete("Days after AAP on CTV+ citrus", labels=Fig2.labels)+ | |
scale_y_continuous("% of CTV+ citrus", labels=seq(20,100,20))+ | |
theme_classic()+ | |
theme(legend.position=c(.9, .9)) | |
#coding for Fig2B | |
Fig2B.plot<-ggplot(long.Fig2B_, aes(x=treatment, y=log(value), fill=factor(L1, , levels = rev(levels(L1)))))+ | |
geom_boxplot(aes(col=species), outlier.shape = NA, width=0.2, , position = position_dodge2(preserve="single"))+ | |
scale_fill_manual(values = rep(NA, 5)) + | |
geom_point(shape=16,size=0.5, col= "grey", position=position_jitterdodge(jitter.width = 0.02, dodge.width = 0.2), show.legend=F)+ | |
geom_vline(xintercept = 1.5, linetype="dotted")+ | |
scale_x_discrete("Days after AAP on CTV+ citrus", labels=c(expression(paste(italic("C. ret"), "-0d")), "5d", "15d"))+ | |
scale_y_continuous("Amount CTV+ / total citrus DNA (ng)")+ | |
theme_classic()+ | |
guides(fill=FALSE)+ | |
theme(legend.position=c(.9, .9)) | |
#Generating Figure 2 | |
multiplot(Fig2A.plot, Fig2B.plot) | |
#Figure 3 improved version | |
#coding for Fig3A | |
Fig3A.plot<-ggplot(long.Fig3A, aes(x=Var1, y=value, group = Var2))+ | |
geom_line(aes(colour=Var2), size=1) + | |
stat_summary(fun.y = mean, geom = "point")+ | |
theme (legend.position=c(0.15,0.85)) + | |
scale_colour_manual(values=c("grey", "black"), name="Days since CTV transmission", labels=c("reticulata", "limon"))+ | |
scale_x_continuous("Months after exposure to CTV+ D. citri", breaks=seq(0.5,3,0.5))+ | |
scale_y_continuous("% of CTV+ citrus", limits=c(0,100))+ | |
theme_classic() | |
#coding for Fig.3B | |
Fig3B.plot<-ggplot(long.Fig3B, aes(x=L2, y=log(value), group=interaction(L2,L1)))+ | |
geom_boxplot(width=0.4, aes(fill=L1), position=position_dodge(0.7))+ | |
scale_fill_manual(values=c("grey", "white"), name="Days since CTV transmission", labels=c("reticulata", "limon"))+ | |
labs(x="Months after exposure to CTV+ D. citri", y="Amount CTV+ / total citrus DNA (ng)")+ | |
theme_classic() | |
#Generating Figure 3 | |
multiplot(Fig3A.plot, Fig3B.plot) | |
###Statistical Analyses ***************************** | |
##Table 1. Proportion of Citrus tristeza virus (CTV) genome variation through Diaphorina citri transmission (%) | |
#Perhaps make it a Figure? Not sure what's the point of these results | |
#Identity of nucleotide of the original CTV+ citrus | |
High_ID_original<-c(98.24, 96.58, 95.02) | |
Low_ID_original<-c(97.46, 99.04, 98.67) | |
#Identity of nucleotide of the final infected citrus, | |
High_ID_final<-c(99.98, 99.8, 99.97) | |
Low_ID_final<-c(99.89, 99.66, 99.91) | |
#SNPs of D. citri during transmission, | |
High_ID_SNPs_during<-c(3.87, 3.69, 3.27) | |
Low_ID_SNPs_during<-c(0.45, 0.72, 0.57) | |
#SNPs of final infected citrus after D. citri transmission, | |
High_ID_SNPs_after<-c(1.92, 1.75, 1.52) | |
Low_ID_SNPs_after<-c(0.28, 0.13, 0.05) | |
#non-parametric pair comparison using wilcoxon | |
wilcox.test(High_ID_original, Low_ID_original) | |
wilcox.test(High_ID_final, Low_ID_final) | |
wilcox.test(High_ID_SNPs_during, Low_ID_SNPs_during) | |
wilcox.test(High_ID_SNPs_after, Low_ID_SNPs_after) | |
dunn.test(long.Fig2A$value, long.Fig2A$variable, list=TRUE) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Once the paper comes out, I will update this code with better syntax.
Please, any suggestions or comments, find me on Twitter account @ofoxofox