Last active
June 5, 2024 15:08
-
-
Save robiwangriff/671a47aa00b8df9b3b7e0dd41c74f386 to your computer and use it in GitHub Desktop.
ngs2gel_basic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ngs2gel.b<-function(spc,tax,subs=0.005,band_multiplier=5,label_tax_1="Phylum", | |
label_tax_2="Order", band_col="grey",y_labs_remover=2,y_labs_size=1){ | |
#libs used | |
require(reshape2) | |
require(vegan) | |
require(ggh4x) | |
require(cowplot) | |
require(ggrepel) | |
#convert species df to proportions (0 to 1 scale) | |
spc<-decostand(spc,"total") | |
#remove species whose proportional abundance is less than "subs" argument & recode new "0" to NA | |
spc[spc<subs]<-0 | |
spc<-spc[,colSums(spc)>0] | |
spc[spc==0]<-NA | |
####################################################################### | |
#reorder species randomly (NOT DONE) | |
#set.seed(32) | |
#ord<-sample(ncol(spc)) | |
#spc<-spc[,ord] | |
#colnames(spc)<-1:ncol(spc) | |
######################################################################## | |
#sort species according to taxonomy | |
#index the species names so they are no longer long sequences | |
taxa.sel<-colnames(spc)#separate seqs | |
#pull tax info for the subsetted taxa | |
tax_df<-cbind(tax[match(taxa.sel,rownames(tax)),],taxa.sel) | |
#add in unknown classification | |
tax_df[is.na(tax_df)]<-"Unknown" | |
#Now sort alphabetically by taxonomy (other sorts could be implemented eg phylogeny) | |
tax_df<-tax_df[do.call(order, tax_df), ] | |
#sort spc df according to ordered tax df | |
spc<-spc[,match(rownames(tax_df), colnames(spc))] | |
identical(rownames(tax_df),colnames(spc))#check | |
#add in a unique ID (index) for each taxon based on the taxonomy sort (this is to help order the y axis in plot) | |
colnames(spc)<-sprintf("%03d",1:ncol(spc)) | |
tax_df$names_spc<-sprintf("%03d",1:nrow(tax_df))# create matching index for tax_df taxa | |
################################################################################# | |
#reorder rows (samples) according to "order_samps" argument | |
#spc<-spc[with(env,do.call(order,mget(order_samps))),] | |
#env<-env[with(env,do.call(order,mget(order_samps))),] | |
#env<-env[,order_samps] | |
#env$x<-1:nrow(env)# this is just a vector which could order the samples along x in final plot, but also allows spacing between "lanes" | |
################################################################################## | |
#make long df for ggplotting, with xy coords and abundances (for band fatness) | |
dat.w<-data.frame(spc,x=1:nrow(spc),check.names=F) | |
dat.l<-melt(dat.w,id.vars=c("x")) | |
names(dat.l)[names(dat.l) == 'variable'] <- 'y' | |
names(dat.l)[names(dat.l) == 'value'] <- 'abund' | |
dat.l$abund<-as.numeric(dat.l$abund) | |
#dat.l$y<-as.numeric(dat.l$y) | |
#add in selected taxon levels for labelling taxa to long df | |
dat.l$tax_1<-tax_df[match(dat.l$y,tax_df$names_spc),][,label_tax_1] | |
dat.l$tax_2<-tax_df[match(dat.l$y,tax_df$names_spc),][,label_tax_2] | |
#make a new name for labelling taxa | |
dat.l$id<-paste(dat.l$tax_2,dat.l$y,sep="_") | |
############################################################## | |
#we'll be plotting facets later, and want to add some padding to each facet at top and bottom | |
#min | |
dat.l$y_min<--1 | |
#max which will be number of taxa per tax_1 designation (the facet) plus some padding | |
#count number of taxa per tax_1 taxonomic designation | |
tab<-data.frame(table(tax_df[,label_tax_1])) | |
dat.l$y_max<-tab[match(dat.l$tax_1,tab$Var1),]$Freq*1 | |
############################################################################################ | |
#we also wont have room on y axis to label all species, so need to subset somehow | |
#first need make a column containing labels for plotting using ggrepel. | |
#Because our plotting df is "long format", we need only one label name for each y value - ie non duplicated | |
dat.l$labels<-dat.l$id | |
dat.l$labels[duplicated(dat.l$labels)] <- "" #remove duplicates due to long df | |
dat.l$labels<-sub("_[^_]+$", "", dat.l$labels) #remove numeric index in name | |
#We can further filter these eg by abundance if the y is looking too busy | |
# This could be expanded to only include "indicators" or anything really of interest | |
#make table with abundances each taxon (ie make long df wide again) | |
tab<-data.frame(aggregate(dat.l$abund, by = list(dat.l$y), mean,na.rm = TRUE)) | |
tab$col<-ifelse(tab$x>y_labs_remover*subs,"black","transparent") # set colour code so low abund labels transparent | |
dat.l$y_lab_col<-tab[match(dat.l$y,tab$Group.1),]$col #add to long df | |
#not used but potentially useful for advanced labelling | |
#require(ggtext) | |
#dat.l$id2<-ifelse(dat.l$y_lab_col=="transparent",paste0("<span style='font-size: 0pt;color:NA'>", dat.l$id, "</span>"),dat.l$id) | |
######################################################################################################### | |
# Now plot. Two plots will be created, one being the plot (pl), the other the taxon labels (axis). | |
# They then get bound by cowplot | |
pl<-ggplot(dat.l,aes(x=x))+ | |
geom_blank()+ | |
scale_x_discrete(NULL, guide = "axis_nested") + | |
geom_segment(aes(x = x-0.3, y = y, xend = x+0.3, yend = y),col=band_col,lineend = "round",linewidth=band_multiplier*dat.l$abund)+ | |
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), | |
panel.background = element_blank(), axis.line = element_blank(),strip.background=element_rect(fill="grey96"))+ | |
scale_y_discrete(drop=TRUE,expand=c(0,0),,labels = NULL, name = NULL)+ | |
theme(strip.text.y.right = element_text(angle = 0),panel.spacing.y = unit(0.2, "lines"), | |
axis.text.y = element_text(size=y_labs_size, angle=30),axis.ticks.y = element_blank(),plot.margin = ggplot2::margin(0, 0, 2, 0, "pt"))+ | |
facet_grid2(tax_1~., scales = "free", space = "free_y",strip = strip_vanilla(size = "variable",clip = "off"))+ | |
theme(strip.text.y = element_text(size = 6))+ | |
geom_blank(aes(y = y_min-2)) + | |
geom_blank(aes(y = y_max+2)) | |
axis<-ggplot(dat.l,aes(x=0, y=y,label=labels))+ | |
geom_blank()+ | |
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), | |
panel.background = element_blank(), axis.line = element_blank(),strip.background=element_rect(fill="grey96"))+ | |
scale_y_discrete(drop=TRUE,expand=c(0,0),labels = NULL, name = NULL)+ | |
theme(panel.spacing.y = unit(0.2, "lines"), | |
axis.text.y = element_blank(),axis.ticks.y = element_blank(),strip.background = element_blank(), | |
strip.text.y = element_blank())+ | |
facet_grid2(tax_1~., scales = "free", space = "free_y",strip = strip_vanilla(size = "variable",clip = "off"))+ | |
geom_text_repel(min.segment.length = grid::unit(0, "pt"), | |
color = dat.l$y_lab_col, ## ggplot2 theme_grey() axis text | |
size = y_labs_size, ## ggplot2 theme_grey() axis text | |
max.overlaps=Inf, | |
seed = 42, | |
force = 4, | |
force_pull = 0.0, | |
nudge_x = -1, | |
direction = "x", | |
hjust = 0, | |
segment.size = 0.0001, | |
segment.alpha = 0.1, | |
box.padding = 0.000 | |
)+ | |
scale_x_continuous(limits = c(-1, 0), expand = c(0, 0), | |
breaks = NULL, labels = NULL, name = NULL)+ | |
theme(panel.background = element_blank(), | |
plot.margin = ggplot2::margin(0, 0, 2, 0, "pt"))+ | |
geom_blank(aes(y = y_min-2)) + | |
geom_blank(aes(y = y_max+2)) | |
#align and plot | |
aligned <- align_plots(axis, pl, align = "h", axis = "l") | |
plot_grid(plotlist = aligned, nrow = 1, rel_widths = c(1, 5)) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment