Skip to content

Instantly share code, notes, and snippets.

@robiwangriff
Last active June 5, 2024 15:08
Show Gist options
  • Save robiwangriff/671a47aa00b8df9b3b7e0dd41c74f386 to your computer and use it in GitHub Desktop.
Save robiwangriff/671a47aa00b8df9b3b7e0dd41c74f386 to your computer and use it in GitHub Desktop.
ngs2gel_basic
ngs2gel.b<-function(spc,tax,subs=0.005,band_multiplier=5,label_tax_1="Phylum",
label_tax_2="Order", band_col="grey",y_labs_remover=2,y_labs_size=1){
#libs used
require(reshape2)
require(vegan)
require(ggh4x)
require(cowplot)
require(ggrepel)
#convert species df to proportions (0 to 1 scale)
spc<-decostand(spc,"total")
#remove species whose proportional abundance is less than "subs" argument & recode new "0" to NA
spc[spc<subs]<-0
spc<-spc[,colSums(spc)>0]
spc[spc==0]<-NA
#######################################################################
#reorder species randomly (NOT DONE)
#set.seed(32)
#ord<-sample(ncol(spc))
#spc<-spc[,ord]
#colnames(spc)<-1:ncol(spc)
########################################################################
#sort species according to taxonomy
#index the species names so they are no longer long sequences
taxa.sel<-colnames(spc)#separate seqs
#pull tax info for the subsetted taxa
tax_df<-cbind(tax[match(taxa.sel,rownames(tax)),],taxa.sel)
#add in unknown classification
tax_df[is.na(tax_df)]<-"Unknown"
#Now sort alphabetically by taxonomy (other sorts could be implemented eg phylogeny)
tax_df<-tax_df[do.call(order, tax_df), ]
#sort spc df according to ordered tax df
spc<-spc[,match(rownames(tax_df), colnames(spc))]
identical(rownames(tax_df),colnames(spc))#check
#add in a unique ID (index) for each taxon based on the taxonomy sort (this is to help order the y axis in plot)
colnames(spc)<-sprintf("%03d",1:ncol(spc))
tax_df$names_spc<-sprintf("%03d",1:nrow(tax_df))# create matching index for tax_df taxa
#################################################################################
#reorder rows (samples) according to "order_samps" argument
#spc<-spc[with(env,do.call(order,mget(order_samps))),]
#env<-env[with(env,do.call(order,mget(order_samps))),]
#env<-env[,order_samps]
#env$x<-1:nrow(env)# this is just a vector which could order the samples along x in final plot, but also allows spacing between "lanes"
##################################################################################
#make long df for ggplotting, with xy coords and abundances (for band fatness)
dat.w<-data.frame(spc,x=1:nrow(spc),check.names=F)
dat.l<-melt(dat.w,id.vars=c("x"))
names(dat.l)[names(dat.l) == 'variable'] <- 'y'
names(dat.l)[names(dat.l) == 'value'] <- 'abund'
dat.l$abund<-as.numeric(dat.l$abund)
#dat.l$y<-as.numeric(dat.l$y)
#add in selected taxon levels for labelling taxa to long df
dat.l$tax_1<-tax_df[match(dat.l$y,tax_df$names_spc),][,label_tax_1]
dat.l$tax_2<-tax_df[match(dat.l$y,tax_df$names_spc),][,label_tax_2]
#make a new name for labelling taxa
dat.l$id<-paste(dat.l$tax_2,dat.l$y,sep="_")
##############################################################
#we'll be plotting facets later, and want to add some padding to each facet at top and bottom
#min
dat.l$y_min<--1
#max which will be number of taxa per tax_1 designation (the facet) plus some padding
#count number of taxa per tax_1 taxonomic designation
tab<-data.frame(table(tax_df[,label_tax_1]))
dat.l$y_max<-tab[match(dat.l$tax_1,tab$Var1),]$Freq*1
############################################################################################
#we also wont have room on y axis to label all species, so need to subset somehow
#first need make a column containing labels for plotting using ggrepel.
#Because our plotting df is "long format", we need only one label name for each y value - ie non duplicated
dat.l$labels<-dat.l$id
dat.l$labels[duplicated(dat.l$labels)] <- "" #remove duplicates due to long df
dat.l$labels<-sub("_[^_]+$", "", dat.l$labels) #remove numeric index in name
#We can further filter these eg by abundance if the y is looking too busy
# This could be expanded to only include "indicators" or anything really of interest
#make table with abundances each taxon (ie make long df wide again)
tab<-data.frame(aggregate(dat.l$abund, by = list(dat.l$y), mean,na.rm = TRUE))
tab$col<-ifelse(tab$x>y_labs_remover*subs,"black","transparent") # set colour code so low abund labels transparent
dat.l$y_lab_col<-tab[match(dat.l$y,tab$Group.1),]$col #add to long df
#not used but potentially useful for advanced labelling
#require(ggtext)
#dat.l$id2<-ifelse(dat.l$y_lab_col=="transparent",paste0("<span style='font-size: 0pt;color:NA'>", dat.l$id, "</span>"),dat.l$id)
#########################################################################################################
# Now plot. Two plots will be created, one being the plot (pl), the other the taxon labels (axis).
# They then get bound by cowplot
pl<-ggplot(dat.l,aes(x=x))+
geom_blank()+
scale_x_discrete(NULL, guide = "axis_nested") +
geom_segment(aes(x = x-0.3, y = y, xend = x+0.3, yend = y),col=band_col,lineend = "round",linewidth=band_multiplier*dat.l$abund)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),strip.background=element_rect(fill="grey96"))+
scale_y_discrete(drop=TRUE,expand=c(0,0),,labels = NULL, name = NULL)+
theme(strip.text.y.right = element_text(angle = 0),panel.spacing.y = unit(0.2, "lines"),
axis.text.y = element_text(size=y_labs_size, angle=30),axis.ticks.y = element_blank(),plot.margin = ggplot2::margin(0, 0, 2, 0, "pt"))+
facet_grid2(tax_1~., scales = "free", space = "free_y",strip = strip_vanilla(size = "variable",clip = "off"))+
theme(strip.text.y = element_text(size = 6))+
geom_blank(aes(y = y_min-2)) +
geom_blank(aes(y = y_max+2))
axis<-ggplot(dat.l,aes(x=0, y=y,label=labels))+
geom_blank()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_blank(),strip.background=element_rect(fill="grey96"))+
scale_y_discrete(drop=TRUE,expand=c(0,0),labels = NULL, name = NULL)+
theme(panel.spacing.y = unit(0.2, "lines"),
axis.text.y = element_blank(),axis.ticks.y = element_blank(),strip.background = element_blank(),
strip.text.y = element_blank())+
facet_grid2(tax_1~., scales = "free", space = "free_y",strip = strip_vanilla(size = "variable",clip = "off"))+
geom_text_repel(min.segment.length = grid::unit(0, "pt"),
color = dat.l$y_lab_col, ## ggplot2 theme_grey() axis text
size = y_labs_size, ## ggplot2 theme_grey() axis text
max.overlaps=Inf,
seed = 42,
force = 4,
force_pull = 0.0,
nudge_x = -1,
direction = "x",
hjust = 0,
segment.size = 0.0001,
segment.alpha = 0.1,
box.padding = 0.000
)+
scale_x_continuous(limits = c(-1, 0), expand = c(0, 0),
breaks = NULL, labels = NULL, name = NULL)+
theme(panel.background = element_blank(),
plot.margin = ggplot2::margin(0, 0, 2, 0, "pt"))+
geom_blank(aes(y = y_min-2)) +
geom_blank(aes(y = y_max+2))
#align and plot
aligned <- align_plots(axis, pl, align = "h", axis = "l")
plot_grid(plotlist = aligned, nrow = 1, rel_widths = c(1, 5))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment