Skip to content

Instantly share code, notes, and snippets.

@SamStudio8
Created October 6, 2019 15:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SamStudio8/6b16674bae8afec706bed80e6ac2da24 to your computer and use it in GitHub Desktop.
Save SamStudio8/6b16674bae8afec706bed80e6ac2da24 to your computer and use it in GitHub Desktop.
library(tidyverse)
assemblies=read_tsv('kraken_summary.bond.tsv')
short_name <- c(
"Bacillus subtilis" = "bs",
"Cryptococcus neoformans" = "cn",
"Enterococcus faecalis" = "ef",
"Escherichia coli" = "ec",
"Lactobacillus fermentum" = "lf",
"Listeria monocytogenes" = "lm",
"Pseudomonas aeruginosa" = "pa",
"Saccharomyces cerevisiae" = "sc",
"Salmonella enterica" = "se",
"Staphylococcus aureus" = "sa"
)
# List the non-sequencing runs
exp_list <- c("expected_genomes", "Zymo-Isolates-SPAdes-Illumina", "pacbio")
assemblies <- assemblies[assemblies$Taxon!='X',] # Drop the few 'X' taxa
assemblies <- assemblies[assemblies$Suffix %in% c('fasta.k2', 'k2', 'ctg.cns.fa.k2'),]
assemblies$label <- paste(assemblies$community, assemblies$platform, sep="\n")
expected <- assemblies[assemblies$SampleID %in% exp_list,]
assemblies <- assemblies[!(assemblies$SampleID %in% exp_list),]
assemblies <- assemblies[assemblies$ContigSize >= 10000,]
assemblies <- rbind(assemblies, expected) # add the expected contigs back
# Relabel non-sequencing runs
assemblies[assemblies$SampleID=="pacbio",]$label <- "McIntyre et al.\nPacBio Assembly"
assemblies[assemblies$SampleID=="expected_genomes",]$label <- "Estimated\nGenome"
assemblies[assemblies$SampleID=="Zymo-Isolates-SPAdes-Illumina",]$label <- "Illumina Isolate\nAssembly"
# Tidy labels
assemblies[assemblies$label=="EVEN\nGRID",]$label <- "EVEN\nGridION"
assemblies[assemblies$label=="LOG\nGRID",]$label <- "LOG\nGridION"
assemblies[assemblies$label=="EVEN\nPROM25",]$label <- "EVEN\nPromethION 25%"
assemblies[assemblies$label=="LOG\nPROM25",]$label <- "LOG\nPromethION 25%"
# Order the assemblies
assemblies$label <- factor(assemblies$label, levels = c("Estimated\nGenome", "McIntyre et al.\nPacBio Assembly", "Illumina Isolate\nAssembly", "EVEN\nGridION", "EVEN\nPromethION 25%", "LOG\nGridION", "LOG\nPromethION 25%"))
assemblies$params <- paste( paste("-e",assemblies$edge, sep=""), paste("-L",assemblies$length, sep=""), paste("-p",assemblies$pmer, sep=""))
# Drop the params for the non-sequencing assemblies
assemblies[assemblies$SampleID %in% exp_list,]$params <- ""
# Make the magic happen
p <- ggplot(assemblies, aes(x=params, y=ContigSize, fill=Taxon)) +
geom_bar(stat="identity", colour="black", size=0.15)+
facet_grid(label~Taxon, scales="free", space="free", labeller = labeller(Taxon = short_name)) +
theme_bw() +
theme(text = element_text(size = 25)) +
theme(axis.text.x = element_text(angle = 0, hjust = 1)) +
theme(legend.position="bottom") +
coord_flip() +
scale_y_continuous(labels=function(x)x/1000000, expand=c(0, 0, 0.15, 0), breaks=seq(2000000,40000000,2000000)) +
xlab("wtdbg2 Parameters") + ylab("Contig Size (Mbp)") +
theme(strip.text.y = element_text(angle = 0)) +
theme(strip.text.x = element_text(size = 30)) +
theme(panel.spacing = unit(1, "lines"))
ggsave("contiguity.png", width=40, height=22, units="in")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment