Skip to content

Instantly share code, notes, and snippets.

@pgonzale60
Last active August 25, 2022 13:07
Show Gist options
  • Save pgonzale60/55feee02ea228fd6669a04f65defaf00 to your computer and use it in GitHub Desktop.
Save pgonzale60/55feee02ea228fd6669a04f65defaf00 to your computer and use it in GitHub Desktop.
Take in two BUSCO full table and generate a dot plot of colinearity from hits in long sequences
library(tidyverse)
library(scales)
library(gtools)
min_seq_size <- 5e5 # Sequences shorter than this will not be plotted (the size of the sequence is inferred from the maximum coordinate of a BUSCO)
ref_busco <- "~/Downloads/CABPSW02.yahs_scaffolds_final_nematoda_odb10_full_table.tsv" # path to reference file
query_busco <- "~/Downloads/APS7_sophie.v4_nopipe.fasta.yahs_scaffolds_final_nematoda_odb10_full_table.tsv" # path to query file
ref_species <- "A. rhodensis" # text in x axis of plot
query_species <- "A. freiburgensis" # text in y axis of plot
plot_file <- "~/Downloads/APS4_vs_APS7.yahs.pdf" # filename with path to output file. Can be in PNG, JPEG, PDF and possibly other formats.
# Function to read BUSCO full table tsv
read_busco <- function(buscoFile){
read_tsv(buscoFile,
col_names = c("Busco_id", "Status", "Sequence",
"start", "end", "strand", "Score", "Length",
"OrthoDB_url", "Description"),
col_types = c("ccciicdicc"),
comment = "#") %>%
group_by(Sequence) %>%
mutate(seq_size = max(end)) %>%
ungroup() %>%
# Filter largest sequences
# And use single copy only
filter(Status == "Complete", seq_size > min_seq_size) %>%
select(Busco_id, Sequence, start, end) %>%
mutate(Sequence = factor(Sequence, levels = mixedsort(unique(Sequence))))
}
# Load buscos
ref_assem <- read_busco(ref_busco)
query_assem <- read_busco(query_busco)
# Plot
inner_join(ref_assem, query_assem, by = "Busco_id") %>%
ggplot(aes(x = start.x, y = start.y)) +
geom_point(alpha = 0.3) +
facet_grid(Sequence.y ~ Sequence.x, switch="both") +
theme_bw() +
# Number of ticks per facet is specified here as 4
scale_y_continuous(breaks = scales::pretty_breaks(4),
position = "left", labels = label_number_si()) +
# Number of ticks per facet is specified here as 3
scale_x_continuous(breaks = scales::pretty_breaks(3),
labels = label_number_si()) +
xlab(ref_species) +
ylab(query_species)
# You can adjust the width and height
ggsave(plot_file,
width = 16, height = 14)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment