Last active
August 25, 2022 13:07
-
-
Save pgonzale60/55feee02ea228fd6669a04f65defaf00 to your computer and use it in GitHub Desktop.
Take in two BUSCO full table and generate a dot plot of colinearity from hits in long sequences
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(scales) | |
library(gtools) | |
min_seq_size <- 5e5 # Sequences shorter than this will not be plotted (the size of the sequence is inferred from the maximum coordinate of a BUSCO) | |
ref_busco <- "~/Downloads/CABPSW02.yahs_scaffolds_final_nematoda_odb10_full_table.tsv" # path to reference file | |
query_busco <- "~/Downloads/APS7_sophie.v4_nopipe.fasta.yahs_scaffolds_final_nematoda_odb10_full_table.tsv" # path to query file | |
ref_species <- "A. rhodensis" # text in x axis of plot | |
query_species <- "A. freiburgensis" # text in y axis of plot | |
plot_file <- "~/Downloads/APS4_vs_APS7.yahs.pdf" # filename with path to output file. Can be in PNG, JPEG, PDF and possibly other formats. | |
# Function to read BUSCO full table tsv | |
read_busco <- function(buscoFile){ | |
read_tsv(buscoFile, | |
col_names = c("Busco_id", "Status", "Sequence", | |
"start", "end", "strand", "Score", "Length", | |
"OrthoDB_url", "Description"), | |
col_types = c("ccciicdicc"), | |
comment = "#") %>% | |
group_by(Sequence) %>% | |
mutate(seq_size = max(end)) %>% | |
ungroup() %>% | |
# Filter largest sequences | |
# And use single copy only | |
filter(Status == "Complete", seq_size > min_seq_size) %>% | |
select(Busco_id, Sequence, start, end) %>% | |
mutate(Sequence = factor(Sequence, levels = mixedsort(unique(Sequence)))) | |
} | |
# Load buscos | |
ref_assem <- read_busco(ref_busco) | |
query_assem <- read_busco(query_busco) | |
# Plot | |
inner_join(ref_assem, query_assem, by = "Busco_id") %>% | |
ggplot(aes(x = start.x, y = start.y)) + | |
geom_point(alpha = 0.3) + | |
facet_grid(Sequence.y ~ Sequence.x, switch="both") + | |
theme_bw() + | |
# Number of ticks per facet is specified here as 4 | |
scale_y_continuous(breaks = scales::pretty_breaks(4), | |
position = "left", labels = label_number_si()) + | |
# Number of ticks per facet is specified here as 3 | |
scale_x_continuous(breaks = scales::pretty_breaks(3), | |
labels = label_number_si()) + | |
xlab(ref_species) + | |
ylab(query_species) | |
# You can adjust the width and height | |
ggsave(plot_file, | |
width = 16, height = 14) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment