Skip to content

Instantly share code, notes, and snippets.

@pgonzale60
Last active August 25, 2022 13:10
Show Gist options
  • Save pgonzale60/6ea9d2ae40f5a46614802d45f7a53989 to your computer and use it in GitHub Desktop.
Save pgonzale60/6ea9d2ae40f5a46614802d45f7a53989 to your computer and use it in GitHub Desktop.
Compares oxford plots of multiple species using shared coordinates rather than facets. Designed for BUSCO's full table.
library(tidyverse)
# User input ####
x_species <- c("nxOscDolc1.1", "nxOscOnir1.2")
y_species <- c("nxOscSper1.1", "nxOscSpeu1.1")
# path to BUSCO table directory
# path must end in "/" (e.g. busco_dir/)
busco_dir <- "analyses/paper1/prelim_to_20220220/nemaChromQC/"
shared_busco_tsv_suffix <- ".primary_nematoda_odb10_full_table.tsv"
### Funcions ####
# Function for loading buscos
read_busco <- function(buscoFile){
busco <- read_tsv(buscoFile,
col_names = c("busco_id", "Status", "Sequence",
"start", "end", "strand", "Score", "Length",
"OrthoDB_url", "Description"),
col_types = c("ccciicdicc"),
comment = "#") %>%
filter(Status == "Complete") %>%
select(busco_id, Sequence, start, end)
return(busco)
}
# Function for creating multispecies coordinates index
# From busco_tibble
# busco_tibble needs to have columns "multispecies_sequence" and "end"
buscos_to_multi_sp_index <- function(busco_tb){
multi_sp_index <- group_by(busco_tb, multispecies_sequence) %>%
summarise(size = max(end), .groups = "drop") %>%
arrange(multispecies_sequence) %>%
# slice(gtools::mixedorder(multispecies_sequence, numeric.type = "roman")) %>%
mutate(multispecies_sequence = factor(multispecies_sequence),
multispecies_index = cumsum(size) - size) %>%
select(multispecies_sequence, multispecies_index)
return(multi_sp_index)
}
# Function for multispecies Oxfor plot
multispecies_oxford_plot <- function(busco_tbl, sp_x, sp_y){
x_busco <- filter(busco_tbl, assembly %in% sp_x)
y_busco <- filter(busco_tbl, assembly %in% sp_y)
buscos_to_cmpr <- left_join(x_busco, y_busco,
by = "busco_id")
multi_sp_index_x <- select(buscos_to_cmpr,
multispecies_sequence = multispecies_sequence.x,
end = end.x) %>% buscos_to_multi_sp_index()
multi_sp_index_y <- select(buscos_to_cmpr,
multispecies_sequence = multispecies_sequence.y,
end = end.y) %>% buscos_to_multi_sp_index()
# Extrapolate USCOs to multispecies coordinates
multispecies_buscos <- left_join(buscos_to_cmpr, multi_sp_index_x,
by = c("multispecies_sequence.x" = "multispecies_sequence")) %>%
left_join(multi_sp_index_y,
by = c("multispecies_sequence.y" = "multispecies_sequence")) %>%
mutate(multispecies_start.x = start.x + multispecies_index.x,
multispecies_start.y = start.y + multispecies_index.y) %>%
select(busco_id, multispecies_start.x, multispecies_start.y,
multispecies_sequence.x, multispecies_sequence.y)
# Main plot
p <- ggplot(multispecies_buscos, aes(x = multispecies_start.x,
y = multispecies_start.y)) +
geom_point(alpha = 0.3) +
scale_x_continuous(breaks = multi_sp_index_x$multispecies_index,
labels = multi_sp_index_x$multispecies_sequence,
expand = c(0, 0)) +
scale_y_continuous(breaks = multi_sp_index_y$multispecies_index,
labels = multi_sp_index_y$multispecies_sequence,
expand = c(0, 0)) +
geom_vline(xintercept=multi_sp_index_x$multispecies_index, size = 0.2) +
geom_hline(yintercept=multi_sp_index_y$multispecies_index, size = 0.2) +
xlab("") +
ylab("") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_text(angle = 90, hjust = 0.5),
axis.ticks = element_blank())
return(p)
}
### Read data ####
buscofiles <- list.files(busco_dir, full.names = T,
pattern = shared_busco_tsv_suffix)
names(buscofiles) <- make.names(sub(paste0(".+//(.+)",
shared_busco_tsv_suffix), "\\1",
buscofiles))
buscos <- map_df(buscofiles, read_busco,
.id = "assembly") %>%
mutate(short_species_id = sub("nxOsc([^0-9]+).+", "\\1", assembly),
multispecies_sequence = paste0(short_species_id, "_", Sequence))
### Make plot ####
multispecies_oxford_plot(buscos, x_species, y_species)
# For cases with many sequences, you might want to explore
# the plot interactively
# You can do so by running the following line
plotly::ggplotly()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment