Skip to content

Instantly share code, notes, and snippets.

@anamariaelek
Last active January 16, 2020 15:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anamariaelek/ac70548f82c21f1141ac0b58dc34054a to your computer and use it in GitHub Desktop.
Save anamariaelek/ac70548f82c21f1141ac0b58dc34054a to your computer and use it in GitHub Desktop.
Nanopore sequencing summary
require(shiny)
require(shinydashboard)
require(data.table)
require(ggplot2)
require(ggpubr)
require(stringr)
require(DT)
require(kableExtra)
require(knitr)
require(plotly)
data <- fread("http://hex.bioinfo.hr/~kristian/transfer/sequencing_summary.txt")
spuzve <- tolower(paste(c("Suberites","domuncula","Sdo",
"Eunapius","subterraneus","Esu",
"Ephydatia","muelleri","Emu",
"Clathrina","clathrus","Ccl"), collapse = "|"))
Spuzve_short <- c("Suberites|Sdo",
"Eunapius|Esu",
"Ephydatia|Emu",
"Clathrina|Ccl")
Spuzve_long <- c("Suberites domuncula",
"Eunapius subterraneus",
"Ephydatia muelleri",
"Clathrina clathrus")
data[, ':='(date = as.Date(str_extract(filename, "\\d{8}"), "%Y%m%d"),
flowcell = str_extract(filename, "F[[:alnum:]]+"),
species = str_extract(tolower(filename), spuzve))]
dates <- data[,.(date_init = unique(date)[1]), .(flowcell)]
data[dates, on = 'flowcell', date_init := i.date_init]
data[, ID := paste(date_init, species, sep = "-")]
setkey(data, ID)
cols <- c("filename", "num_events_template", "passes_filtering",
"sequence_length_template", "mean_qscore_template",
"date", "flowcell", "species", "date_init", "ID")
saveRDS(data[,..cols], "sequencing_summary.RDS")
sumfilt <- data[,
.(num=.N),
.(ID, passes_filtering, flowcell, species)][,
':='(N = sum(num),
perc = round(num/sum(num),2)),ID]
sumfiltcast <- dcast.data.table(sumfilt,
ID ~ passes_filtering,
value.var = c("species","flowcell","N","num","perc"))
sumfiltcast[, `:=`(N_FALSE = NULL, flowcell_FALSE=NULL, species_FALSE = NULL)]
old <- c("species_TRUE", "flowcell_TRUE", "N_TRUE", "num_TRUE", "num_FALSE", "perc_TRUE", "perc_FALSE")
new <- c("species", "flowcell", "N", "N_above", "N_below", "percent_above", "percent_below")
setnames(sumfiltcast, old, new)
setcolorder(sumfiltcast, c("ID", new))
sumnuls <- data[num_events_template == 0, .(N_0=.N), .(ID)]
sumall <- sumfiltcast[sumnuls][, N0_percent := round(N_0/N, 2)]
sp <- lapply(sumall$species, function(x) {
pos <- grep(x, Spuzvetax, ignore.case = TRUE)
Spuzve_long[pos]
})
sumall[, species := unlist(sp)]
sumdt <- data[sequence_length_template != 0,
.(min_length = min(sequence_length_template),
median_length = median(sequence_length_template),
max_length = max(sequence_length_template),
total_length = sum(sequence_length_template),
min_quality = min(mean_qscore_template),
median_quality = median(mean_qscore_template),
max_quality = max(mean_qscore_template)),
by = ID]
sumDT <- sumall[sumdt]
saveRDS(sumDT, "sumDT.RDS")
# # GLOBAL # #
if (!require(shiny)) install.packages("shiny")
if (!require(shinydashboard)) install.packages("shinydashboard")
if (!require(data.table)) install.packages("data.table")
if (!require(ggplot2)) install.packages("ggplot2")
if (!require(ggpubr)) install.packages("ggpubr")
if (!require(stringr)) install.packages("stringr")
if (!require(DT)) install.packages("DT")
if (!require(kableExtra)) install.packages("kableExtra")
if (!require(knitr)) install.packages("knitr")
if (!require(plotly)) install.packages("plotly")
require(shiny)
require(shinydashboard)
require(data.table)
require(ggplot2)
require(ggpubr)
require(stringr)
require(DT)
require(kableExtra)
require(knitr)
require(plotly)
options(shiny.sanitize.errors = FALSE)
data <- readRDS("sequencing_summary.RDS")
sumDT <- readRDS("sumDT.RDS")
Spuzvetax <- c("Suberites domuncula (Sdo)",
"Eunapius subterraneus (Esu)",
"Ephydatia muelleri (Emu)",
"Clathrina clathrus (Ccl)")
# # UI # #
header <- dashboardHeader(title = "Nanopore Summary")
sidebar <- dashboardSidebar(
sidebarMenu(
menuItem("All runs", tabName = "allruns", icon = icon("th")),
menuItem("Selected run", tabName = "onerun", icon = icon("th-list")),
br()
),
sliderInput("bins", "bins in histograms",
min = 10, max = 100, value = 30, step = 5
)
)
body <- dashboardBody(
tabItems(
tabItem(tabName = "allruns",
h2("Summary of all sequencing runs"),
hr(),
fluidRow(
box(
title = "Table summary",
width = 12,
status = "warning", solidHeader = TRUE,
collapsible = TRUE,
DT::dataTableOutput('outsumDT'),
checkboxGroupInput(
"show_vars",
label = "Choose columns:",
choices = names(sumDT),
selected = names(sumDT),
inline = TRUE
),
downloadButton(
"downloadData",
label = "Download"
)
)
),
fluidRow(
box(
title = "Quality",
width = 6, collapsible = TRUE, collapsed = FALSE,
status = "warning", solidHeader = TRUE,
plotlyOutput('qualplot'),
br(),
textOutput('qualthres'),
checkboxInput(
"rmnulls",
"Remove reads with length 0",
value = FALSE,
width = NULL)
),
box(
title = "Length",
width = 6, collapsible = TRUE, collapsed = FALSE,
status = "warning", solidHeader = TRUE,
plotlyOutput('lenplot'),
br(),
checkboxInput(
"loglens",
"Lengths on log scale (removes reads with length 0)",
value = TRUE,
width = NULL)
)
)
),
tabItem(tabName = "onerun",
h2("Summary of a single sequencing run"),
hr(),
fluidRow(
valueBoxOutput("info", width = 4),
valueBoxOutput("filterbox", width = 4),
valueBoxOutput("flowcell", width = 4)
),
fluidRow(
box(
title = "Reads summary",
width = 12,
status = "warning", solidHeader = TRUE,
background = "yellow",
collapsible = FALSE,
tableOutput('outrowDT')
)
),
fluidRow(
tabBox(
title = "Reads length",
side = "right", width = 6,
tabPanel("histogram", plotOutput('histlen')),
tabPanel("density", plotOutput('denslen'))
),
tabBox(
title = "Reads quality",
side = "right", width = 6,
tabPanel("histogram", plotOutput('histq')),
tabPanel("density", plotOutput('densq'))
)
),
fluidRow(
tabBox(
title = "Reads length vs reads quality",
side = "right", width = 6,
#tabPanel("hex", plotOutput("hex")),
tabPanel("density", plotOutput("dens2d"))
)
)
)
)
)
ui <- dashboardPage(header, sidebar, body)
# # SERVER # #
server <- function(input, output) {
# RUNS TABLE SUMMARY
output$outsumDT <- renderDT(
sumDT[, input$show_vars, with=FALSE],
options = list(
scrollX = TRUE,
pageLength = 20,
lengthMenu = c(10, 20, 50)),
selection = list(mode = 'single', selected = 1),
escape = FALSE,
callback = JS(
'table.on("click.dt", "tr", function() {
tabs = $(".sidebar-menu li a");
$(tabs[1]).click();})')
)
output$downloadData <- downloadHandler(
filename = "nanopore_sequencing_summary.csv",
content = function(file)
{
write.csv(sumDT[,input$show_vars, with=FALSE],
file, row.names = FALSE)
}
)
# RUNS PLOT SUMMARY
# For excluding reads with length 0
pData <- reactive({
if (input$rmnulls == TRUE)
data[num_events_template != 0]
else data
})
thres <- data[passes_filtering == TRUE,
min(mean_qscore_template)]
l <- list(
x = 100, y = 0.5,
font = list(
family = "sans-serif",
size = 12
)
)
output$qualplot <- renderPlotly({
qp <- ggplot(pData(),
aes(x = mean_qscore_template, colour = ID)) +
geom_density(aes(y = ..count..)) +
scale_colour_hue(h = c(90, 300)) +
geom_vline(xintercept = thres,
colour = "red", alpha = 0.2) +
theme_pubr(border = TRUE, legend = "top") +
scale_y_continuous(labels = function(x)
format(x, digits = 1, scientific = TRUE)) +
guides(colour = guide_legend(title = NULL)) +
labs(x = "mean quality score")
qp <- ggplotly(qp, tooltip = c("colour"))
layout(qp, legend = l, margin = list(l = 100))
})
output$qualthres <- renderText(
"(red vertical line indicates quality threshold value)"
)
output$lenplot <- renderPlotly({
pp <- ggplot(pData(),
aes(x = sequence_length_template, colour = ID)) +
geom_density(aes(y = ..count..)) +
scale_colour_hue(h = c(90, 300)) +
theme_pubr(border = TRUE, legend = "top") +
scale_y_continuous(labels = function(x)
format(x, digits = 1, scientific = TRUE)) +
guides(colour = guide_legend(title = NULL)) +
labs(x = "sequence length (bp)")
if (input$loglens == TRUE)
pp <- pp + scale_x_log10()
pp <- ggplotly(pp, tooltip = c("colour"))
layout(pp, legend = l, margin = list(l = 100))
})
# CHOSEN RUN
selectedData <- reactive({
req(input$outsumDT_rows_selected)
row <- input$outsumDT_rows_selected
data[ID == sumDT[row[1], ID]] %>%
mutate("passes\nfiltering" = passes_filtering) %>%
select(-passes_filtering)
})
output$info <- renderValueBox({
req(input$outsumDT_rows_selected)
valueBox(
selectedData()$date_init[1],
grep(selectedData()$species[1], Spuzvetax,
ignore.case = TRUE, value = TRUE),
icon = icon("list"),
color = "purple"
)
})
output$filterbox <- renderValueBox({
req(input$outsumDT_rows_selected)
valueBox(
paste0(
round(
sumDT[input$outsumDT_rows_selected,
percent_above],
2) * 100,
"%"),
"passes filtering",
icon = icon("thumbs-up", lib = "glyphicon"),
color = "teal"
)
})
output$flowcell <- renderValueBox({
req(input$outsumDT_rows_selected)
valueBox(
unique(selectedData()$flowcell),
"flowcell",
icon = icon("cog"),
color = "light-blue"
)
})
#output$outrowDT <- renderDT(
# sumDT[c(input$outsumDT_rows_selected, 1)[1], input$show_vars, with=FALSE],
# options = list(dom = 't', scrollX = TRUE, lengthChange = FALSE),
# selection = list(mode = 'none')
#)
output$outrowDT <- function(){
req(input$outsumDT_rows_selected)
show_vars_less <- input$show_vars[
!(input$show_vars %in% c("ID", "species", "flowcell", "percent_above", "percent_below"))]
sumDT[input$outsumDT_rows_selected, show_vars_less, with=FALSE] %>%
knitr::kable("html") %>%
kable_styling("striped") %>%
#scroll_box(width = "100%") %>% #throws error
row_spec(0, align = "center") %>%
row_spec(1, color = "white", background = "#f39c12", align = "center")
#add_header_above(c(" ", "Group 1" = 5, "Group 2" = 6))
}
# CHOSEN RUN PLOTS
output$densq <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(), aes(x = mean_qscore_template,
fill = `passes\nfiltering`,
colour = `passes\nfiltering`)) +
geom_density(aes(y = ..count..), alpha = 0.5) +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "mean quality score", y = "read count")
})
output$denslen <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(),
aes(x = sequence_length_template,
fill = `passes\nfiltering`,
colour = `passes\nfiltering`)) +
geom_density(aes(y = ..count..), alpha = 0.5) +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "sequence length (bp)", y = "read count")
})
output$histq <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(), aes(x = mean_qscore_template, fill = `passes\nfiltering`)) +
geom_histogram(bins = input$bins, alpha = 0.5, position = "stack") +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "mean quality score", y = "read count")
})
output$histlen <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(), aes(x = sequence_length_template, fill = `passes\nfiltering`)) +
geom_histogram(bins = input$bins, alpha = 0.5, position = "dodge") +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "sequence length (bp)", y = "read count")
})
# output$hex <- renderPlot({
# ggplot(selectedData(), aes(sequence_length_template, mean_qscore_template,
# colour = `passes\nfiltering`
# )) +
# geom_hex(alpha = 0.85) +
# #facet_wrap(~`passes\nfiltering`) +
# scale_fill_gradient(name = "counts", trans = "log", c(2, 10, 50, 250, 1250, 6000)) +
# theme_pubr(border = TRUE, legend = "right")
# })
output$dens2d <- renderPlot({
ggplot(selectedData(), aes(sequence_length_template, mean_qscore_template,
colour = `passes\nfiltering`)) +
geom_density2d(bins = input$bins) +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "sequence length (bp)", y = "mean quality score")
})
}
# # RUN # #
shinyApp(ui, server)
View raw

(Sorry about that, but we can’t show files that are this big right now.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment