Skip to content

Instantly share code, notes, and snippets.

@anamariaelek anamariaelek/data.R
Last active Jan 16, 2020

Embed
What would you like to do?
Nanopore sequencing summary
require(shiny)
require(shinydashboard)
require(data.table)
require(ggplot2)
require(ggpubr)
require(stringr)
require(DT)
require(kableExtra)
require(knitr)
require(plotly)
data <- fread("http://hex.bioinfo.hr/~kristian/transfer/sequencing_summary.txt")
spuzve <- tolower(paste(c("Suberites","domuncula","Sdo",
"Eunapius","subterraneus","Esu",
"Ephydatia","muelleri","Emu",
"Clathrina","clathrus","Ccl"), collapse = "|"))
Spuzve_short <- c("Suberites|Sdo",
"Eunapius|Esu",
"Ephydatia|Emu",
"Clathrina|Ccl")
Spuzve_long <- c("Suberites domuncula",
"Eunapius subterraneus",
"Ephydatia muelleri",
"Clathrina clathrus")
data[, ':='(date = as.Date(str_extract(filename, "\\d{8}"), "%Y%m%d"),
flowcell = str_extract(filename, "F[[:alnum:]]+"),
species = str_extract(tolower(filename), spuzve))]
dates <- data[,.(date_init = unique(date)[1]), .(flowcell)]
data[dates, on = 'flowcell', date_init := i.date_init]
data[, ID := paste(date_init, species, sep = "-")]
setkey(data, ID)
cols <- c("filename", "num_events_template", "passes_filtering",
"sequence_length_template", "mean_qscore_template",
"date", "flowcell", "species", "date_init", "ID")
saveRDS(data[,..cols], "sequencing_summary.RDS")
sumfilt <- data[,
.(num=.N),
.(ID, passes_filtering, flowcell, species)][,
':='(N = sum(num),
perc = round(num/sum(num),2)),ID]
sumfiltcast <- dcast.data.table(sumfilt,
ID ~ passes_filtering,
value.var = c("species","flowcell","N","num","perc"))
sumfiltcast[, `:=`(N_FALSE = NULL, flowcell_FALSE=NULL, species_FALSE = NULL)]
old <- c("species_TRUE", "flowcell_TRUE", "N_TRUE", "num_TRUE", "num_FALSE", "perc_TRUE", "perc_FALSE")
new <- c("species", "flowcell", "N", "N_above", "N_below", "percent_above", "percent_below")
setnames(sumfiltcast, old, new)
setcolorder(sumfiltcast, c("ID", new))
sumnuls <- data[num_events_template == 0, .(N_0=.N), .(ID)]
sumall <- sumfiltcast[sumnuls][, N0_percent := round(N_0/N, 2)]
sp <- lapply(sumall$species, function(x) {
pos <- grep(x, Spuzvetax, ignore.case = TRUE)
Spuzve_long[pos]
})
sumall[, species := unlist(sp)]
sumdt <- data[sequence_length_template != 0,
.(min_length = min(sequence_length_template),
median_length = median(sequence_length_template),
max_length = max(sequence_length_template),
total_length = sum(sequence_length_template),
min_quality = min(mean_qscore_template),
median_quality = median(mean_qscore_template),
max_quality = max(mean_qscore_template)),
by = ID]
sumDT <- sumall[sumdt]
saveRDS(sumDT, "sumDT.RDS")
# # GLOBAL # #
if (!require(shiny)) install.packages("shiny")
if (!require(shinydashboard)) install.packages("shinydashboard")
if (!require(data.table)) install.packages("data.table")
if (!require(ggplot2)) install.packages("ggplot2")
if (!require(ggpubr)) install.packages("ggpubr")
if (!require(stringr)) install.packages("stringr")
if (!require(DT)) install.packages("DT")
if (!require(kableExtra)) install.packages("kableExtra")
if (!require(knitr)) install.packages("knitr")
if (!require(plotly)) install.packages("plotly")
require(shiny)
require(shinydashboard)
require(data.table)
require(ggplot2)
require(ggpubr)
require(stringr)
require(DT)
require(kableExtra)
require(knitr)
require(plotly)
options(shiny.sanitize.errors = FALSE)
data <- readRDS("sequencing_summary.RDS")
sumDT <- readRDS("sumDT.RDS")
Spuzvetax <- c("Suberites domuncula (Sdo)",
"Eunapius subterraneus (Esu)",
"Ephydatia muelleri (Emu)",
"Clathrina clathrus (Ccl)")
# # UI # #
header <- dashboardHeader(title = "Nanopore Summary")
sidebar <- dashboardSidebar(
sidebarMenu(
menuItem("All runs", tabName = "allruns", icon = icon("th")),
menuItem("Selected run", tabName = "onerun", icon = icon("th-list")),
br()
),
sliderInput("bins", "bins in histograms",
min = 10, max = 100, value = 30, step = 5
)
)
body <- dashboardBody(
tabItems(
tabItem(tabName = "allruns",
h2("Summary of all sequencing runs"),
hr(),
fluidRow(
box(
title = "Table summary",
width = 12,
status = "warning", solidHeader = TRUE,
collapsible = TRUE,
DT::dataTableOutput('outsumDT'),
checkboxGroupInput(
"show_vars",
label = "Choose columns:",
choices = names(sumDT),
selected = names(sumDT),
inline = TRUE
),
downloadButton(
"downloadData",
label = "Download"
)
)
),
fluidRow(
box(
title = "Quality",
width = 6, collapsible = TRUE, collapsed = FALSE,
status = "warning", solidHeader = TRUE,
plotlyOutput('qualplot'),
br(),
textOutput('qualthres'),
checkboxInput(
"rmnulls",
"Remove reads with length 0",
value = FALSE,
width = NULL)
),
box(
title = "Length",
width = 6, collapsible = TRUE, collapsed = FALSE,
status = "warning", solidHeader = TRUE,
plotlyOutput('lenplot'),
br(),
checkboxInput(
"loglens",
"Lengths on log scale (removes reads with length 0)",
value = TRUE,
width = NULL)
)
)
),
tabItem(tabName = "onerun",
h2("Summary of a single sequencing run"),
hr(),
fluidRow(
valueBoxOutput("info", width = 4),
valueBoxOutput("filterbox", width = 4),
valueBoxOutput("flowcell", width = 4)
),
fluidRow(
box(
title = "Reads summary",
width = 12,
status = "warning", solidHeader = TRUE,
background = "yellow",
collapsible = FALSE,
tableOutput('outrowDT')
)
),
fluidRow(
tabBox(
title = "Reads length",
side = "right", width = 6,
tabPanel("histogram", plotOutput('histlen')),
tabPanel("density", plotOutput('denslen'))
),
tabBox(
title = "Reads quality",
side = "right", width = 6,
tabPanel("histogram", plotOutput('histq')),
tabPanel("density", plotOutput('densq'))
)
),
fluidRow(
tabBox(
title = "Reads length vs reads quality",
side = "right", width = 6,
#tabPanel("hex", plotOutput("hex")),
tabPanel("density", plotOutput("dens2d"))
)
)
)
)
)
ui <- dashboardPage(header, sidebar, body)
# # SERVER # #
server <- function(input, output) {
# RUNS TABLE SUMMARY
output$outsumDT <- renderDT(
sumDT[, input$show_vars, with=FALSE],
options = list(
scrollX = TRUE,
pageLength = 20,
lengthMenu = c(10, 20, 50)),
selection = list(mode = 'single', selected = 1),
escape = FALSE,
callback = JS(
'table.on("click.dt", "tr", function() {
tabs = $(".sidebar-menu li a");
$(tabs[1]).click();})')
)
output$downloadData <- downloadHandler(
filename = "nanopore_sequencing_summary.csv",
content = function(file)
{
write.csv(sumDT[,input$show_vars, with=FALSE],
file, row.names = FALSE)
}
)
# RUNS PLOT SUMMARY
# For excluding reads with length 0
pData <- reactive({
if (input$rmnulls == TRUE)
data[num_events_template != 0]
else data
})
thres <- data[passes_filtering == TRUE,
min(mean_qscore_template)]
l <- list(
x = 100, y = 0.5,
font = list(
family = "sans-serif",
size = 12
)
)
output$qualplot <- renderPlotly({
qp <- ggplot(pData(),
aes(x = mean_qscore_template, colour = ID)) +
geom_density(aes(y = ..count..)) +
scale_colour_hue(h = c(90, 300)) +
geom_vline(xintercept = thres,
colour = "red", alpha = 0.2) +
theme_pubr(border = TRUE, legend = "top") +
scale_y_continuous(labels = function(x)
format(x, digits = 1, scientific = TRUE)) +
guides(colour = guide_legend(title = NULL)) +
labs(x = "mean quality score")
qp <- ggplotly(qp, tooltip = c("colour"))
layout(qp, legend = l, margin = list(l = 100))
})
output$qualthres <- renderText(
"(red vertical line indicates quality threshold value)"
)
output$lenplot <- renderPlotly({
pp <- ggplot(pData(),
aes(x = sequence_length_template, colour = ID)) +
geom_density(aes(y = ..count..)) +
scale_colour_hue(h = c(90, 300)) +
theme_pubr(border = TRUE, legend = "top") +
scale_y_continuous(labels = function(x)
format(x, digits = 1, scientific = TRUE)) +
guides(colour = guide_legend(title = NULL)) +
labs(x = "sequence length (bp)")
if (input$loglens == TRUE)
pp <- pp + scale_x_log10()
pp <- ggplotly(pp, tooltip = c("colour"))
layout(pp, legend = l, margin = list(l = 100))
})
# CHOSEN RUN
selectedData <- reactive({
req(input$outsumDT_rows_selected)
row <- input$outsumDT_rows_selected
data[ID == sumDT[row[1], ID]] %>%
mutate("passes\nfiltering" = passes_filtering) %>%
select(-passes_filtering)
})
output$info <- renderValueBox({
req(input$outsumDT_rows_selected)
valueBox(
selectedData()$date_init[1],
grep(selectedData()$species[1], Spuzvetax,
ignore.case = TRUE, value = TRUE),
icon = icon("list"),
color = "purple"
)
})
output$filterbox <- renderValueBox({
req(input$outsumDT_rows_selected)
valueBox(
paste0(
round(
sumDT[input$outsumDT_rows_selected,
percent_above],
2) * 100,
"%"),
"passes filtering",
icon = icon("thumbs-up", lib = "glyphicon"),
color = "teal"
)
})
output$flowcell <- renderValueBox({
req(input$outsumDT_rows_selected)
valueBox(
unique(selectedData()$flowcell),
"flowcell",
icon = icon("cog"),
color = "light-blue"
)
})
#output$outrowDT <- renderDT(
# sumDT[c(input$outsumDT_rows_selected, 1)[1], input$show_vars, with=FALSE],
# options = list(dom = 't', scrollX = TRUE, lengthChange = FALSE),
# selection = list(mode = 'none')
#)
output$outrowDT <- function(){
req(input$outsumDT_rows_selected)
show_vars_less <- input$show_vars[
!(input$show_vars %in% c("ID", "species", "flowcell", "percent_above", "percent_below"))]
sumDT[input$outsumDT_rows_selected, show_vars_less, with=FALSE] %>%
knitr::kable("html") %>%
kable_styling("striped") %>%
#scroll_box(width = "100%") %>% #throws error
row_spec(0, align = "center") %>%
row_spec(1, color = "white", background = "#f39c12", align = "center")
#add_header_above(c(" ", "Group 1" = 5, "Group 2" = 6))
}
# CHOSEN RUN PLOTS
output$densq <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(), aes(x = mean_qscore_template,
fill = `passes\nfiltering`,
colour = `passes\nfiltering`)) +
geom_density(aes(y = ..count..), alpha = 0.5) +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "mean quality score", y = "read count")
})
output$denslen <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(),
aes(x = sequence_length_template,
fill = `passes\nfiltering`,
colour = `passes\nfiltering`)) +
geom_density(aes(y = ..count..), alpha = 0.5) +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "sequence length (bp)", y = "read count")
})
output$histq <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(), aes(x = mean_qscore_template, fill = `passes\nfiltering`)) +
geom_histogram(bins = input$bins, alpha = 0.5, position = "stack") +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "mean quality score", y = "read count")
})
output$histlen <- renderPlot({
req(input$outsumDT_rows_selected)
ggplot(selectedData(), aes(x = sequence_length_template, fill = `passes\nfiltering`)) +
geom_histogram(bins = input$bins, alpha = 0.5, position = "dodge") +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "sequence length (bp)", y = "read count")
})
# output$hex <- renderPlot({
# ggplot(selectedData(), aes(sequence_length_template, mean_qscore_template,
# colour = `passes\nfiltering`
# )) +
# geom_hex(alpha = 0.85) +
# #facet_wrap(~`passes\nfiltering`) +
# scale_fill_gradient(name = "counts", trans = "log", c(2, 10, 50, 250, 1250, 6000)) +
# theme_pubr(border = TRUE, legend = "right")
# })
output$dens2d <- renderPlot({
ggplot(selectedData(), aes(sequence_length_template, mean_qscore_template,
colour = `passes\nfiltering`)) +
geom_density2d(bins = input$bins) +
theme_pubr(border = TRUE, legend = "right") +
labs(x = "sequence length (bp)", y = "mean quality score")
})
}
# # RUN # #
shinyApp(ui, server)
View raw

(Sorry about that, but we can’t show files that are this big right now.)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.