Created
August 2, 2023 13:25
-
-
Save kumeS/505682a73066cbd28f20ba5a60c583c8 to your computer and use it in GitHub Desktop.
This function analyzes the samples and top barcodes that match the given index and generates a sequence logo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Index Mismatch Sequence Analysis | |
#' | |
#' This function analyzes the samples and top barcodes that match the given index and generates a sequence logo. | |
#' | |
#' @title Index Mismatch Sequence Analysis | |
#' @description Analyzes the samples and top barcodes that match the given index, and generates a sequence logo. The function takes two CSV files and an index as input, and optionally saves the sequence logo as a PNG file. | |
#' @param x1 A data frame containing the demultiplex stats, read from "Demultiplex_Stats.csv". | |
#' @param x2 A data frame containing the top unknown barcodes, read from "Top_Unknown_Barcodes.csv". | |
#' @param index A string representing the index to be matched. | |
#' @param AllData A logical value indicating whether to include all data in the result. Default is TRUE. | |
#' @param SavePNG A logical value indicating whether to save the sequence logo as a PNG file. Default is TRUE. | |
#' @importFrom assertthat is.data.frame is.string is.count noNA | |
#' @importFrom seqLogo makePWM seqLogo | |
#' @return A PWM object representing the sequence logo. | |
#' @export indexMismatchSeq | |
#' @author Satoshi Kume | |
#' @examples | |
#' \dontrun{ | |
#' x1 <- Demultiplex_Stats_proc(read.csv("./Demultiplex_Stats.csv", header = TRUE, check.names = FALSE)[1:12,]) | |
#' x2 <- read.csv("./Top_Unknown_Barcodes.csv", header = TRUE, check.names = FALSE) | |
#' index <- "GGGGGGGGGG" | |
#' result <- indexMismatchSeq(x1, x2, index) | |
#' } | |
indexMismatchSeq <- function(x1, x2, index, AllData = TRUE, SavePNG = TRUE){ | |
assertthat::is.data.frame(x1) | |
assertthat::is.data.frame(x2) | |
assertthat::is.string(index) | |
assertthat::is.count(length(index)) | |
assertthat::noNA(index) | |
# Sample: Extract relevant columns and rename | |
x1a <- x1[,c("Index", "index1", "index2")] | |
colnames(x1a) <- c("x0", "X1", "X2") | |
x1a$X3 <- x1$`# Reads` | |
res1 <- base::agrepl(pattern=index, x=x1a$x0, max.distance = 0.1) | |
if(sum(res1) != 1){ stop() } | |
x1b <- x1a[res1,] | |
r <- rownames(x1b) | |
result1 <- data.frame(matrix(unlist(strsplit(paste0(x1b$X1, x1b$X2), "")), ncol=20, byrow = T)) | |
result1$X21 <- x1b$X3 | |
#Top | |
x2$Index <- paste0(x2$index, x2$index2) | |
res2 <- base::agrepl(pattern=index, x=x2$Index, max.distance = 0.25) | |
b <- x2[res2,] | |
#head(b) | |
#dim(b) | |
A <- stringdist::stringdistmatrix(a=index, b=b$Index, method="osa", nthread=2, useNames="strings") | |
A1 <- colnames(A)[A < 5] | |
b <- b[b$Index %in% A1,] | |
#dim(b) | |
result2 <- data.frame(matrix(unlist(strsplit(paste0(b$index, b$index2), "")), ncol=20, byrow = T)) | |
#head(result2) | |
#dim(result2) | |
result2$X21 <- b$`# Reads` | |
#head(result2) | |
if(AllData){ | |
result <- rbind(result1, result2) | |
}else{ | |
result <- result2 | |
} | |
dd <- matrix(0, 5, nchar(index)) | |
rownames(dd) <- c("A", "C", "G", "N", "T") | |
colnames(dd) <- paste0("X", formatC(1:nchar(index), width = 2, flag = 0)) | |
for(n in 1:(ncol(result)-1)){ | |
#n <- 1 | |
d <- tapply(result$X21, factor(result[,n]), sum)/sum(result$X21) | |
if(!all(diff(order(names(d))) == 1)){warning("Failed!!")} | |
dd[rownames(dd) %in% names(d),n] <- as.numeric(d) | |
} | |
ddd <- dd[-4,] | |
#d1 <- apply(ddd, 2, sum) | |
#dddd <- ddd/d1 | |
#apply(dddd, 2, sum) == 1 | |
p <- makePWM(ddd) | |
#consensus(p) | |
if(SavePNG){ | |
png(file = paste0(gsub(" ", "0", formatC(r, width = 2)), "_",index1, ".png"), res = 150, width = 1200, height = 600) | |
seqLogo(p) | |
dev.off() | |
} | |
seqLogo(p) | |
return(p) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment