Skip to content

Instantly share code, notes, and snippets.

@jergosh
Created May 11, 2018 09:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jergosh/663fdac0dc40f36f62f1bc5d63044785 to your computer and use it in GitHub Desktop.
Save jergosh/663fdac0dc40f36f62f1bc5d63044785 to your computer and use it in GitHub Desktop.
library("seqinr")
library("Biostrings")
library("MASS")
library("GenomicRanges")
library("dndscv")
## Helper methods copied from dndscv
nt = c("A","C","G","T")
compnt = setNames(rev(nt), nt)
chr2cds = function(pos,cds_int,strand) {
if (strand==1) {
return(which(pos==unlist(apply(cds_int, 1, function(x) x[1]:x[2]))))
} else if (strand==-1) {
return(which(pos==rev(unlist(apply(cds_int, 1, function(x) x[1]:x[2])))))
}
}
rest_nt <- function(n) {
rest_nts <- nt[-which(nt == n)]
sample(rest_nts, 1)
}
data("refcds_hg19", package="dndscv")
TP53 <- RefCDS[which(sapply(RefCDS, function(x) x$gene_name) == "TP53")]
cds_int <- TP53[[1]]$intervals_cds
cds_pos <- unlist(apply(cds_int, 1, function(x) x[1]:x[2]))
splice_pos <- TP53[[1]]$intervals_splice
# Use mutations from example dataset but remove all mapping to TP53
data("dataset_simbreast", package="dndscv")
my_mutations <- subset(mutations, !(chr == "17" & pos %in% cds_pos) & nchar(ref) == 1 & !(pos %in% splice_pos))
# Sample 50 random positions in the TP53 CDS
my_mut_pos <- sample(cds_pos, 50)
# Make a data frame with random mutations at sampled positions
my_mut_cds_pos <- sapply(my_mut_pos, function(x) (chr2cds(x, TP53[[1]]$intervals_cds, TP53[[1]]$strand)))
my_mut_ref <- sapply(my_mut_cds_pos,
function(x) as.character(TP53[[1]]$seq_cds[x]))
my_muts <- data.frame(sampleID=paste0("S_", 1:length(my_mut_pos)),
chr=TP53[[1]]$chr,pos=my_mut_pos,
ref=compnt[my_mut_ref],
mut=sapply(my_mut_ref, rest_nt),
stringsAsFactors=F)
## Apply dndscv on the combined mutations
dndsout = dndscv(rbind(my_mutations[, 1:5], my_muts))
sel_cv = dndsout$sel_cv
signif_genes = sel_cv[sel_cv$qglobal_cv<0.1, c("gene_name","qglobal_cv")]
rownames(signif_genes) = NULL
print(signif_genes)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment