Skip to content

Instantly share code, notes, and snippets.

@rvalieris
Created April 23, 2018 19:49
Show Gist options
  • Save rvalieris/a51af3a7dc82d972a0228f321b2411d8 to your computer and use it in GitHub Desktop.
Save rvalieris/a51af3a7dc82d972a0228f321b2411d8 to your computer and use it in GitHub Desktop.
change_triplet <- c(
"C>A:ACA","C>A:ACC","C>A:ACG","C>A:ACT","C>A:CCA","C>A:CCC","C>A:CCG",
"C>A:CCT","C>A:GCA","C>A:GCC","C>A:GCG","C>A:GCT","C>A:TCA","C>A:TCC",
"C>A:TCG","C>A:TCT","C>G:ACA","C>G:ACC","C>G:ACG","C>G:ACT","C>G:CCA",
"C>G:CCC","C>G:CCG","C>G:CCT","C>G:GCA","C>G:GCC","C>G:GCG","C>G:GCT",
"C>G:TCA","C>G:TCC","C>G:TCG","C>G:TCT","C>T:ACA","C>T:ACC","C>T:ACG",
"C>T:ACT","C>T:CCA","C>T:CCC","C>T:CCG","C>T:CCT","C>T:GCA","C>T:GCC",
"C>T:GCG","C>T:GCT","C>T:TCA","C>T:TCC","C>T:TCG","C>T:TCT","T>A:ATA",
"T>A:ATC","T>A:ATG","T>A:ATT","T>A:CTA","T>A:CTC","T>A:CTG","T>A:CTT",
"T>A:GTA","T>A:GTC","T>A:GTG","T>A:GTT","T>A:TTA","T>A:TTC","T>A:TTG",
"T>A:TTT","T>C:ATA","T>C:ATC","T>C:ATG","T>C:ATT","T>C:CTA","T>C:CTC",
"T>C:CTG","T>C:CTT","T>C:GTA","T>C:GTC","T>C:GTG","T>C:GTT","T>C:TTA",
"T>C:TTC","T>C:TTG","T>C:TTT","T>G:ATA","T>G:ATC","T>G:ATG","T>G:ATT",
"T>G:CTA","T>G:CTC","T>G:CTG","T>G:CTT","T>G:GTA","T>G:GTC","T>G:GTG",
"T>G:GTT","T>G:TTA","T>G:TTC","T>G:TTG","T>G:TTT"
)
revcomp <- function(s) {
return(as.character(reverseComplement(DNAString(s))))
}
genOpportunityFromFasta <- function(dna_ss, target_regions, nsamples=1) {
# make sure there are no overlaps
target_regions <- Reduce(union, as(target_regions, "GRangesList"))
# count the kmers in the region
kmers <- colSums(oligonucleotideFrequency(dna_ss[target_regions], 3))
# turn back into matrix
c0 <- names(kmers)
dim(kmers) <- c(1, length(kmers))
colnames(kmers) <- c0
uk <- sapply(colnames(kmers),
function(x){y<-substr(x,2,2);if(y=="C"||y=="T"){x}else{revcomp(x)}})
m <- matrix(0, nrow=1, ncol=length(change_triplet))
colnames(m) <- change_triplet
for(i in 1:length(kmers)) {
v <- kmers[i]
k <- uk[[i]]
for(j in grep(k,change_triplet)) {
m[1,j] = v + m[1,j]
}
}
# replicate the matrix over the number of desired samples
m <- do.call("rbind",rep(list(m),nsamples))
return(m)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment