Created
June 18, 2012 02:13
-
-
Save astatham/2946426 to your computer and use it in GitHub Desktop.
NOMe chart generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
GpCplot <- function(amplicon, reads, main="", minScore=150) { | |
require(Biostrings) | |
require(GenomicRanges) | |
if (class(amplicon)=="character") amplicon <- DNAString(amplicon) | |
amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon)))) | |
reads <- read.DNAStringSet(reads) | |
readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global")) | |
readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-") | |
reads2 <- reads | |
reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"]) | |
readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global") | |
readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll)) | |
Cs <- start(matchPattern("C", amplicon)) | |
CpG <- start(matchPattern("CG", amplicon)) | |
GpC <- start(matchPattern("GC", amplicon))+1 | |
HCG <- setdiff(CpG, GpC) | |
GCH <- setdiff(GpC, CpG) | |
conversionCs <- setdiff(setdiff(Cs, CpG), GpC) | |
values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T") | |
readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore] | |
readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore] | |
#set up the plot | |
plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main) | |
lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3) | |
#draw GpCs | |
sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) { | |
lines(c(x,x), y=c(3, 4), col="black", lwd=2) | |
points(x, 4.5, cex=3, col="black", pch=19) | |
points(x, 4.5, cex=2.8, col="white", pch=19) | |
}) | |
#draw CpGs | |
sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) { | |
lines(c(x,x), y=c(3, 2), col="black", lwd=2) | |
points(x, 1.5, cex=3, col="black", pch=19) | |
points(x, 1.5, cex=2.8, col="lightblue", pch=19) | |
}) | |
#create matrix of calls, sort by similarity | |
calls.mat <- as.matrix(readsHit)[,GCH] | |
reads.order <- hclust(dist(calls.mat=="T"))$order | |
calls.mat <- calls.mat[reads.order,] | |
#draw reads w/ calls | |
for (i in 1:length(reads.order)) { | |
lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue") | |
sapply(1:length(GCH), function(x) { | |
points(x=GCH[x]-1, y=-i, pch=19, col="#003366", cex=3) | |
points(x=GCH[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "#00CC99", ifelse(calls.mat[i,x]=="T", "lightgrey", "white")), cex=2.8) | |
}) | |
} | |
#add % conversion | |
text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7) | |
invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat)) | |
} | |
CpGplot <- function(amplicon, reads, main="", minScore=150) { | |
require(Biostrings) | |
require(GenomicRanges) | |
if (class(amplicon)=="character") amplicon <- DNAString(amplicon) | |
amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon)))) | |
reads <- read.DNAStringSet(reads) | |
readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global")) | |
readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-") | |
reads2 <- reads | |
reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"]) | |
readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global") | |
readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll)) | |
Cs <- start(matchPattern("C", amplicon)) | |
CpG <- start(matchPattern("CG", amplicon)) | |
GpC <- start(matchPattern("GC", amplicon))+1 | |
HCG <- setdiff(CpG, GpC) | |
GCH <- setdiff(GpC, CpG) | |
conversionCs <- setdiff(setdiff(Cs, CpG), GpC) | |
values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T") | |
readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore] | |
readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore] | |
#set up the plot | |
plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main) | |
lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3) | |
#draw GpCs | |
sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) { | |
lines(c(x,x), y=c(3, 4), col="black", lwd=2) | |
points(x, 4.5, cex=3, col="black", pch=19) | |
points(x, 4.5, cex=2.8, col="white", pch=19) | |
}) | |
#draw CpGs | |
sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) { | |
lines(c(x,x), y=c(3, 2), col="black", lwd=2) | |
points(x, 1.5, cex=3, col="black", pch=19) | |
points(x, 1.5, cex=2.8, col="lightblue", pch=19) | |
}) | |
#create matrix of calls, sort by similarity | |
calls.mat <- as.matrix(readsHit)[,HCG] | |
calls.mat2 <- as.matrix(readsHit)[,GCH] | |
reads.order <- hclust(dist(calls.mat2=="T"))$order | |
calls.mat <- calls.mat[reads.order,] | |
#draw reads w/ calls | |
for (i in 1:length(reads.order)) { | |
lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue") | |
sapply(1:length(HCG), function(x) { | |
points(x=HCG[x]-1, y=-i, pch=19, col="black", cex=3) | |
points(x=HCG[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "black", ifelse(calls.mat[i,x]=="T", "white", "white")), cex=2.8) | |
}) | |
} | |
#add % conversion | |
text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7) | |
invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment