Skip to content

Instantly share code, notes, and snippets.

@astatham
Created June 18, 2012 02:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save astatham/2946426 to your computer and use it in GitHub Desktop.
Save astatham/2946426 to your computer and use it in GitHub Desktop.
NOMe chart generator
GpCplot <- function(amplicon, reads, main="", minScore=150) {
require(Biostrings)
require(GenomicRanges)
if (class(amplicon)=="character") amplicon <- DNAString(amplicon)
amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon))))
reads <- read.DNAStringSet(reads)
readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global"))
readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-")
reads2 <- reads
reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"])
readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global")
readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll))
Cs <- start(matchPattern("C", amplicon))
CpG <- start(matchPattern("CG", amplicon))
GpC <- start(matchPattern("GC", amplicon))+1
HCG <- setdiff(CpG, GpC)
GCH <- setdiff(GpC, CpG)
conversionCs <- setdiff(setdiff(Cs, CpG), GpC)
values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T")
readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore]
readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore]
#set up the plot
plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main)
lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3)
#draw GpCs
sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) {
lines(c(x,x), y=c(3, 4), col="black", lwd=2)
points(x, 4.5, cex=3, col="black", pch=19)
points(x, 4.5, cex=2.8, col="white", pch=19)
})
#draw CpGs
sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) {
lines(c(x,x), y=c(3, 2), col="black", lwd=2)
points(x, 1.5, cex=3, col="black", pch=19)
points(x, 1.5, cex=2.8, col="lightblue", pch=19)
})
#create matrix of calls, sort by similarity
calls.mat <- as.matrix(readsHit)[,GCH]
reads.order <- hclust(dist(calls.mat=="T"))$order
calls.mat <- calls.mat[reads.order,]
#draw reads w/ calls
for (i in 1:length(reads.order)) {
lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue")
sapply(1:length(GCH), function(x) {
points(x=GCH[x]-1, y=-i, pch=19, col="#003366", cex=3)
points(x=GCH[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "#00CC99", ifelse(calls.mat[i,x]=="T", "lightgrey", "white")), cex=2.8)
})
}
#add % conversion
text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7)
invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat))
}
CpGplot <- function(amplicon, reads, main="", minScore=150) {
require(Biostrings)
require(GenomicRanges)
if (class(amplicon)=="character") amplicon <- DNAString(amplicon)
amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon))))
reads <- read.DNAStringSet(reads)
readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global"))
readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-")
reads2 <- reads
reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"])
readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global")
readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll))
Cs <- start(matchPattern("C", amplicon))
CpG <- start(matchPattern("CG", amplicon))
GpC <- start(matchPattern("GC", amplicon))+1
HCG <- setdiff(CpG, GpC)
GCH <- setdiff(GpC, CpG)
conversionCs <- setdiff(setdiff(Cs, CpG), GpC)
values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T")
readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore]
readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore]
#set up the plot
plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main)
lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3)
#draw GpCs
sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) {
lines(c(x,x), y=c(3, 4), col="black", lwd=2)
points(x, 4.5, cex=3, col="black", pch=19)
points(x, 4.5, cex=2.8, col="white", pch=19)
})
#draw CpGs
sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) {
lines(c(x,x), y=c(3, 2), col="black", lwd=2)
points(x, 1.5, cex=3, col="black", pch=19)
points(x, 1.5, cex=2.8, col="lightblue", pch=19)
})
#create matrix of calls, sort by similarity
calls.mat <- as.matrix(readsHit)[,HCG]
calls.mat2 <- as.matrix(readsHit)[,GCH]
reads.order <- hclust(dist(calls.mat2=="T"))$order
calls.mat <- calls.mat[reads.order,]
#draw reads w/ calls
for (i in 1:length(reads.order)) {
lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue")
sapply(1:length(HCG), function(x) {
points(x=HCG[x]-1, y=-i, pch=19, col="black", cex=3)
points(x=HCG[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "black", ifelse(calls.mat[i,x]=="T", "white", "white")), cex=2.8)
})
}
#add % conversion
text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7)
invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment