astatham/NOMe_plot.R

## NOMe_plot.R
GpCplot <- function(amplicon, reads, main="", minScore=150) {
  require(Biostrings)
  require(GenomicRanges)
  if (class(amplicon)=="character") amplicon <- DNAString(amplicon)
  amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon))))
  reads <- read.DNAStringSet(reads)
  readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global"))

  readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-")
  reads2 <- reads
  reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"])
  readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global")

  readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll))

  Cs <- start(matchPattern("C", amplicon))
  CpG <- start(matchPattern("CG", amplicon))
  GpC <- start(matchPattern("GC", amplicon))+1
  HCG <- setdiff(CpG, GpC)
  GCH <- setdiff(GpC, CpG)

  conversionCs <- setdiff(setdiff(Cs, CpG), GpC)
  values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T")

  readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore]
  readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore]

  #set up the plot
  plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main)
  lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3)

  #draw GpCs
  sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) {
    lines(c(x,x), y=c(3, 4), col="black", lwd=2)
    points(x, 4.5, cex=3, col="black", pch=19)
    points(x, 4.5, cex=2.8, col="white", pch=19)
  })

  #draw CpGs
  sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) {
    lines(c(x,x), y=c(3, 2), col="black", lwd=2)
    points(x, 1.5, cex=3, col="black", pch=19)
    points(x, 1.5, cex=2.8, col="lightblue", pch=19)
  })

  #create matrix of calls, sort by similarity
  calls.mat <- as.matrix(readsHit)[,GCH]
  reads.order <- hclust(dist(calls.mat=="T"))$order
  calls.mat <- calls.mat[reads.order,]
  #draw reads w/ calls
  for (i in 1:length(reads.order)) {
    lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue")
    sapply(1:length(GCH), function(x) {
      points(x=GCH[x]-1, y=-i, pch=19, col="#003366", cex=3)
      points(x=GCH[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "#00CC99",  ifelse(calls.mat[i,x]=="T", "lightgrey", "white")), cex=2.8)
    })
  }
  #add % conversion
  text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7)


  invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat))
}

CpGplot <- function(amplicon, reads, main="", minScore=150) {
  require(Biostrings)
  require(GenomicRanges)
  if (class(amplicon)=="character") amplicon <- DNAString(amplicon)
  amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon))))
  reads <- read.DNAStringSet(reads)
  readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global"))

  readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-")
  reads2 <- reads
  reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"])
  readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global")

  readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll))

  Cs <- start(matchPattern("C", amplicon))
  CpG <- start(matchPattern("CG", amplicon))
  GpC <- start(matchPattern("GC", amplicon))+1
  HCG <- setdiff(CpG, GpC)
  GCH <- setdiff(GpC, CpG)

  conversionCs <- setdiff(setdiff(Cs, CpG), GpC)
  values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T")

  readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore]
  readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore]

  #set up the plot
  plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main)
  lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3)

  #draw GpCs
  sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) {
    lines(c(x,x), y=c(3, 4), col="black", lwd=2)
    points(x, 4.5, cex=3, col="black", pch=19)
    points(x, 4.5, cex=2.8, col="white", pch=19)
  })

  #draw CpGs
  sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) {
    lines(c(x,x), y=c(3, 2), col="black", lwd=2)
    points(x, 1.5, cex=3, col="black", pch=19)
    points(x, 1.5, cex=2.8, col="lightblue", pch=19)
  })

  #create matrix of calls, sort by similarity
  calls.mat <- as.matrix(readsHit)[,HCG]
  calls.mat2 <- as.matrix(readsHit)[,GCH]
  reads.order <- hclust(dist(calls.mat2=="T"))$order
  calls.mat <- calls.mat[reads.order,]
  #draw reads w/ calls
  for (i in 1:length(reads.order)) {
    lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue")
    sapply(1:length(HCG), function(x) {
      points(x=HCG[x]-1, y=-i, pch=19, col="black", cex=3)
      points(x=HCG[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "black",  ifelse(calls.mat[i,x]=="T", "white", "white")), cex=2.8)
    })
  }
  #add % conversion
  text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7)


  invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat))
}
	GpCplot <- function(amplicon, reads, main="", minScore=150) {
	require(Biostrings)
	require(GenomicRanges)
	if (class(amplicon)=="character") amplicon <- DNAString(amplicon)
	amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon))))
	reads <- read.DNAStringSet(reads)
	readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global"))

	readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-")
	reads2 <- reads
	reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"])
	readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global")

	readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll))

	Cs <- start(matchPattern("C", amplicon))
	CpG <- start(matchPattern("CG", amplicon))
	GpC <- start(matchPattern("GC", amplicon))+1
	HCG <- setdiff(CpG, GpC)
	GCH <- setdiff(GpC, CpG)

	conversionCs <- setdiff(setdiff(Cs, CpG), GpC)
	values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T")

	readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore]
	readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore]

	#set up the plot
	plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main)
	lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3)

	#draw GpCs
	sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) {
	lines(c(x,x), y=c(3, 4), col="black", lwd=2)
	points(x, 4.5, cex=3, col="black", pch=19)
	points(x, 4.5, cex=2.8, col="white", pch=19)
	})

	#draw CpGs
	sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) {
	lines(c(x,x), y=c(3, 2), col="black", lwd=2)
	points(x, 1.5, cex=3, col="black", pch=19)
	points(x, 1.5, cex=2.8, col="lightblue", pch=19)
	})

	#create matrix of calls, sort by similarity
	calls.mat <- as.matrix(readsHit)[,GCH]
	reads.order <- hclust(dist(calls.mat=="T"))$order
	calls.mat <- calls.mat[reads.order,]
	#draw reads w/ calls
	for (i in 1:length(reads.order)) {
	lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue")
	sapply(1:length(GCH), function(x) {
	points(x=GCH[x]-1, y=-i, pch=19, col="#003366", cex=3)
	points(x=GCH[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "#00CC99", ifelse(calls.mat[i,x]=="T", "lightgrey", "white")), cex=2.8)
	})
	}
	#add % conversion
	text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7)


	invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat))
	}

	CpGplot <- function(amplicon, reads, main="", minScore=150) {
	require(Biostrings)
	require(GenomicRanges)
	if (class(amplicon)=="character") amplicon <- DNAString(amplicon)
	amplicon.conv <- DNAString(gsub("C", "T", gsub("GC", "GY", gsub("CG", "YG", amplicon))))
	reads <- read.DNAStringSet(reads)
	readsHitAll <- list("plus"=pairwiseAlignment(reads, amplicon.conv, type="local-global"), "minus"=pairwiseAlignment(reverseComplement(reads), amplicon.conv, type="local-global"))

	readsStrand <- ifelse(score(readsHitAll$plus)>score(readsHitAll$minus), "+", "-")
	reads2 <- reads
	reads2[readsStrand=="-"] <- reverseComplement(reads2[readsStrand=="-"])
	readsHitAll <- pairwiseAlignment(reads2, amplicon.conv, type="local-global")

	readsSummaryAll <- GRanges("hit", IRanges(start(pattern(readsHitAll)), end(pattern(readsHitAll))), readsStrand, name=gsub(".*/", "", names(reads)), score=score(readsHitAll))

	Cs <- start(matchPattern("C", amplicon))
	CpG <- start(matchPattern("CG", amplicon))
	GpC <- start(matchPattern("GC", amplicon))+1
	HCG <- setdiff(CpG, GpC)
	GCH <- setdiff(GpC, CpG)

	conversionCs <- setdiff(setdiff(Cs, CpG), GpC)
	values(readsSummaryAll)$conversion <- rowMeans(as.matrix(readsHitAll)[,conversionCs]=="T")

	readsHit <- readsHitAll[values(readsSummaryAll)$score>=minScore]
	readsSummary <- readsSummaryAll[values(readsSummaryAll)$score>=minScore]

	#set up the plot
	plot(x=NA, type="n", xlim=c(-10, length(amplicon)+10), ylim=c(-length(readsSummary)-5,7), xaxt="n", yaxt="n", xlab="", ylab="Clones", main=main)
	lines(x=c(1, length(amplicon)), y=c(3,3), col="red", lwd=3)

	#draw GpCs
	sapply(start(matchPattern("GCH", amplicon, fixed=FALSE))+1, function(x) {
	lines(c(x,x), y=c(3, 4), col="black", lwd=2)
	points(x, 4.5, cex=3, col="black", pch=19)
	points(x, 4.5, cex=2.8, col="white", pch=19)
	})

	#draw CpGs
	sapply(start(matchPattern("HCG", amplicon, fixed=FALSE))+1, function(x) {
	lines(c(x,x), y=c(3, 2), col="black", lwd=2)
	points(x, 1.5, cex=3, col="black", pch=19)
	points(x, 1.5, cex=2.8, col="lightblue", pch=19)
	})

	#create matrix of calls, sort by similarity
	calls.mat <- as.matrix(readsHit)[,HCG]
	calls.mat2 <- as.matrix(readsHit)[,GCH]
	reads.order <- hclust(dist(calls.mat2=="T"))$order
	calls.mat <- calls.mat[reads.order,]
	#draw reads w/ calls
	for (i in 1:length(reads.order)) {
	lines(x=c(1, length(amplicon)), y=c(-i, -i), col="blue")
	sapply(1:length(HCG), function(x) {
	points(x=HCG[x]-1, y=-i, pch=19, col="black", cex=3)
	points(x=HCG[x]-1, y=-i, pch=19, col=ifelse(calls.mat[i, x]=="C", "black", ifelse(calls.mat[i,x]=="T", "white", "white")), cex=2.8)
	})
	}
	#add % conversion
	text(x=length(amplicon)+12, y=-(1:length(readsSummary)), labels=paste(round(values(readsSummary)$conversion*100, 1), "%", sep=""), cex=0.7)


	invisible(list("readsHit"=readsHit, "readsSummary"=readsSummary, "readsHitAll"=readsHitAll, "readsSummaryAll"=readsSummaryAll, "reads.order"=reads.order, "calls.mat"=calls.mat))
	}