Skip to content

Instantly share code, notes, and snippets.

@crazyhottommy
Last active January 2, 2016 20:29
Show Gist options
  • Save crazyhottommy/8357168 to your computer and use it in GitHub Desktop.
Save crazyhottommy/8357168 to your computer and use it in GitHub Desktop.
# This code was modified from the tss plot code. It can plot any other ChIP-seq signal
# at other genomic positions in addtion to tss. In this case, it is the HRE. HIF1a ChIP-seq data
# is available, peaks were called by MACS, generated a bed file. the middle point
# of each peak is used as the center of the plot (you can also use summit of the peak from the exel file
# generated from MACS. HREs at promoters are not included
# 04/10/13
def TSS_Profile(ifile1,ifile2):
'''read in three files, ifile1 is the sortedbamfile prepared by samtool
ifile2 is the promoters (upstream 5kb of TSS) bed file with five columns: chr, start ,end, name and strand'''
import HTSeq
import numpy
import itertools
sortedbamfile=HTSeq.BAM_Reader(ifile1)
promoters = open(ifile2)
halfwinwidth=5000
fragmentsize=200
promoters_pos=set()
for line in promoters:
linelist=line.split()
if linelist[4] == "+":
promoters_pos.add(HTSeq.GenomicPosition(linelist[0],int(linelist[2]),linelist[4]))
else:
promoters_pos.add(HTSeq.GenomicPosition(linelist[0],int(linelist[1]),linelist[4]))
for promoter in itertools.islice(promoters_pos,10):
print promoter # print out 10 promoters
profile=numpy.zeros(2*halfwinwidth, dtype='i')
for p in promoters_pos:
try:
window=HTSeq.GenomicInterval(p.chrom, p.pos-halfwinwidth-fragmentsize,p.pos+halfwinwidth + fragmentsize,".")
for almnt in sortedbamfile[window]:
almnt.iv.length=fragmentsize
if p.strand=="+":
start_in_window=almnt.iv.start- p.pos +halfwinwidth
end_in_window =almnt.iv.end - p.pos +halfwinwidth
else:
start_in_window=p.pos+halfwinwidth-almnt.iv.end
end_in_window =p.pos+halfwinwidth-almnt.iv.start
start_in_window=max(start_in_window,0)
end_in_window=min(end_in_window, 2*halfwinwidth)
if start_in_window >= 2*halfwinwidth or end_in_window <0:
continue
profile[start_in_window : end_in_window] +=1
except:
continue
return profile
ifile1.close()
ifile2.close()
def aligned_counts(ifile1):
'''count how many alignments are aligned back to genome, ifile1 is a sorted bam file'''
import HTSeq
sortedbamfile= HTSeq.BAM_Reader(ifile1)
aligned_counts=0
unaligned_counts=0
for almnt in sortedbamfile:
if almnt.aligned:
aligned_counts+= 1
else:
unaligned_counts+=1
print "number of aligned tags of %s is %d " % (ifile1, aligned_counts)
print "number of unaligned tags of %s is %d "% (ifile1, unaligned_counts)
return aligned_counts
input_bamfile=("/home/tommy/Tet1/SRR764931_shGFP.sorted.bam")
#counts=aligned_counts(input_bamfile)
counts = 49437901
halfwinwidth=5000
profile1=TSS_Profile(input_bamfile,\
"/home/tommy/Tet1/CpG_rich_promoters.bed")
profile1_normalized= (profile1 *1000000.0/counts) /16171.0
profile2=TSS_Profile(input_bamfile,\
"/home/tommy/Tet1/CpG_poor_promoters.bed")
profile2_normalized= (profile2*1000000.0/counts) /16384.0
from matplotlib import pyplot
import numpy
line1=pyplot.plot(numpy.arange(-halfwinwidth, halfwinwidth), profile1_normalized, color="red",label="CpG_rich_promoters")
line2=pyplot.plot(numpy.arange(-halfwinwidth, halfwinwidth), profile2_normalized, color="blue",label="CpG_poor_promoters")
pyplot.legend()
pyplot.xlabel("distance related to TSS bp")
pyplot.ylabel("MeDIP-seq tag density cpm")
pyplot.title("MeDIP-seq intensity around TSS ")
pyplot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment