Last active
July 3, 2017 11:24
metaseq demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From http://www.biostars.org/p/83800/: | |
# "What I want to do is to plot reads of my histone marks (in bam file) | |
# around TSS with CpG and TSS without CpG (Essentially a coverage profile)." | |
# | |
# | |
# To install metaseq and dependencies, see: | |
# | |
# | |
# https://pythonhosted.org/metaseq/install.html | |
# | |
# | |
# To download the example data used here, make sure you're in the directory | |
# this script is saved in, and then use: | |
# | |
# git clone https://gist.github.com/a2e63a2fb93d05341de5.git demo_data | |
# | |
# (Or see https://gist.github.com/daler/a2e63a2fb93d05341de5 and download the | |
# files individually) | |
# | |
import metaseq | |
import pybedtools | |
import numpy as np | |
from matplotlib import pyplot as plt | |
bam = metaseq.genomic_signal('demo_data/h3k4me3-chr21.bam', 'bam') | |
cpg = pybedtools.BedTool('demo_data/cpg-chr21.bed.gz') | |
tss = pybedtools.BedTool('demo_data/tss-chr21.bed.gz') | |
# extend by 5 kb up/downstream | |
tss = tss.slop(b=5000, genome='hg19') | |
tss_with_cpg = tss.intersect(cpg, u=True) | |
tss_without_cpg = tss.intersect(cpg, v=True) | |
# change this to as many CPUs as you have in order to run in parallel | |
processes = 1 | |
# each read will be extended 3' to a total size of this many bp | |
fragment_size = 200 | |
# the region +/-5kb around each TSS will be split into a total of 100 bins, | |
# change as needed | |
bins = 100 | |
x = np.linspace(-5000, 5000, bins) | |
# most of the work happens here | |
y1 = bam.array(tss_with_cpg, bins=bins, processes=processes, fragment_size=fragment_size) | |
y2 = bam.array(tss_without_cpg, bins=bins, processes=processes, fragment_size=fragment_size) | |
plt.rcParams['font.size'] = 11 | |
plt.rcParams['font.family'] = 'Arial' | |
plt.plot(x, y1.mean(axis=0), label='with cpg', color='k') | |
plt.plot(x, y2.mean(axis=0), label='without cpg', color='r', linestyle='--') | |
plt.legend(loc='best') | |
plt.xlabel('Distance from TSS (bp)') | |
plt.ylabel('Mean H3K4me3 read density') | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@daler @ryan Dale , Could you provide me tss-hg19.bed.gz file from Gencode19 or how can i find it? Any link is great appreciated. I've googled it but what i want is from Gencode hg19. Besides, why did you divide tss file into TSS with CpG and TSS without CpG? I just want to all TSS file.