Skip to content

Instantly share code, notes, and snippets.

@pfurio
Created May 13, 2013 08:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pfurio/5566946 to your computer and use it in GitHub Desktop.
Save pfurio/5566946 to your computer and use it in GitHub Desktop.
Get the counts for DNAse-seq experiments
# -*- coding: utf-8 -*-
# Author: Pedro Furió Tarí
# Data: 06/05/2013
#
# This script will take a sorted and indexed bam file and return different text files with the counts
# Useful when doing a trimming step for pair-end. After trimming, we have to make sure that we have the pairs correctly paired
import getopt, sys, os.path
import pysam
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], "hi:w:s:o:", ["help", "input=", "window=", "step=", "output="])
except getopt.GetoptError as err:
print(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
input = None
window = None
step = None
output_dir = None
for o, a in opts:
if o in ("-h","--help"):
usage()
sys.exit()
elif o in ("-i", "--input"):
if os.path.isfile(a):
input = a
else:
print a + " file does not exist"
elif o in ("-w", "--window"):
window = int(a)
elif o in ("-s", "--step"):
step = int(a)
elif o in ("-o", "--output"):
if os.path.isdir(a):
output_dir = a
else:
print a + " directory does not exist"
else:
assert False, "Unhandled option"
if output_dir != None and output_dir[-1] != "/":
output_dir = output_dir + "/"
if output_dir != None and window != None and step != None and input != None:
run(input, output_dir, window, step)
else:
usage()
def usage():
print "\nUsage: python readPeaks.py [options] <mandatory>"
print "Options:"
print "\t-h, --help:\n\t\t show this help message and exit"
print "Mandatory:"
print "\t-i, --input:\n\t\t Sorted and indexed bam file"
print "\t-w, --window:\n\t\t Length of the sliding window"
print "\t-s, --step:\n\t\t Step of the sliding window"
print "\t-o, --output:\n\t\t Output directory to put the files"
print "\n13/05/2013. Pedro Furió Tarí.\n"
def getCounts (samfile, chrom, tam_chrom, output_dir, window, step):
file_out = open(output_dir + chrom + ".txt", "w")
for pos in range(0,tam_chrom,step):
file_out.write(str(pos) + "\t" + str(samfile.count(chrom,pos,pos+window)) + "\n")
# file_out.write(str(pos) + "-" + str(pos+window) + "\t" + str(samfile.count(chrom,pos,pos+window)) + "\n")
file_out.close()
def run(input_bam, output_dir, window, step):
samfile = pysam.Samfile( "/clinicfs/projects/mineco/3.ENCODE/Caltech/5.mappings/Gm12878/Gm12878_Rp1_pair/accepted_hits.bam", "rb" )
for lista_chroms in samfile.header['SQ']:
chrom = lista_chroms['SN']
tam_chrom = lista_chroms['LN']
print "Retrieving counts from chromosome " + chrom + "..."
getCounts(samfile, chrom, tam_chrom, output_dir, window, step)
samfile.close()
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment