Created
May 13, 2013 08:34
-
-
Save pfurio/5566946 to your computer and use it in GitHub Desktop.
Get the counts for DNAse-seq experiments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Author: Pedro Furió Tarí | |
# Data: 06/05/2013 | |
# | |
# This script will take a sorted and indexed bam file and return different text files with the counts | |
# Useful when doing a trimming step for pair-end. After trimming, we have to make sure that we have the pairs correctly paired | |
import getopt, sys, os.path | |
import pysam | |
def main(): | |
try: | |
opts, args = getopt.getopt(sys.argv[1:], "hi:w:s:o:", ["help", "input=", "window=", "step=", "output="]) | |
except getopt.GetoptError as err: | |
print(err) # will print something like "option -a not recognized" | |
usage() | |
sys.exit(2) | |
input = None | |
window = None | |
step = None | |
output_dir = None | |
for o, a in opts: | |
if o in ("-h","--help"): | |
usage() | |
sys.exit() | |
elif o in ("-i", "--input"): | |
if os.path.isfile(a): | |
input = a | |
else: | |
print a + " file does not exist" | |
elif o in ("-w", "--window"): | |
window = int(a) | |
elif o in ("-s", "--step"): | |
step = int(a) | |
elif o in ("-o", "--output"): | |
if os.path.isdir(a): | |
output_dir = a | |
else: | |
print a + " directory does not exist" | |
else: | |
assert False, "Unhandled option" | |
if output_dir != None and output_dir[-1] != "/": | |
output_dir = output_dir + "/" | |
if output_dir != None and window != None and step != None and input != None: | |
run(input, output_dir, window, step) | |
else: | |
usage() | |
def usage(): | |
print "\nUsage: python readPeaks.py [options] <mandatory>" | |
print "Options:" | |
print "\t-h, --help:\n\t\t show this help message and exit" | |
print "Mandatory:" | |
print "\t-i, --input:\n\t\t Sorted and indexed bam file" | |
print "\t-w, --window:\n\t\t Length of the sliding window" | |
print "\t-s, --step:\n\t\t Step of the sliding window" | |
print "\t-o, --output:\n\t\t Output directory to put the files" | |
print "\n13/05/2013. Pedro Furió Tarí.\n" | |
def getCounts (samfile, chrom, tam_chrom, output_dir, window, step): | |
file_out = open(output_dir + chrom + ".txt", "w") | |
for pos in range(0,tam_chrom,step): | |
file_out.write(str(pos) + "\t" + str(samfile.count(chrom,pos,pos+window)) + "\n") | |
# file_out.write(str(pos) + "-" + str(pos+window) + "\t" + str(samfile.count(chrom,pos,pos+window)) + "\n") | |
file_out.close() | |
def run(input_bam, output_dir, window, step): | |
samfile = pysam.Samfile( "/clinicfs/projects/mineco/3.ENCODE/Caltech/5.mappings/Gm12878/Gm12878_Rp1_pair/accepted_hits.bam", "rb" ) | |
for lista_chroms in samfile.header['SQ']: | |
chrom = lista_chroms['SN'] | |
tam_chrom = lista_chroms['LN'] | |
print "Retrieving counts from chromosome " + chrom + "..." | |
getCounts(samfile, chrom, tam_chrom, output_dir, window, step) | |
samfile.close() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment