Skip to content

Instantly share code, notes, and snippets.

@taoliu
Created November 6, 2013 05:39
Show Gist options
  • Save taoliu/7331466 to your computer and use it in GitHub Desktop.
Save taoliu/7331466 to your computer and use it in GitHub Desktop.
Script to bin chromosome
#!/usr/bin/env python
# Time-stamp: <2013-09-24 15:23:09 Tao Liu>
import os
import sys
# ------------------------------------
# Main function
# ------------------------------------
def main():
    """Print fixed-size, optionally overlapping bins for every chromosome.

    Command line: ``<binsize> <stepsize> <chromlen file>``.

    Each non-blank line of the chromlen file is split on whitespace; the
    first field is used as the chromosome name and the second as its
    length.  For every bin one tab-separated line is written to stdout::

        <chrom>  <start>  <end>  bin_<index>  <end - start>

    The bin index starts at 1 and keeps counting across chromosomes; it is
    advanced only for bins that are actually written.

    Exits with status 1 (after a message on stderr) when fewer than three
    arguments are given or when binsize/stepsize is not positive.  A
    non-integer size, or a non-blank line without an integer second field,
    raises ValueError/IndexError as before.
    """
    if len(sys.argv) < 4:
        sys.stderr.write("Divid chromosomes into fixed sized bins. Bins could be overlapping.\n\n")
        sys.stderr.write("need 3 paras: %s <binsize> <stepsize> <chromlen file>\n" % sys.argv[0])
        sys.stderr.write(" binsize: fixed size of each bin\n")
        sys.stderr.write(" stepsize: distance between the start positions of consecutive bins (set as binsize if overlapping is not desired.)\n")
        sys.stderr.write(" chromlen file: chromsome name, chromosome length file from UCSC genome browser site\n\n")
        sys.exit(1)

    binsize = int(sys.argv[1])
    stepsize = int(sys.argv[2])
    if binsize <= 0 or stepsize <= 0:
        # A non-positive step would never advance and a non-positive bin
        # would be empty, so reject both up front.
        sys.stderr.write("binsize and stepsize must be positive integers\n")
        sys.exit(1)

    bin_index = 1
    # open() + with works on Python 2 and 3 and guarantees the handle is closed.
    with open(sys.argv[3], "r") as clfhd:
        for line in clfhd:
            fs = line.split()
            if not fs:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            chrname = fs[0]
            chrlen = int(fs[1])
            for startpos, endpos in _iter_bins(chrlen, binsize, stepsize):
                sys.stdout.write("%s\t%d\t%d\tbin_%d\t%d\n"
                                 % (chrname, startpos, endpos, bin_index, endpos - startpos))
                bin_index += 1


def _iter_bins(chrlen, binsize, stepsize):
    """Yield (start, end) half-open bins covering one chromosome.

    Starts advance by ``stepsize``; each end is ``start + binsize`` clipped
    to ``chrlen``.  Iteration stops after the first bin that reaches the
    chromosome end, so a chromosome shorter than ``binsize`` still yields
    exactly one (truncated) bin.  Nothing is yielded when ``chrlen <= 0``.
    Both ``binsize`` and ``stepsize`` must be positive.
    """
    start = 0
    while start < chrlen:
        end = min(chrlen, start + binsize)
        yield start, end
        if end == chrlen:
            # This bin already touches the chromosome end; any later bin
            # would lie entirely inside it.
            break
        start += stepsize
# Run the binning only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment