Skip to content

Instantly share code, notes, and snippets.

@informationsea
Created June 28, 2015 11:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save informationsea/439d4fc53ea2b17cfb05 to your computer and use it in GitHub Desktop.
Save informationsea/439d4fc53ea2b17cfb05 to your computer and use it in GitHub Desktop.
Convert UCSC refFlat.txt to BED format
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import csv
def _refflat_row_to_bed(row):
    """Convert one parsed refFlat record into a 12-column BED row.

    Args:
        row: list of string fields from one refFlat line, in the order
            geneName, name, chrom, strand, txStart, txEnd, cdsStart,
            cdsEnd, exonCount, exonStarts, exonEnds.  Fields beyond the
            eleventh are ignored.

    Returns:
        A tuple (chrom, start, end, name, score, strand, thickStart,
        thickEnd, itemRgb, blockCount, blockSizes, blockStarts) where
        ``name`` is "<name>|<geneName>" and score/itemRgb are fixed at 0.

    Raises:
        ValueError: if the row has fewer than 11 fields or a coordinate
            is not an integer.
    """
    (gene_name, transcript, chrom, strand, tx_start, tx_end,
     cds_start, cds_end, exon_count, exon_starts, exon_ends) = row[:11]

    # The exon lists are comma separated and may end with a trailing comma,
    # which yields an empty last item; empty items are dropped.
    starts = exon_starts.split(',')
    ends = exon_ends.split(',')
    offset = int(tx_start)

    # Block sizes are exon lengths (end - start).
    block_sizes = ','.join(
        str(int(end) - int(start))
        for end, start in zip(ends, starts) if end and start)
    # Block starts are expressed relative to the transcript start.
    block_starts = ','.join(str(int(start) - offset) for start in starts if start)

    return (chrom, tx_start, tx_end, transcript + '|' + gene_name, 0, strand,
            cds_start, cds_end, 0, exon_count, block_sizes, block_starts)


def _main():
    """Command-line entry point: convert a refFlat file to BED.

    Reads the positional ``refFlat`` input and writes ``outputBed``: a
    ``track name="..."`` header line followed by one BED row per refFlat
    record.  Blank lines and lines whose first field starts with '#'
    (header/comment lines) are skipped.
    """
    import sys  # local import: only needed to recognise the standard streams

    parser = argparse.ArgumentParser(description="Convert RefFlat to BED format")
    parser.add_argument('refFlat', type=argparse.FileType('r'))
    parser.add_argument('outputBed', type=argparse.FileType('w'))
    parser.add_argument('-n', '--name', help='Name of Bed File', default='refflat')
    options = parser.parse_args()

    try:
        # quotechar=None: fields are plain tab-separated text with no quoting.
        reader = csv.reader(options.refFlat, delimiter='\t', quotechar=None)
        # csv.writer terminates rows with '\r\n' by default; force '\n' so the
        # BED file does not end up with CRLF line endings.
        writer = csv.writer(options.outputBed, delimiter='\t', quotechar=None,
                            lineterminator='\n')
        writer.writerow(['track name="{}"'.format(options.name)])
        for row in reader:
            if not row or row[0].startswith('#'):
                continue
            writer.writerow(_refflat_row_to_bed(row))
    finally:
        # Close (and thereby flush) the files argparse opened, but leave the
        # process-wide standard streams alone when '-' was given.
        for stream in (options.refFlat, options.outputBed):
            if stream not in (sys.stdin, sys.stdout):
                stream.close()
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    _main()
@afrendeiro
Copy link

Very nice, thank you!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment