Skip to content

Instantly share code, notes, and snippets.

@mt1022
Last active August 26, 2019 02:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mt1022/897fc5a721a13628229a1a8827bb07ab to your computer and use it in GitHub Desktop.
Save mt1022/897fc5a721a13628229a1a8827bb07ab to your computer and use it in GitHub Desktop.
Extending the `bedtools slop` for bed12 format.
"""
adjust the size of interval stored in bed12 format;
=================================
author: mt1022 (zh)
date: 2017-01-03 19:45
"""
import sys
import argparse
def slop(bed_stream, left=0, right=0, out_stream=sys.stdout):
"""equivalent to bedtools slop for bed12"""
for line in bed_stream:
ary = line.strip().split('\t')
blocks = ary[9]
blocks_size = [int(i) for i in ary[10].split(',')]
blocks_start = [int(i) for i in ary[11].split(',')]
if sum(blocks_size) <= right + left:
continue # discard too short reads
region_start = int(ary[6])
# make chrosomal intervals
intervals = []
for i, j in zip(blocks_size, blocks_start):
intervals.append((region_start + j, region_start + j + i - 1))
# pruning left end
l = right
while l > 0:
i, j = intervals[0]
if j - i + 1 > l:
intervals[0] = (i + l, j)
l = 0
else:
intervals = intervals[1:]
l -= (j - i + 1)
# pruning right end
r = left
while r > 0:
i, j = intervals[-1]
if j - i + 1 > r:
intervals[-1] = (i, j - r)
r = 0
else:
intervals = intervals[:-1]
r -= (j - i + 1)
# update intervals
new_blocks_size = [j - i + 1 for i, j in intervals]
new_region_start = intervals[0][0]
new_region_end = intervals[-1][1] + 1
new_blocks_start = [i - new_region_start for i, j in intervals]
# print output
out = [ary[0], new_region_start, new_region_end] + ary[3:6] + [
new_region_start, new_region_end, '255,0,0'] + [
len(intervals), ','.join(str(i) for i in new_blocks_size),
','.join(str(i) for i in new_blocks_start)
]
out = [str(i) for i in out]
print('\t'.join(out), file=out_stream)
return
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='bed12tools slop')
parser.add_argument('-i', '--input', type=argparse.FileType('r'), default=sys.stdin)
parser.add_argument('-l', '--left', type=int, default=0,
help='The number of base pairs to subtract from the start coordinate.')
parser.add_argument('-r', '--right', type=int, default=0,
help='The number of base pairs to subtract from the end coordinate.')
parser.add_argument('-o', '--output', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
args = parser.parse_args()
slop(args.input, args.left, args.right, args.output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment