Created
May 31, 2016 07:12
-
-
Save afrendeiro/46a2c573649ab60a06d94850b30b1c07 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
import sys | |
# usage: | |
# samtools view -h file.bam | shift_reads.py genome | samtools view -S -b - | samtools sort - file.shifted | |
def getChrSizes(chrmFile): | |
""" | |
Reads tab-delimiter file with two rows describing the chromossomes and its lengths. | |
Returns dictionary of chr:sizes. | |
""" | |
with open(chrmFile, 'r') as f: | |
chrmSizes = {} | |
for line in enumerate(f): | |
row = line[1].strip().split('\t') | |
chrmSizes[str(row[0])] = int(row[1]) | |
return chrmSizes | |
chrSizes = { | |
"hg38": "resources/genomes/hg38/hg38.chromSizes", | |
"hg19": "resources/genomes/hg19/hg19.chromSizes", | |
"mm10": "resources/genomes/mm10/mm10.chromSizes", | |
"dr7": "resources/genomes/dr7/dr7.chromSizes" | |
} | |
genome = sys.argv[1] | |
chrms = getChrSizes(chrSizes[genome]) # get size of chromosomes | |
wr = csv.writer(sys.stdout, delimiter='\t', lineterminator='\n') | |
for row in csv.reader(iter(sys.stdin.readline, ''), delimiter='\t'): | |
if row[0][0] == "@": | |
wr.writerow(row) | |
else: | |
flag = int(row[1]) | |
chrm = row[2] | |
if flag & 16 == 0: | |
if int(row[3]) + 4 + 51 < chrms[chrm]: | |
row[3] = int(row[3]) + 4 | |
else: | |
continue | |
elif flag & 16 == 16: | |
if int(row[3]) - 5 - 51 >= 1: | |
row[3] = int(row[3]) - 5 | |
else: | |
continue | |
wr.writerow(row) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment