Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Created May 31, 2016 07:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/46a2c573649ab60a06d94850b30b1c07 to your computer and use it in GitHub Desktop.
Save afrendeiro/46a2c573649ab60a06d94850b30b1c07 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import csv
import sys
# usage:
# samtools view -h file.bam | shift_reads.py genome | samtools view -S -b - | samtools sort - file.shifted
def getChrSizes(chrmFile):
"""
Reads tab-delimiter file with two rows describing the chromossomes and its lengths.
Returns dictionary of chr:sizes.
"""
with open(chrmFile, 'r') as f:
chrmSizes = {}
for line in enumerate(f):
row = line[1].strip().split('\t')
chrmSizes[str(row[0])] = int(row[1])
return chrmSizes
chrSizes = {
"hg38": "resources/genomes/hg38/hg38.chromSizes",
"hg19": "resources/genomes/hg19/hg19.chromSizes",
"mm10": "resources/genomes/mm10/mm10.chromSizes",
"dr7": "resources/genomes/dr7/dr7.chromSizes"
}
genome = sys.argv[1]
chrms = getChrSizes(chrSizes[genome]) # get size of chromosomes
wr = csv.writer(sys.stdout, delimiter='\t', lineterminator='\n')
for row in csv.reader(iter(sys.stdin.readline, ''), delimiter='\t'):
if row[0][0] == "@":
wr.writerow(row)
else:
flag = int(row[1])
chrm = row[2]
if flag & 16 == 0:
if int(row[3]) + 4 + 51 < chrms[chrm]:
row[3] = int(row[3]) + 4
else:
continue
elif flag & 16 == 16:
if int(row[3]) - 5 - 51 >= 1:
row[3] = int(row[3]) - 5
else:
continue
wr.writerow(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment