Skip to content

Instantly share code, notes, and snippets.

@suqingdong
Last active October 14, 2016 14:36
Show Gist options
  • Save suqingdong/e1f9e56ee6e525015a79f0662027420b to your computer and use it in GitHub Desktop.
Save suqingdong/e1f9e56ee6e525015a79f0662027420b to your computer and use it in GitHub Desktop.
Replace each sample's genotype
#!/usr/bin/env python
# Extract columns: 'CHROM POS ID REF ALT GeneName' + samples' columns
def safe_open(infile):
    """Open a plain or gzip-compressed file for reading text.

    Paths ending in '.gz' are opened through the gzip module; every other
    path is opened with the built-in open().

    Args:
        infile: Path of the file to read.

    Returns:
        An open file object that yields text lines and can be used in a
        ``with`` statement.

    Raises:
        IOError: If the file cannot be opened. The error message is printed
            first and the exception is then re-raised, so callers never
            receive ``None`` in place of a file object.
    """
    import gzip
    import sys

    try:
        if infile.endswith('.gz'):
            # gzip.open() defaults to binary mode on Python 3; request text
            # mode there so compressed and plain input both yield str lines.
            if sys.version_info[0] >= 3:
                return gzip.open(infile, 'rt')
            return gzip.open(infile)
        return open(infile)
    except IOError:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Error: File not exist!")
        # Propagate the failure: swallowing it would return None and make
        # the caller crash later with an unrelated AttributeError.
        raise
# Replace as follows:
# '.', './.', '0/0*' to '0'
# '0/1*' to '1'
# '1/1*', '1/2*' to '2'
def _encode_genotype(genotype):
    """Return the numeric code for one sample field, or the field unchanged.

    Fields starting with '.' or '0/0' map to '0', '0/1' maps to '1', and
    anything starting with '1/' maps to '2'. Any other value is passed
    through untouched.
    """
    if genotype.startswith(('.', '0/0')):
        return '0'
    if genotype.startswith('0/1'):
        return '1'
    if genotype.startswith('1/'):
        return '2'
    return genotype


def replaceSNP(infile, outfile):
    """Write a reduced table with each sample genotype replaced by a code.

    The first line of ``infile`` is a tab-separated header. The output keeps
    the columns CHROM, POS, ID, REF, ALT and GeneName, followed by every
    column located after 'FORMAT' and before 'Ori_REF' (the sample columns).
    Sample values are recoded by ``_encode_genotype``.

    Args:
        infile: Path of the tab-separated input; opened via ``safe_open``.
        outfile: Path of the tab-separated file to write (overwritten).

    Raises:
        ValueError: If a required column name is missing from the header.
        IndexError: If a non-blank data row has fewer fields than the
            header positions that are extracted.
    """
    with safe_open(infile) as f:
        with open(outfile, 'w') as out:
            # Strip only the line terminator: a full strip() would also drop
            # leading/trailing tabs and shift or lose empty columns.
            headerline = f.readline().rstrip('\r\n').split('\t')
            sample_start = headerline.index('FORMAT') + 1  # start after FORMAT
            sample_stop = headerline.index('Ori_REF')      # until Ori_REF
            header_indexs = [
                headerline.index(name)
                for name in ('CHROM', 'POS', 'ID', 'REF', 'ALT', 'GeneName')
            ]
            newheader = ([headerline[i] for i in header_indexs]
                         + headerline[sample_start:sample_stop])
            out.write('\t'.join(newheader) + '\n')

            for line in f:
                linelist = line.rstrip('\r\n').split('\t')
                # A blank line splits to a single empty field; skip it
                # rather than fail on the column lookups below.
                if linelist == ['']:
                    continue
                info_columns = [linelist[i] for i in header_indexs]
                sample_info_replaced = [
                    _encode_genotype(sample)
                    for sample in linelist[sample_start:sample_stop]
                ]
                out.write('\t'.join(info_columns + sample_info_replaced) + '\n')
if __name__ == '__main__':
    import sys

    # Both the input path and the output path are required.
    if len(sys.argv) < 3:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Usage: python %s <infile> <outfile>" % sys.argv[0])
        # sys.exit() is always available; the bare exit() helper is only
        # injected by the site module and is missing under `python -S`.
        sys.exit(1)
    replaceSNP(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment