Last active
October 14, 2016 14:36
-
-
Save suqingdong/e1f9e56ee6e525015a79f0662027420b to your computer and use it in GitHub Desktop.
Replace each sample's genotype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Extract columns: 'CHROM POS ID REF ALT GeneName' + samples' columns
def safe_open(infile):
    """Open *infile* for reading, using gzip for paths ending in '.gz'.

    Args:
        infile: Path of the file to open. A '.gz' suffix selects
            ``gzip.open`` (called with its default mode); anything else
            goes through the builtin ``open``.

    Returns:
        The opened file object, usable as a context manager.

    Raises:
        IOError: If the file cannot be opened. The error message is printed
            first and the exception is then re-raised, so callers never
            receive ``None`` in place of a file object.
    """
    try:
        if infile.endswith('.gz'):
            import gzip
            return gzip.open(infile)
        return open(infile)
    except IOError:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Error: File not exist!")
        # Re-raise instead of falling through: an implicit None return would
        # only fail later, inside the caller's `with` statement, with a
        # message that hides the real cause.
        raise
def _encode_genotype(sample):
    """Map one sample field to its numeric code by prefix.

    Only the start of the field is inspected, so anything after the
    leading genotype is ignored. A field matching none of the prefixes is
    returned unchanged.
    """
    # '.', './.', '0/0*' -> '0'
    if sample.startswith(('.', '0/0')):
        return '0'
    # '0/1*' -> '1'
    if sample.startswith('0/1'):
        return '1'
    # '1/1*', '1/2*' (any field starting with '1/') -> '2'
    if sample.startswith('1/'):
        return '2'
    return sample


def replaceSNP(infile, outfile):
    """Write a reduced, recoded copy of a tab-separated table.

    The first line of *infile* is read as the header. The output keeps the
    columns CHROM, POS, ID, REF, ALT and GeneName, followed by every column
    located after FORMAT and before Ori_REF (treated as the sample
    columns). Sample values are replaced as follows:

        '.', './.', '0/0*'  ->  '0'
        '0/1*'              ->  '1'
        '1/1*', '1/2*'      ->  '2'

    Args:
        infile: Path of the input table; opened through ``safe_open``.
        outfile: Path of the file to write (overwritten if it exists).

    Raises:
        ValueError: If one of the required header names is missing
            (raised by ``list.index``).
    """
    with safe_open(infile) as f:
        # Remove only the line terminator. A bare strip() would also eat
        # leading/trailing tabs, silently dropping empty edge columns and
        # shifting every index computed from the header.
        header = f.readline().rstrip('\r\n').split('\t')
        sample_start = header.index('FORMAT') + 1  # first column after FORMAT
        sample_stop = header.index('Ori_REF')      # up to, not including, Ori_REF
        info_indexes = [
            header.index(name)
            for name in ('CHROM', 'POS', 'ID', 'REF', 'ALT', 'GeneName')
        ]

        # The output is opened only after the header has been validated, so
        # a malformed input does not truncate an existing output file.
        with open(outfile, 'w') as out:
            new_header = ([header[i] for i in info_indexes]
                          + header[sample_start:sample_stop])
            out.write('\t'.join(new_header) + '\n')

            for line in f:
                # Blank lines carry no columns; indexing them would raise.
                if not line.strip():
                    continue
                fields = line.rstrip('\r\n').split('\t')
                info_columns = [fields[i] for i in info_indexes]
                samples = [_encode_genotype(sample)
                           for sample in fields[sample_start:sample_stop]]
                out.write('\t'.join(info_columns + samples) + '\n')
# Command-line entry point: python <script> <infile> <outfile>
if __name__ == '__main__':
    import sys

    # Both the input path and the output path are required.
    if len(sys.argv) < 3:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Usage: python %s <infile> <outfile>" % sys.argv[0])
        # sys.exit instead of the bare exit(): the latter is a helper added
        # by the site module and is not guaranteed to exist (e.g. python -S).
        sys.exit(1)
    replaceSNP(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment