Skip to content

Instantly share code, notes, and snippets.

@suqingdong
Created October 17, 2016 16:04
Show Gist options
  • Save suqingdong/4834553fe6f741d88aff16fe22f4a81c to your computer and use it in GitHub Desktop.
Save suqingdong/4834553fe6f741d88aff16fe22f4a81c to your computer and use it in GitHub Desktop.
Generate sample_info according to info.txt, pn and disease
#!/usr/bin/env python
#-*- coding: utf-8 -*-
def get_sample_info(infofile, pn, disease):
    """Write a ``sample_info`` table derived from a tab-separated info file.

    The output file is always named ``sample_info`` and is created (or
    overwritten) in the current working directory.  It starts with a two-line
    header (``#B1`` and the column names) followed by one row per non-blank
    input line.

    For each input row the 1st, 3rd, 5th and 7th tab-separated columns are
    used as FamilyID, SampleID, Sex and Normal/Patient respectively; an empty
    FamilyID is written as ``.``.

    Args:
        infofile: Path of the tab-separated input file.
        pn: Value written in the ``PN`` column of every row.
        disease: Value written in an extra ``Disease`` column of every row.
            When falsy (``None`` or ``''``) the column is omitted entirely.

    Raises:
        IndexError: If a non-blank input line has fewer than 7 columns.
    """
    header = '#B1\n#FamilyID\tSampleID\tSex\tNormal/Patient\tPN'
    if disease:
        header += '\tDisease'
    header += '\n'

    # Open the input first so that an unreadable ``infofile`` does not leave
    # a freshly truncated, header-only ``sample_info`` behind.
    with open(infofile) as f, open('sample_info', 'w') as out:
        out.write(header)
        for lineno, eachline in enumerate(f, 1):
            # Skip blank lines, including whitespace-only and CRLF-only ones,
            # which would otherwise fail on the column lookups below.
            if not eachline.strip():
                continue

            # Split once instead of once per extracted column.
            fields = eachline.rstrip().split('\t')
            if len(fields) < 7:
                raise IndexError(
                    '{}: line {} has {} column(s), expected at least 7'.format(
                        infofile, lineno, len(fields)))

            familyid = fields[0] or '.'
            sampleid = fields[2]
            sex = fields[4]
            nop = fields[6]

            row = [familyid, sampleid, sex, nop, pn]
            if disease:
                row.append(disease)
            out.write('\t'.join(row) + '\n')
if __name__ == '__main__':
    # Command-line entry point: <info.txt> <pn> [disease]
    import sys

    if len(sys.argv) < 3:
        # Parenthesised single-argument form works as both the Python 2
        # print statement and the Python 3 print function.
        print("Usage: {} <info.txt> <pn> [disease]".format(sys.argv[0]))
        # sys.exit is always available; the bare ``exit`` helper is only
        # injected by the ``site`` module.
        sys.exit(1)

    # The disease argument is optional; any arguments after it are ignored
    # rather than causing the disease itself to be dropped.
    disease = sys.argv[3] if len(sys.argv) > 3 else None

    get_sample_info(sys.argv[1], sys.argv[2], disease)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment