Skip to content

Instantly share code, notes, and snippets.

@suqingdong
Created October 17, 2016 15:13
Show Gist options
  • Save suqingdong/4c2c09c2065f9e8e7bd48cf1f154f43c to your computer and use it in GitHub Desktop.
Save suqingdong/4c2c09c2065f9e8e7bd48cf1f154f43c to your computer and use it in GitHub Desktop.
merge the replaced files, if no pos, mark '0'
#!/usr/bin/env python
#======================================
# merge file1 and file2 of replaced.xls
# if no pos, mark '0'
# output converted.xls
#======================================
# sampledict structure: {'sample1': {'pos1':'snp1','pos2':'snp2',... }, ...}
def getSampleDict(infile):
    """Parse a tab-separated SNP table into a per-sample lookup.

    The first line of the file is the header: its first cell is ignored and
    the remaining cells are the position names. Every following line holds a
    sample name followed by one SNP value per position.

    Notes on edge cases:
      * Trailing whitespace (including trailing tabs) is removed from every
        line before splitting, so trailing empty cells are not recorded.
      * Values beyond the number of header positions are ignored, and a
        line with no values adds no entry for that sample.
      * If a sample name occurs on several lines, the entries are merged and
        later values overwrite earlier ones for the same position.

    Args:
        infile: path of the tab-separated file to read.

    Returns:
        A tuple ``(sampledict, positions)`` where ``sampledict`` maps
        sample name -> {position: snp} and ``positions`` is the list of
        header cells after the first one, in file order.
    """
    sampledict = {}
    with open(infile) as f:
        # Strip the right-hand side only: a full strip() would also swallow
        # the leading tab of an empty first cell and shift every column by one.
        header = f.readline().rstrip().split('\t')
        positions = header[1:]
        for line in f:
            fields = line.rstrip().split('\t')
            name = fields[0]
            snps = fields[1:]
            # zip() stops at the shorter sequence, so short rows simply leave
            # their missing positions unset.
            for position, snp in zip(positions, snps):
                sampledict.setdefault(name, {})[position] = snp
    return sampledict, positions
def writeSampleDict(sampledict, mergedpositions, group, out):
    """Write one tab-separated row per sample to ``out``.

    Each row consists of the group, the sample name, and then the sample's
    SNP for every entry of ``mergedpositions`` in the given order. A position
    the sample has no value for is written as '0'.

    Args:
        sampledict: mapping of sample name -> {position: snp}.
        mergedpositions: ordered positions that define the output columns.
        group: group label for these samples; converted with ``str()``.
        out: object with a ``write`` method that receives each finished row.
    """
    for sample_name, calls in sampledict.items():
        cells = [str(group), sample_name]
        # Fall back to '0' for positions this sample has no value for.
        cells.extend(calls.get(position, '0') for position in mergedpositions)
        out.write('\t'.join(cells) + '\n')
def mergeSNP(file1, file2, outfile):
    """Merge two SNP tables into a single tab-separated file.

    Both inputs are read with ``getSampleDict``. The output columns are the
    union of the positions of both files, sorted as strings. The output
    starts with a ``Group``/``Sample`` header row, followed by the samples of
    ``file1`` tagged as group 1 and then the samples of ``file2`` tagged as
    group 2.

    Args:
        file1: path of the first input table (written as group 1).
        file2: path of the second input table (written as group 2).
        outfile: path of the merged table to create or overwrite.
    """
    first_samples, first_positions = getSampleDict(file1)
    second_samples, second_positions = getSampleDict(file2)

    # Union of both headers; the sort compares the position names as strings.
    columns = list(set(first_positions) | set(second_positions))
    columns.sort()

    with open(outfile, 'w') as out:
        out.write('Group\tSample\t' + '\t'.join(columns) + '\n')
        for group, samples in ((1, first_samples), (2, second_samples)):
            writeSampleDict(samples, columns, group, out)
if __name__ == '__main__':
    import sys

    # Command line: <file1> <file2> <outfile>. Arguments beyond the third
    # are ignored.
    if len(sys.argv) < 4:
        # A parenthesised single-argument print is valid on both Python 2
        # and Python 3, unlike the print statement.
        print("Usage: python %s <file1> <file2> <outfile>" % sys.argv[0])
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)
    mergeSNP(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment