Created
October 17, 2016 15:13
-
-
Save suqingdong/4c2c09c2065f9e8e7bd48cf1f154f43c to your computer and use it in GitHub Desktop.
merge the replaced files, if no pos, mark '0'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#======================================
# merge file1 and file2 of replaced.xls
# if no pos, mark '0'
# output converted.xls
#======================================
# sampledict structure: {'sample1': {'pos1':'snp1','pos2':'snp2',... }, ...}
def getSampleDict(infile):
    """Parse a tab-separated SNP table into a per-sample lookup.

    The first line of *infile* is the header: its first cell is ignored and
    the remaining cells are the position labels. Each following line is one
    sample: the first cell is the sample name and the remaining cells are
    SNP values, paired with the header positions in column order. Cells
    beyond the shorter of the two rows are ignored.

    Rows sharing a sample name are merged, later values overriding earlier
    ones. Blank lines are skipped. A row that has a name but no values is
    still registered (with an empty mapping) so the sample is not lost.

    Returns a tuple ``(sampledict, positions)`` where ``sampledict`` looks
    like ``{'sample1': {'pos1': 'snp1', ...}, ...}`` and ``positions`` is the
    list of position labels in header order.
    """
    sampledict = {}
    with open(infile) as f:
        # Only trailing whitespace is removed: a full strip() would also eat
        # a leading tab (empty first cell) and shift every column by one.
        positions = f.readline().rstrip().split('\t')[1:]
        for line in f:
            row = line.rstrip()
            if not row:
                continue  # blank or whitespace-only line
            # Split once so the name and the values come from the same cells.
            fields = row.split('\t')
            name = fields[0]
            sampledict.setdefault(name, {}).update(zip(positions, fields[1:]))
    return sampledict, positions
def writeSampleDict(sampledict, mergedpositions, group, out):
    """Write one tab-separated row per sample to *out*.

    Each row is ``group``, the sample name, then one cell per entry of
    *mergedpositions* (in that order), terminated by a newline.

    sampledict      -- ``{sample: {position: snp, ...}, ...}``
    mergedpositions -- ordered position labels defining the columns
    group           -- value for the first column; converted with ``str()``
    out             -- any object with a ``write(str)`` method
    """
    group_cell = str(group)
    for sample, posdict in sampledict.items():
        cells = [group_cell, sample]
        # if no pos, mark '0'
        cells.extend(posdict.get(pos, '0') for pos in mergedpositions)
        # Build the row in one join instead of repeated string concatenation.
        out.write('\t'.join(cells) + '\n')
def mergeSNP(file1, file2, outfile):
    """Merge two SNP tables into *outfile* over the union of their positions.

    Both inputs are parsed with getSampleDict. The output starts with a
    ``Group``/``Sample`` header followed by every position label found in
    either file, then the samples of *file1* tagged as group 1 and the
    samples of *file2* tagged as group 2. Positions a sample lacks are
    filled in by writeSampleDict.
    """
    first_samples, first_positions = getSampleDict(file1)
    second_samples, second_positions = getSampleDict(file2)

    # De-duplicated union of both headers, in sorted order.
    # NOTE(review): labels are compared as strings, so purely numeric
    # positions order lexicographically ('10' before '9') -- confirm this is
    # the intended column order.
    all_positions = list(set(first_positions) | set(second_positions))
    all_positions.sort()

    with open(outfile, 'w') as out:
        out.write('Group\tSample\t' + '\t'.join(all_positions) + '\n')
        for group, samples in enumerate((first_samples, second_samples), 1):
            writeSampleDict(samples, all_positions, group, out)
if __name__ == '__main__':
    import sys

    # Three paths are required: two input tables and the output file.
    # Any further arguments are ignored.
    if len(sys.argv) < 4:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("Usage: python %s <file1> <file2> <outfile>" % sys.argv[0])
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to be defined.
        sys.exit(1)
    mergeSNP(sys.argv[1], sys.argv[2], sys.argv[3])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment