# Gist xiaojay/0602d146e07dcfdfdafc: check 23andme data
# Created March 25, 2016 01:48
#coding=utf-8 | |
import argparse
import os
import sys
import zipfile
def parse_23andme_file(fn):
    """Parse a 23andMe-style raw data export into a dict.

    ``fn`` is the path of either a plain-text export or a ``.zip`` archive
    (detected by file extension, case-insensitively); for an archive the
    first member is read.

    Every data line is split on tabs: the first field, lower-cased, becomes
    the key and the last field, stripped, becomes the value.  Lines that are
    blank or start with ``#`` are skipped.

    Returns a dict mapping the lower-cased first column to the last column.
    """
    suffix = fn.split('.')[-1].lower()
    if suffix == 'zip':
        # The context manager closes the archive even if reading fails.
        with zipfile.ZipFile(fn, 'r') as z:
            content = z.read(z.namelist()[0])
        # ZipFile.read() returns bytes on Python 3; decode so the str
        # operations below work.  On Python 2 bytes is str, so this is a no-op.
        if not isinstance(content, str):
            content = content.decode('utf-8')
    else:
        with open(fn) as fh:
            content = fh.read()

    data = {}
    for line in content.split('\n'):
        # Strip first so '\r' endings and indented comments are handled.
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split('\t')
        data[fields[0].lower()] = fields[-1].strip()
    return data
def reverse(s):
    """Return the items of ``s`` joined into one string in reverse order.

    For a string argument this is the reversed string, e.g.
    ``reverse('AG')`` gives ``'GA'``.
    """
    # list() keeps this working for any iterable of strings, not just sequences.
    return ''.join(reversed(list(s)))
# Command-line entry: compare a child's genotypes against both parents and
# report every id whose child genotype cannot be formed from one allele of
# each parent.
parser = argparse.ArgumentParser()
# All three files are needed below, so let argparse reject a missing one with
# a usage message instead of failing later inside the parser function.
parser.add_argument("-f", "--father", type=str, required=True,
                    help=u"父亲的基因文件")
parser.add_argument("-m", "--mother", type=str, required=True,
                    help=u"母亲的基因文件")
parser.add_argument("-c", "--child", type=str, required=True,
                    help=u"孩子的基因文件")
args = parser.parse_args()

f = parse_23andme_file(args.father)
m = parse_23andme_file(args.mother)
c = parse_23andme_file(args.child)

error = 0
correct = 0
for k in c:
    rc = c[k]
    rf = f.get(k)
    rm = m.get(k)
    # Only ids present in all three files can be compared.
    if rf is None or rm is None:
        continue
    # Only two-character genotypes are checked; '--' entries are skipped.
    if len(rc) != 2 or len(rf) != 2 or len(rm) != 2:
        continue
    if rc == '--' or rf == '--' or rm == '--':
        continue
    # Every pairing of one father allele with one mother allele, in both
    # orders, so the comparison does not depend on allele order.
    generate = [i + j for i in rf for j in rm]
    generate += [reverse(g) for g in generate]
    if rc not in generate:
        # Same space-separated output as the Python 2 print statement.
        print('%s %s %s %s' % (k, rc, rf, rm))
        error += 1
    else:
        correct += 1
# Summary line: inconsistent count / consistent count.
print('%i/%i' % (error, correct))
# End of gist (the GitHub sign-up / sign-in page footer is not part of the script).