Skip to content

Instantly share code, notes, and snippets.

@xiaojay
Created March 25, 2016 01:48
Show Gist options
  • Save xiaojay/0602d146e07dcfdfdafc to your computer and use it in GitHub Desktop.
Save xiaojay/0602d146e07dcfdfdafc to your computer and use it in GitHub Desktop.
check 23andme data
#coding=utf-8
import os,sys,argparse,zipfile
def parse_23andme_file(fn):
    """Parse a 23andMe raw-data export into an ``{rsid: genotype}`` dict.

    Args:
        fn: Path to the data file.  If the extension is ``zip``
            (case-insensitive) the first member of the archive is read;
            otherwise the path is opened as a plain text file.

    Returns:
        dict mapping the lower-cased first tab-separated field of each data
        line (the rsid) to the stripped last tab-separated field (the
        genotype).  Lines starting with ``#`` and blank lines are ignored.
    """
    suffix = fn.split('.')[-1].lower()
    if suffix == 'zip':
        # Context managers make sure the archive / file handle is closed.
        with zipfile.ZipFile(fn, 'r') as archive:
            content = archive.read(archive.namelist()[0])
    else:
        with open(fn) as handle:
            content = handle.read()

    # ZipFile.read() returns bytes on Python 3; on Python 2 it is already str,
    # so this branch is only taken when a real decode is needed.
    if not isinstance(content, str):
        content = content.decode('utf-8')

    data = {}
    for line in content.split('\n'):
        if line.startswith('#'):
            continue
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. after the trailing newline) instead of
            # recording a meaningless '' -> '' entry.
            continue
        fields = line.split('\t')
        data[fields[0].lower()] = fields[-1].strip()
    return data
def reverse(s):
    """Return the items of *s* joined into a string in reverse order.

    For a plain string this is simply the string reversed, e.g. ``'AG'``
    becomes ``'GA'``.  Any iterable of strings is accepted; its items are
    concatenated last-to-first.
    """
    # list() keeps non-sequence iterables working; the slice does the reversal.
    return ''.join(list(s)[::-1])
# Command-line script: load the father's, mother's and child's genotype files
# and count how many of the child's genotypes can / cannot be formed by taking
# one allele from each parent.
parser = argparse.ArgumentParser()
# All three files are used unconditionally below, so make them mandatory and
# let argparse report a missing one instead of crashing on None.
parser.add_argument("-f", "--father", type=str, required=True, help=u"父亲的基因文件")
parser.add_argument("-m", "--mother", type=str, required=True, help=u"母亲的基因文件")
parser.add_argument("-c", "--child", type=str, required=True, help=u"孩子的基因文件")
args = parser.parse_args()

f = parse_23andme_file(args.father)
m = parse_23andme_file(args.mother)
c = parse_23andme_file(args.child)

error = 0
correct = 0
for k in c:
    # Only rsids present in all three files can be compared.
    if k not in f or k not in m:
        continue
    rc, rf, rm = c[k], f[k], m[k]
    # The pairing below only makes sense for two-character genotypes.
    if len(rc) != 2 or len(rf) != 2 or len(rm) != 2:
        continue
    # '--' entries are skipped (presumably no-calls in the export format).
    if '--' in (rc, rf, rm):
        continue
    # Every genotype obtainable from one paternal plus one maternal allele ...
    generate = []
    for i in rf:
        for j in rm:
            generate.append('%s%s' % (i, j))
    # ... in either allele order, since the child's file may list them flipped.
    generate += [reverse(g) for g in generate]
    if rc not in generate:
        # Report the inconsistent rsid with child, father and mother genotypes.
        print('%s %s %s %s' % (k, rc, rf, rm))
        error += 1
    else:
        correct += 1
# Summary line: "<inconsistent count>/<consistent count>".
print('%i/%i' % (error, correct))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment