Skip to content

Instantly share code, notes, and snippets.

@ricarkol
Created March 16, 2017 22:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ricarkol/863d57f51dde068eb3c97d90e0a932f3 to your computer and use it in GitHub Desktop.
Save ricarkol/863d57f51dde068eb3c97d90e0a932f3 to your computer and use it in GitHub Desktop.
Messing with 23andme raw exported data
import sys
# Parse an exported raw genome data file from 23andme.com, which is basically
# one SNP per line. Each SNP is looked up in a small dictionary of
# "interesting" genes, like the ones related to celiac disease,
# lactose intolerance, or obesity. The first and only arg is the name of the
# file (uncompressed).
# Lookup table of annotated SNPs, keyed by rsID.
#
# Each entry is a dict with two keys:
#   'geno' -- maps a genotype string (e.g. 'AG') to a short description
#   'url'  -- the SNPedia page for that rsID (the page name is the rsID with
#             a capital 'R', so every url must end with its own rsID)
db = {
    'rs2187668': {
        'geno': {
            'AA': 'Autoimmune disorder risk (lupus, celiac disease) due to 2 HLA-DRB1*0301 alleles',
            'AG': 'Somewhat increased autoimmune disorder (lupus, celiac disease) risk; 1 HLA-DRB1*0301 allele',
            'GG': 'average',
        },
        'url': 'https://www.snpedia.com/index.php/Rs2187668',
    },
    'rs3184504': {
        'geno': {
            'CC': 'normal',
            'CT': 'increased risk for celiac disease',
            'TT': 'increased risk for celiac disease',
        },
        'url': 'https://www.snpedia.com/index.php/Rs3184504',
    },
    'rs6822844': {
        'geno': {
            'GG': 'increased risk for celiac disease. Common on affy axiom data',
            'GT': 'n/a',
            'TT': 'n/a',
        },
        'url': 'https://www.snpedia.com/index.php/Rs6822844',
    },
    'rs4988235': {
        'geno': {
            'CC': 'likely to be lactose intolerant as an adult',
            'CT': 'likely to be able to digest milk as an adult',
            'TT': 'can digest milk',
        },
        'url': 'https://www.snpedia.com/index.php/Rs4988235',
    },
    'rs182549': {
        'geno': {
            'CC': 'possibly lactose intolerant',
            'CT': 'Can digest milk.',
            'TT': 'Can digest milk.',
        },
        'url': 'https://www.snpedia.com/index.php/Rs182549',
    },
    'rs1799971': {
        'geno': {
            'AA': 'normal',
            'AG': '2.5 stronger cravings for alcohol. if alcoholic, naltrexone treatment 2x more successful',
            'GG': 'more pain',
        },
        'url': 'https://www.snpedia.com/index.php/Rs1799971',
    },
    'rs9939609': {
        'geno': {
            'AA': 'obesity risk and 1.6x risk for Type-2 diabetes',
            'AT': 'risk for Type-2 diabetes; obesity risk',
            'TT': 'lower risk of obesity and Type-2 diabetes',
        },
        'url': 'https://www.snpedia.com/index.php/Rs9939609',
    },
}
def annotate_line(line, snp_db):
    """Return the report row for one raw-data line, or None to skip it.

    line:   one line of the input file; data lines are expected to hold four
            whitespace-separated fields: rsid, chromosome, position, genotype.
    snp_db: dict keyed by rsid; each value has a 'geno' dict (genotype ->
            description) and a 'url' string.

    None is returned for comment lines (starting with '#'), for blank or
    malformed lines (anything other than four fields), and for rsids that
    are not in snp_db. A known rsid with a genotype that has no description
    is still reported, with '??' as the description.
    """
    if line.startswith('#'):
        return None
    fields = line.split()
    if len(fields) != 4:
        # Blank or malformed line: skip it instead of failing the whole run
        # on the tuple unpacking below.
        return None
    rsid, chromosome, _position, genotype = fields
    entry = snp_db.get(rsid)
    if entry is None:
        return None
    meaning = entry['geno'].get(genotype, '??')
    return '%-10s %-10s %-10s %s (%s)' % (
        rsid, chromosome, genotype, meaning, entry['url'])


def main(argv):
    """Print a report of the SNPs in the file argv[1] that appear in db.

    Returns the process exit status: 0 on success, 2 when the file name
    argument is missing.
    """
    if len(argv) < 2:
        sys.stderr.write('usage: %s RAW_GENOME_FILE\n' % argv[0])
        return 2
    infile = argv[1]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('#%-9s %-10s %-10s' % ('marker', 'chromosome', 'genotype'))
    with open(infile) as inf:
        for line in inf:
            row = annotate_line(line, db)
            if row is not None:
                print(row)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment