-
-
Save KaiSmith/af733a3b544548c9b6c6 to your computer and use it in GitHub Desktop.
IPython Notebook tutorial for using genda: Loading and Viewing Data
Viewable at http://nbviewer.ipython.org/af733a3b544548c9b6c6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "genda - Loading and Viewing Data" | |
}, | |
"nbformat": 2, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"from genda.formats.panVCF import VCF", | |
"from genda.formats.Snp_array import SNP_array", | |
"from genda.formats.PED import PED" | |
], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"v = VCF('./tests/data/chr22.test.vcf')" | |
], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Preview VCF", | |
"v.vcf.ix[0:10,0:7]" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>#CHROM</th>", | |
" <th>POS</th>", | |
" <th>REF</th>", | |
" <th>ALT</th>", | |
" <th>QUAL</th>", | |
" <th>FILTER</th>", | |
" <th>FORMAT</th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>rs149201999</th>", | |
" <td> 22</td>", | |
" <td> 16050408</td>", | |
" <td> T</td>", | |
" <td> C</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs146752890</th>", | |
" <td> 22</td>", | |
" <td> 16050612</td>", | |
" <td> C</td>", | |
" <td> G</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs139377059</th>", | |
" <td> 22</td>", | |
" <td> 16050678</td>", | |
" <td> C</td>", | |
" <td> T</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs188945759</th>", | |
" <td> 22</td>", | |
" <td> 16050984</td>", | |
" <td> C</td>", | |
" <td> G</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs6518357</th>", | |
" <td> 22</td>", | |
" <td> 16051107</td>", | |
" <td> C</td>", | |
" <td> A</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs62224609</th>", | |
" <td> 22</td>", | |
" <td> 16051249</td>", | |
" <td> T</td>", | |
" <td> C</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs62224610</th>", | |
" <td> 22</td>", | |
" <td> 16051347</td>", | |
" <td> G</td>", | |
" <td> C</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs143503259</th>", | |
" <td> 22</td>", | |
" <td> 16051453</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs192339082</th>", | |
" <td> 22</td>", | |
" <td> 16051477</td>", | |
" <td> C</td>", | |
" <td> A</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs79725552</th>", | |
" <td> 22</td>", | |
" <td> 16051480</td>", | |
" <td> T</td>", | |
" <td> C</td>", | |
" <td> 100</td>", | |
" <td> PASS</td>", | |
" <td> GT:DS:GL</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 3, | |
"text": [ | |
" #CHROM POS REF ALT QUAL FILTER FORMAT", | |
"rs149201999 22 16050408 T C 100 PASS GT:DS:GL", | |
"rs146752890 22 16050612 C G 100 PASS GT:DS:GL", | |
"rs139377059 22 16050678 C T 100 PASS GT:DS:GL", | |
"rs188945759 22 16050984 C G 100 PASS GT:DS:GL", | |
"rs6518357 22 16051107 C A 100 PASS GT:DS:GL", | |
"rs62224609 22 16051249 T C 100 PASS GT:DS:GL", | |
"rs62224610 22 16051347 G C 100 PASS GT:DS:GL", | |
"rs143503259 22 16051453 A C 100 PASS GT:DS:GL", | |
"rs192339082 22 16051477 C A 100 PASS GT:DS:GL", | |
"rs79725552 22 16051480 T C 100 PASS GT:DS:GL" | |
] | |
} | |
], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Preview genotype matrix", | |
"v.geno.ix[0:10,0:7]" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>HG00096</th>", | |
" <th>HG00097</th>", | |
" <th>HG00099</th>", | |
" <th>HG00100</th>", | |
" <th>HG00101</th>", | |
" <th>HG00102</th>", | |
" <th>HG00103</th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>rs149201999</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs146752890</th>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs139377059</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs188945759</th>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs6518357</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs62224609</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs62224610</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs143503259</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs192339082</th>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs79725552</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 4, | |
"text": [ | |
" HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103", | |
"rs149201999 0 1 1 0 1 1 0", | |
"rs146752890 1 1 1 0 1 0 0", | |
"rs139377059 0 1 1 0 1 1 0", | |
"rs188945759 0 0 0 0 0 0 0", | |
"rs6518357 0 1 1 0 1 1 0", | |
"rs62224609 0 1 1 0 0 1 0", | |
"rs62224610 0 1 1 1 1 1 0", | |
"rs143503259 0 1 1 0 0 1 0", | |
"rs192339082 0 0 0 0 0 0 0", | |
"rs79725552 0 1 1 0 1 1 0" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"#Load in data from a SNP array which is formatted with both alleles in one column (eg. 23 and me data)", | |
"s = SNP_array('./tests/data/one-column-test-data', fileformat='one column', delim = '\\t',", | |
" encoding = {'rs4477212':'A/G','rs3094315':'A/G','rs3131972':'G/A','rs12124819':'A/G','rs11240777':'A/G',\\", | |
" 'rs6681049':'C/T','rs4970383':'T/C','rs4475691':'T/C','rs7537756':'A/T'})" | |
], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Preview data from SNP array", | |
"s.df.ix[0:10,:]" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>rsid</th>", | |
" <th>chromosome</th>", | |
" <th>position</th>", | |
" <th>genotype</th>", | |
" </tr>", | |
" <tr>", | |
" <th>rsid</th>", | |
" <th></th>", | |
" <th></th>", | |
" <th></th>", | |
" <th></th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>rs4477212</th>", | |
" <td> rs4477212</td>", | |
" <td> 1</td>", | |
" <td> 72017</td>", | |
" <td> AA</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3094315</th>", | |
" <td> rs3094315</td>", | |
" <td> 1</td>", | |
" <td> 742429</td>", | |
" <td> AA</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3131972</th>", | |
" <td> rs3131972</td>", | |
" <td> 1</td>", | |
" <td> 742584</td>", | |
" <td> GG</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs12124819</th>", | |
" <td> rs12124819</td>", | |
" <td> 1</td>", | |
" <td> 766409</td>", | |
" <td> AG</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs11240777</th>", | |
" <td> rs11240777</td>", | |
" <td> 1</td>", | |
" <td> 788822</td>", | |
" <td> GG</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs6681049</th>", | |
" <td> rs6681049</td>", | |
" <td> 1</td>", | |
" <td> 789870</td>", | |
" <td> CC</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4970383</th>", | |
" <td> rs4970383</td>", | |
" <td> 1</td>", | |
" <td> 828418</td>", | |
" <td> CC</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4475691</th>", | |
" <td> rs4475691</td>", | |
" <td> 1</td>", | |
" <td> 836671</td>", | |
" <td> CC</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs7537756</th>", | |
" <td> rs7537756</td>", | |
" <td> 1</td>", | |
" <td> 844113</td>", | |
" <td> AA</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
" rsid chromosome position genotype", | |
"rsid ", | |
"rs4477212 rs4477212 1 72017 AA", | |
"rs3094315 rs3094315 1 742429 AA", | |
"rs3131972 rs3131972 1 742584 GG", | |
"rs12124819 rs12124819 1 766409 AG", | |
"rs11240777 rs11240777 1 788822 GG", | |
"rs6681049 rs6681049 1 789870 CC", | |
"rs4970383 rs4970383 1 828418 CC", | |
"rs4475691 rs4475691 1 836671 CC", | |
"rs7537756 rs7537756 1 844113 AA" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Preview genotype data from SNP array", | |
"s.geno.ix[0:10,:]" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>genotype</th>", | |
" </tr>", | |
" <tr>", | |
" <th>rsid</th>", | |
" <th></th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>rs4477212</th>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3094315</th>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3131972</th>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs12124819</th>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs11240777</th>", | |
" <td> 2</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs6681049</th>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4970383</th>", | |
" <td> 2</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4475691</th>", | |
" <td> 2</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs7537756</th>", | |
" <td> 0</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
" genotype", | |
"rsid ", | |
"rs4477212 0", | |
"rs3094315 0", | |
"rs3131972 0", | |
"rs12124819 1", | |
"rs11240777 2", | |
"rs6681049 0", | |
"rs4970383 2", | |
"rs4475691 2", | |
"rs7537756 0" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"#Load in data form a SNP array with two columns representing the alleles of an individual", | |
"t = SNP_array('tests/data/two-column-test-data', fileformat = 'two column', delim = '\\t',\\", | |
" encoding = {'rs4477212':'A/G','rs3094315':'A/G','rs3131972':'G/A','rs12124819':'A/G',\\", | |
" 'rs11240777':'A/G','rs6681049':'C/T','rs4970383':'T/C','rs4475691':'T/C','rs7537756':'A/T'})" | |
], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Preview data", | |
"t.df.ix[0:10,0:4]" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>chromosome</th>", | |
" <th>Snp.ID</th>", | |
" <th>genetic.position</th>", | |
" <th>bp.position</th>", | |
" </tr>", | |
" <tr>", | |
" <th>Snp.ID</th>", | |
" <th></th>", | |
" <th></th>", | |
" <th></th>", | |
" <th></th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>rs4477212</th>", | |
" <td> 1</td>", | |
" <td> rs4477212</td>", | |
" <td> 0</td>", | |
" <td> 72017</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3094315</th>", | |
" <td> 1</td>", | |
" <td> rs3094315</td>", | |
" <td> 0</td>", | |
" <td> 742429</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3131972</th>", | |
" <td> 1</td>", | |
" <td> rs3131972</td>", | |
" <td> 0</td>", | |
" <td> 742584</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs12124819</th>", | |
" <td> 1</td>", | |
" <td> rs12124819</td>", | |
" <td> 0</td>", | |
" <td> 766409</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs11240777</th>", | |
" <td> 1</td>", | |
" <td> rs11240777</td>", | |
" <td> 0</td>", | |
" <td> 788822</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs6681049</th>", | |
" <td> 1</td>", | |
" <td> rs6681049</td>", | |
" <td> 0</td>", | |
" <td> 789870</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4970383</th>", | |
" <td> 1</td>", | |
" <td> rs4970383</td>", | |
" <td> 0</td>", | |
" <td> 828418</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4475691</th>", | |
" <td> 1</td>", | |
" <td> rs4475691</td>", | |
" <td> 0</td>", | |
" <td> 836671</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs7537756</th>", | |
" <td> 1</td>", | |
" <td> rs7537756</td>", | |
" <td> 0</td>", | |
" <td> 844113</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
" chromosome Snp.ID genetic.position bp.position", | |
"Snp.ID ", | |
"rs4477212 1 rs4477212 0 72017", | |
"rs3094315 1 rs3094315 0 742429", | |
"rs3131972 1 rs3131972 0 742584", | |
"rs12124819 1 rs12124819 0 766409", | |
"rs11240777 1 rs11240777 0 788822", | |
"rs6681049 1 rs6681049 0 789870", | |
"rs4970383 1 rs4970383 0 828418", | |
"rs4475691 1 rs4475691 0 836671", | |
"rs7537756 1 rs7537756 0 844113" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Preview genotype matrix", | |
"t.geno.ix[0:10,0:7]" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>person1_alle</th>", | |
" <th>person2_alle</th>", | |
" </tr>", | |
" <tr>", | |
" <th>Snp.ID</th>", | |
" <th></th>", | |
" <th></th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>rs4477212</th>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3094315</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs3131972</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs12124819</th>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs11240777</th>", | |
" <td> 2</td>", | |
" <td> 2</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs6681049</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4970383</th>", | |
" <td> 2</td>", | |
" <td> 2</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs4475691</th>", | |
" <td> 2</td>", | |
" <td> 0</td>", | |
" </tr>", | |
" <tr>", | |
" <th>rs7537756</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
" person1_alle person2_alle", | |
"Snp.ID ", | |
"rs4477212 0 0", | |
"rs3094315 0 1", | |
"rs3131972 0 1", | |
"rs12124819 1 1", | |
"rs11240777 2 2", | |
"rs6681049 0 1", | |
"rs4970383 2 2", | |
"rs4475691 2 0", | |
"rs7537756 0 1" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"#Load in a PED file", | |
"p = PED('tests/data/test.ped', 'tests/data/test.map', {'snp1':'A/C','snp2':'A/C','snp3':'C/A','snp4':'T/G','snp5':'C/A'})" | |
], | |
"language": "python", | |
"outputs": [], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#See the parsed out PED file", | |
"p.PED" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>0</th>", | |
" <th>1</th>", | |
" <th>2</th>", | |
" <th>3</th>", | |
" <th>4</th>", | |
" <th>5</th>", | |
" <th>6</th>", | |
" <th>7</th>", | |
" <th>8</th>", | |
" <th>9</th>", | |
" <th>10</th>", | |
" <th>11</th>", | |
" <th>12</th>", | |
" <th>13</th>", | |
" <th>14</th>", | |
" <th>15</th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>0</th>", | |
" <td> 1</td>", | |
" <td> sample1</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> T</td>", | |
" <td> G</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" </tr>", | |
" <tr>", | |
" <th>1</th>", | |
" <td> 2</td>", | |
" <td> sample2</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" <td> T</td>", | |
" <td> G</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" </tr>", | |
" <tr>", | |
" <th>2</th>", | |
" <td> 3</td>", | |
" <td> sample3</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 2</td>", | |
" <td> 1</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" <td> G</td>", | |
" <td> G</td>", | |
" <td> A</td>", | |
" <td> A</td>", | |
" </tr>", | |
" <tr>", | |
" <th>3</th>", | |
" <td> 4</td>", | |
" <td> sample4</td>", | |
" <td> 0</td>", | |
" <td> 0</td>", | |
" <td> 2</td>", | |
" <td> 1</td>", | |
" <td> A</td>", | |
" <td> 0</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" <td> G</td>", | |
" <td> G</td>", | |
" <td> A</td>", | |
" <td> C</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 12, | |
"text": [ | |
" 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15", | |
"0 1 sample1 0 0 1 1 A A A A A A T G A A", | |
"1 2 sample2 0 0 1 1 A C A C A C T G A C", | |
"2 3 sample3 0 0 2 1 A A A A A A G G A A", | |
"3 4 sample4 0 0 2 1 A 0 A C A C G G A C" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#See parsed out MAP file", | |
"p.MAP" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th></th>", | |
" <th>0</th>", | |
" <th>1</th>", | |
" <th>2</th>", | |
" <th>3</th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>0</th>", | |
" <td> 1</td>", | |
" <td> snp1</td>", | |
" <td> 0</td>", | |
" <td> 1000</td>", | |
" </tr>", | |
" <tr>", | |
" <th>1</th>", | |
" <td> X</td>", | |
" <td> snp2</td>", | |
" <td> 0</td>", | |
" <td> 1000</td>", | |
" </tr>", | |
" <tr>", | |
" <th>2</th>", | |
" <td> Y</td>", | |
" <td> snp3</td>", | |
" <td> 0</td>", | |
" <td> 1000</td>", | |
" </tr>", | |
" <tr>", | |
" <th>3</th>", | |
" <td> XY</td>", | |
" <td> snp4</td>", | |
" <td> 0</td>", | |
" <td> 1000</td>", | |
" </tr>", | |
" <tr>", | |
" <th>4</th>", | |
" <td> MT</td>", | |
" <td> snp5</td>", | |
" <td> 0</td>", | |
" <td> 1000</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 13, | |
"text": [ | |
" 0 1 2 3", | |
"0 1 snp1 0 1000", | |
"1 X snp2 0 1000", | |
"2 Y snp3 0 1000", | |
"3 XY snp4 0 1000", | |
"4 MT snp5 0 1000" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Vieing the genotype matrix", | |
"p.geno" | |
], | |
"language": "python", | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">", | |
"<table border=\"1\" class=\"dataframe\">", | |
" <thead>", | |
" <tr style=\"text-align: right;\">", | |
" <th>1</th>", | |
" <th>sample1</th>", | |
" <th>sample2</th>", | |
" <th>sample3</th>", | |
" <th>sample4</th>", | |
" </tr>", | |
" <tr>", | |
" <th>1</th>", | |
" <th></th>", | |
" <th></th>", | |
" <th></th>", | |
" <th></th>", | |
" </tr>", | |
" </thead>", | |
" <tbody>", | |
" <tr>", | |
" <th>snp1</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td>NaN</td>", | |
" </tr>", | |
" <tr>", | |
" <th>snp2</th>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" <td> 0</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>snp3</th>", | |
" <td> 2</td>", | |
" <td> 1</td>", | |
" <td> 2</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" <tr>", | |
" <th>snp4</th>", | |
" <td> 1</td>", | |
" <td> 1</td>", | |
" <td> 2</td>", | |
" <td> 2</td>", | |
" </tr>", | |
" <tr>", | |
" <th>snp5</th>", | |
" <td> 2</td>", | |
" <td> 1</td>", | |
" <td> 2</td>", | |
" <td> 1</td>", | |
" </tr>", | |
" </tbody>", | |
"</table>", | |
"</div>" | |
], | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
"1 sample1 sample2 sample3 sample4", | |
"1 ", | |
"snp1 0 1 0 NaN", | |
"snp2 0 1 0 1", | |
"snp3 2 1 2 1", | |
"snp4 1 1 2 2", | |
"snp5 2 1 2 1" | |
] | |
} | |
], | |
"prompt_number": 14 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment