Skip to content

Instantly share code, notes, and snippets.

@alimanfoo
Created March 24, 2022 17:28
Show Gist options
  • Save alimanfoo/7bd8d1ccef74f4aa3c8bdb6215fd1189 to your computer and use it in GitHub Desktop.
Save alimanfoo/7bd8d1ccef74f4aa3c8bdb6215fd1189 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e77a6e77-2d47-44e9-a7de-f627132c5ccd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Copying gs://vo_agam_production/v3.x/curation/1272-VO-GM-OPONDO-VMF00160/snp_genotypes/VBS47720-6366STDY9854329.zarr.zip...\n",
"\\ [1 files][468.4 MiB/468.4 MiB] \n",
"Operation completed over 1 objects/468.4 MiB. \n"
]
}
],
"source": [
"!gsutil cp gs://vo_agam_production/v3.x/curation/1272-VO-GM-OPONDO-VMF00160/snp_genotypes/VBS47720-6366STDY9854329.zarr.zip ."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f52388ed-f59a-4667-a56b-c850842250fb",
"metadata": {},
"outputs": [],
"source": [
"import zarr\n",
"from fsspec.implementations.zip import ZipFileSystem\n",
"import fsspec"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "de6cfe37-be08-4972-81a2-7c578c562823",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<zarr.hierarchy.Group '/'>"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"expected_sites = zarr.open(fsspec.get_mapper(\"gs://vo_agam_release/v3/snp_genotypes/all/sites\"))\n",
"expected_sites"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "407faf0b-273f-489d-89d9-049986cfd1d1",
"metadata": {},
"outputs": [],
"source": [
"def check_snp_genotype_zarr(path, sample_id, expected_sites):\n",
"\n",
" fs = ZipFileSystem(path)\n",
" store = fs.get_mapper(\"/\")\n",
" root = zarr.open(store=store)\n",
"\n",
" expected_contigs = sorted(expected_sites)\n",
" # exception - Mt not included in SNP calling\n",
" expected_contigs = [c for c in expected_contigs if c != \"Mt\"]\n",
" \n",
" for contig in expected_contigs:\n",
" \n",
" # check arrays exists\n",
" mq = root[f\"/{sample_id}/{contig}/variants/MQ\"]\n",
" gt = root[f\"/{sample_id}/{contig}/calldata/GT\"]\n",
" gq = root[f\"/{sample_id}/{contig}/calldata/GQ\"]\n",
" ad = root[f\"/{sample_id}/{contig}/calldata/AD\"]\n",
" assert isinstance(mq, zarr.core.Array)\n",
" assert isinstance(gt, zarr.core.Array)\n",
" assert isinstance(gq, zarr.core.Array)\n",
" assert isinstance(ad, zarr.core.Array)\n",
" \n",
" # check array shapes\n",
" expected_n_sites = expected_sites[f\"{contig}/variants/POS\"].shape[0]\n",
" assert mq.shape == (expected_n_sites,)\n",
" assert gq.shape == (expected_n_sites, 1)\n",
" assert gt.shape == (expected_n_sites, 1, 2)\n",
" assert ad.shape == (expected_n_sites, 1, 4)\n",
" \n",
" for x in mq, gq, gt, ad:\n",
" \n",
" # check all chunks are initialised and present\n",
" assert x.nchunks == x.nchunks_initialized\n",
" \n",
" # check we can read the data\n",
" a = x[:]\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "44edc657-ac58-4303-82db-05080f26c30a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 26.9 s, sys: 2.66 s, total: 29.6 s\n",
"Wall time: 25.6 s\n"
]
}
],
"source": [
"%%time\n",
"check_snp_genotype_zarr(\n",
" path=\"VBS47720-6366STDY9854329.zarr.zip\", \n",
" sample_id=\"VBS47720-6366STDY9854329\", \n",
" expected_sites=expected_sites\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e85a1348-6e3f-4cb7-8464-23fc6c555f4c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:binder-v3.3.0]",
"language": "python",
"name": "conda-env-binder-v3.3.0-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment