Created
May 5, 2020 14:54
-
-
Save quasiben/e52bc740ae22ae321f30987c65998078 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"os.environ['UCX_TLS']='tcp,sockcm,cuda_copy,cuda_ipc'\n", | |
"os.environ['UCX_SOCKADDR_TLS_PRIORITY']='sockcm'\n", | |
"os.environ[\"UCX_RNDV_SCHEME\"] = \"get_zcopy\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"from dask_cuda import LocalCUDACluster\n", | |
"from dask_cuda.initialize import initialize\n", | |
"from dask.utils import parse_bytes\n", | |
"from dask.distributed import performance_report\n", | |
"from dask.distributed import wait\n", | |
"from dask.distributed import get_task_stream\n", | |
"\n", | |
"# science and data!\n", | |
"import gcsfs\n", | |
"import zarr\n", | |
"import allel\n", | |
"\n", | |
"import cupy as cp\n", | |
"import rmm\n", | |
"import cudf\n", | |
"import dask.array as da\n", | |
"import numpy as np\n", | |
"\n", | |
"# Configurations\n", | |
"protocol = \"ucx\"\n", | |
"interface = \"enp134s0f1\" # DGX-2\n", | |
"enable_tcp_over_ucx = True\n", | |
"enable_nvlink = True\n", | |
"enable_infiniband = False\n", | |
"\n", | |
"initialize(\n", | |
" create_cuda_context=True,\n", | |
" enable_tcp_over_ucx=enable_tcp_over_ucx,\n", | |
" enable_infiniband=enable_infiniband,\n", | |
" enable_nvlink=enable_nvlink,\n", | |
")\n", | |
"\n", | |
"cluster = LocalCUDACluster(local_directory=\"/tmp/bzaitlen\", \n", | |
" protocol=protocol,\n", | |
" interface=interface,\n", | |
" enable_tcp_over_ucx=enable_tcp_over_ucx,\n", | |
" enable_infiniband=enable_infiniband,\n", | |
" enable_nvlink=enable_nvlink,\n", | |
" )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"client = Client(cluster)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def setup_rmm_pool(client):\n", | |
" client.run(\n", | |
" cudf.set_allocator,\n", | |
" pool=True,\n", | |
" initial_pool_size= parse_bytes(\"26GB\"),\n", | |
" allocator=\"default\"\n", | |
" )\n", | |
" client.run(\n", | |
" cp.cuda.set_allocator,\n", | |
" rmm.rmm_cupy_allocator\n", | |
" )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"setup_rmm_pool(client)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# pull genotype data from GCSFS\n", | |
"gcs_anon = gcsfs.GCSFileSystem(token='anon', \n", | |
" access='read_only')\n", | |
"storage_path = 'ag1000g-release/phase2.AR1/variation/main/' \\\n", | |
" 'zarr/biallelic/ag1000g.phase2.ar1.pass.biallelic'\n", | |
"store = gcsfs.mapping.GCSMap(storage_path, gcs=gcs_anon, \n", | |
" check=False, create=False)\n", | |
"callset = zarr.open_consolidated(store=store)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 24.56 GB </td> <td> 63.96 MB </td></tr>\n", | |
" <tr><th> Shape </th><td> (10752701, 1142, 2) </td> <td> (524288, 61, 2) </td></tr>\n", | |
" <tr><th> Count </th><td> 400 Tasks </td><td> 399 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> int8 </td><td> numpy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"156\" height=\"146\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"10\" y1=\"1\" x2=\"80\" y2=\"71\" />\n", | |
" <line x1=\"10\" y1=\"2\" x2=\"80\" y2=\"73\" />\n", | |
" <line x1=\"10\" y1=\"4\" x2=\"80\" y2=\"74\" />\n", | |
" <line x1=\"10\" y1=\"5\" x2=\"80\" y2=\"76\" />\n", | |
" <line x1=\"10\" y1=\"6\" x2=\"80\" y2=\"77\" />\n", | |
" <line x1=\"10\" y1=\"8\" x2=\"80\" y2=\"78\" />\n", | |
" <line x1=\"10\" y1=\"9\" x2=\"80\" y2=\"80\" />\n", | |
" <line x1=\"10\" y1=\"10\" x2=\"80\" y2=\"81\" />\n", | |
" <line x1=\"10\" y1=\"12\" x2=\"80\" y2=\"82\" />\n", | |
" <line x1=\"10\" y1=\"13\" x2=\"80\" y2=\"84\" />\n", | |
" <line x1=\"10\" y1=\"14\" x2=\"80\" y2=\"85\" />\n", | |
" <line x1=\"10\" y1=\"16\" x2=\"80\" y2=\"86\" />\n", | |
" <line x1=\"10\" y1=\"17\" x2=\"80\" y2=\"88\" />\n", | |
" <line x1=\"10\" y1=\"19\" x2=\"80\" y2=\"89\" />\n", | |
" <line x1=\"10\" y1=\"20\" x2=\"80\" y2=\"90\" />\n", | |
" <line x1=\"10\" y1=\"21\" x2=\"80\" y2=\"92\" />\n", | |
" <line x1=\"10\" y1=\"23\" x2=\"80\" y2=\"93\" />\n", | |
" <line x1=\"10\" y1=\"24\" x2=\"80\" y2=\"95\" />\n", | |
" <line x1=\"10\" y1=\"25\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"25\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"13\" y1=\"3\" x2=\"13\" y2=\"28\" />\n", | |
" <line x1=\"16\" y1=\"6\" x2=\"16\" y2=\"32\" />\n", | |
" <line x1=\"20\" y1=\"10\" x2=\"20\" y2=\"35\" />\n", | |
" <line x1=\"23\" y1=\"13\" x2=\"23\" y2=\"39\" />\n", | |
" <line x1=\"27\" y1=\"17\" x2=\"27\" y2=\"42\" />\n", | |
" <line x1=\"30\" y1=\"20\" x2=\"30\" y2=\"46\" />\n", | |
" <line x1=\"34\" y1=\"24\" x2=\"34\" y2=\"49\" />\n", | |
" <line x1=\"37\" y1=\"27\" x2=\"37\" y2=\"52\" />\n", | |
" <line x1=\"40\" y1=\"30\" x2=\"40\" y2=\"56\" />\n", | |
" <line x1=\"44\" y1=\"34\" x2=\"44\" y2=\"59\" />\n", | |
" <line x1=\"47\" y1=\"37\" x2=\"47\" y2=\"63\" />\n", | |
" <line x1=\"51\" y1=\"41\" x2=\"51\" y2=\"66\" />\n", | |
" <line x1=\"54\" y1=\"44\" x2=\"54\" y2=\"70\" />\n", | |
" <line x1=\"58\" y1=\"48\" x2=\"58\" y2=\"73\" />\n", | |
" <line x1=\"61\" y1=\"51\" x2=\"61\" y2=\"77\" />\n", | |
" <line x1=\"65\" y1=\"55\" x2=\"65\" y2=\"80\" />\n", | |
" <line x1=\"68\" y1=\"58\" x2=\"68\" y2=\"83\" />\n", | |
" <line x1=\"71\" y1=\"61\" x2=\"71\" y2=\"87\" />\n", | |
" <line x1=\"75\" y1=\"65\" x2=\"75\" y2=\"90\" />\n", | |
" <line x1=\"78\" y1=\"68\" x2=\"78\" y2=\"94\" />\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,96.000852 10.000000,25.412617\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"35\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"13\" y1=\"3\" x2=\"38\" y2=\"3\" />\n", | |
" <line x1=\"16\" y1=\"6\" x2=\"42\" y2=\"6\" />\n", | |
" <line x1=\"20\" y1=\"10\" x2=\"45\" y2=\"10\" />\n", | |
" <line x1=\"23\" y1=\"13\" x2=\"49\" y2=\"13\" />\n", | |
" <line x1=\"27\" y1=\"17\" x2=\"52\" y2=\"17\" />\n", | |
" <line x1=\"30\" y1=\"20\" x2=\"56\" y2=\"20\" />\n", | |
" <line x1=\"34\" y1=\"24\" x2=\"59\" y2=\"24\" />\n", | |
" <line x1=\"37\" y1=\"27\" x2=\"62\" y2=\"27\" />\n", | |
" <line x1=\"40\" y1=\"30\" x2=\"66\" y2=\"30\" />\n", | |
" <line x1=\"44\" y1=\"34\" x2=\"69\" y2=\"34\" />\n", | |
" <line x1=\"47\" y1=\"37\" x2=\"73\" y2=\"37\" />\n", | |
" <line x1=\"51\" y1=\"41\" x2=\"76\" y2=\"41\" />\n", | |
" <line x1=\"54\" y1=\"44\" x2=\"80\" y2=\"44\" />\n", | |
" <line x1=\"58\" y1=\"48\" x2=\"83\" y2=\"48\" />\n", | |
" <line x1=\"61\" y1=\"51\" x2=\"87\" y2=\"51\" />\n", | |
" <line x1=\"65\" y1=\"55\" x2=\"90\" y2=\"55\" />\n", | |
" <line x1=\"68\" y1=\"58\" x2=\"93\" y2=\"58\" />\n", | |
" <line x1=\"71\" y1=\"61\" x2=\"97\" y2=\"61\" />\n", | |
" <line x1=\"75\" y1=\"65\" x2=\"100\" y2=\"65\" />\n", | |
" <line x1=\"78\" y1=\"68\" x2=\"104\" y2=\"68\" />\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"106\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"35\" y1=\"0\" x2=\"106\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"10.000000,0.000000 35.412617,0.000000 106.000852,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"106\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"80\" y1=\"71\" x2=\"106\" y2=\"71\" />\n", | |
" <line x1=\"80\" y1=\"73\" x2=\"106\" y2=\"73\" />\n", | |
" <line x1=\"80\" y1=\"74\" x2=\"106\" y2=\"74\" />\n", | |
" <line x1=\"80\" y1=\"76\" x2=\"106\" y2=\"76\" />\n", | |
" <line x1=\"80\" y1=\"77\" x2=\"106\" y2=\"77\" />\n", | |
" <line x1=\"80\" y1=\"78\" x2=\"106\" y2=\"78\" />\n", | |
" <line x1=\"80\" y1=\"80\" x2=\"106\" y2=\"80\" />\n", | |
" <line x1=\"80\" y1=\"81\" x2=\"106\" y2=\"81\" />\n", | |
" <line x1=\"80\" y1=\"82\" x2=\"106\" y2=\"82\" />\n", | |
" <line x1=\"80\" y1=\"84\" x2=\"106\" y2=\"84\" />\n", | |
" <line x1=\"80\" y1=\"85\" x2=\"106\" y2=\"85\" />\n", | |
" <line x1=\"80\" y1=\"86\" x2=\"106\" y2=\"86\" />\n", | |
" <line x1=\"80\" y1=\"88\" x2=\"106\" y2=\"88\" />\n", | |
" <line x1=\"80\" y1=\"89\" x2=\"106\" y2=\"89\" />\n", | |
" <line x1=\"80\" y1=\"90\" x2=\"106\" y2=\"90\" />\n", | |
" <line x1=\"80\" y1=\"92\" x2=\"106\" y2=\"92\" />\n", | |
" <line x1=\"80\" y1=\"93\" x2=\"106\" y2=\"93\" />\n", | |
" <line x1=\"80\" y1=\"95\" x2=\"106\" y2=\"95\" />\n", | |
" <line x1=\"80\" y1=\"96\" x2=\"106\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"106\" y1=\"70\" x2=\"106\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"80.588235,70.588235 106.000852,70.588235 106.000852,96.000852 80.588235,96.000852\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"93.294544\" y=\"116.000852\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >2</text>\n", | |
" <text x=\"126.000852\" y=\"83.294544\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,126.000852,83.294544)\">1142</text>\n", | |
" <text x=\"35.294118\" y=\"80.706734\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,80.706734)\">10752701</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"dask.array<array, shape=(10752701, 1142, 2), dtype=int8, chunksize=(524288, 61, 2), chunktype=numpy.ndarray>" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"genotypes = da.from_array(callset['3R/calldata/GT'])\n", | |
"genotypes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 24.56 GB </td> <td> 63.96 MB </td></tr>\n", | |
" <tr><th> Shape </th><td> (10752701, 1142, 2) </td> <td> (524288, 61, 2) </td></tr>\n", | |
" <tr><th> Count </th><td> 799 Tasks </td><td> 399 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> int8 </td><td> cupy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"156\" height=\"146\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"10\" y1=\"1\" x2=\"80\" y2=\"71\" />\n", | |
" <line x1=\"10\" y1=\"2\" x2=\"80\" y2=\"73\" />\n", | |
" <line x1=\"10\" y1=\"4\" x2=\"80\" y2=\"74\" />\n", | |
" <line x1=\"10\" y1=\"5\" x2=\"80\" y2=\"76\" />\n", | |
" <line x1=\"10\" y1=\"6\" x2=\"80\" y2=\"77\" />\n", | |
" <line x1=\"10\" y1=\"8\" x2=\"80\" y2=\"78\" />\n", | |
" <line x1=\"10\" y1=\"9\" x2=\"80\" y2=\"80\" />\n", | |
" <line x1=\"10\" y1=\"10\" x2=\"80\" y2=\"81\" />\n", | |
" <line x1=\"10\" y1=\"12\" x2=\"80\" y2=\"82\" />\n", | |
" <line x1=\"10\" y1=\"13\" x2=\"80\" y2=\"84\" />\n", | |
" <line x1=\"10\" y1=\"14\" x2=\"80\" y2=\"85\" />\n", | |
" <line x1=\"10\" y1=\"16\" x2=\"80\" y2=\"86\" />\n", | |
" <line x1=\"10\" y1=\"17\" x2=\"80\" y2=\"88\" />\n", | |
" <line x1=\"10\" y1=\"19\" x2=\"80\" y2=\"89\" />\n", | |
" <line x1=\"10\" y1=\"20\" x2=\"80\" y2=\"90\" />\n", | |
" <line x1=\"10\" y1=\"21\" x2=\"80\" y2=\"92\" />\n", | |
" <line x1=\"10\" y1=\"23\" x2=\"80\" y2=\"93\" />\n", | |
" <line x1=\"10\" y1=\"24\" x2=\"80\" y2=\"95\" />\n", | |
" <line x1=\"10\" y1=\"25\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"25\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"13\" y1=\"3\" x2=\"13\" y2=\"28\" />\n", | |
" <line x1=\"16\" y1=\"6\" x2=\"16\" y2=\"32\" />\n", | |
" <line x1=\"20\" y1=\"10\" x2=\"20\" y2=\"35\" />\n", | |
" <line x1=\"23\" y1=\"13\" x2=\"23\" y2=\"39\" />\n", | |
" <line x1=\"27\" y1=\"17\" x2=\"27\" y2=\"42\" />\n", | |
" <line x1=\"30\" y1=\"20\" x2=\"30\" y2=\"46\" />\n", | |
" <line x1=\"34\" y1=\"24\" x2=\"34\" y2=\"49\" />\n", | |
" <line x1=\"37\" y1=\"27\" x2=\"37\" y2=\"52\" />\n", | |
" <line x1=\"40\" y1=\"30\" x2=\"40\" y2=\"56\" />\n", | |
" <line x1=\"44\" y1=\"34\" x2=\"44\" y2=\"59\" />\n", | |
" <line x1=\"47\" y1=\"37\" x2=\"47\" y2=\"63\" />\n", | |
" <line x1=\"51\" y1=\"41\" x2=\"51\" y2=\"66\" />\n", | |
" <line x1=\"54\" y1=\"44\" x2=\"54\" y2=\"70\" />\n", | |
" <line x1=\"58\" y1=\"48\" x2=\"58\" y2=\"73\" />\n", | |
" <line x1=\"61\" y1=\"51\" x2=\"61\" y2=\"77\" />\n", | |
" <line x1=\"65\" y1=\"55\" x2=\"65\" y2=\"80\" />\n", | |
" <line x1=\"68\" y1=\"58\" x2=\"68\" y2=\"83\" />\n", | |
" <line x1=\"71\" y1=\"61\" x2=\"71\" y2=\"87\" />\n", | |
" <line x1=\"75\" y1=\"65\" x2=\"75\" y2=\"90\" />\n", | |
" <line x1=\"78\" y1=\"68\" x2=\"78\" y2=\"94\" />\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"10.000000,0.000000 80.588235,70.588235 80.588235,96.000852 10.000000,25.412617\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"35\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"13\" y1=\"3\" x2=\"38\" y2=\"3\" />\n", | |
" <line x1=\"16\" y1=\"6\" x2=\"42\" y2=\"6\" />\n", | |
" <line x1=\"20\" y1=\"10\" x2=\"45\" y2=\"10\" />\n", | |
" <line x1=\"23\" y1=\"13\" x2=\"49\" y2=\"13\" />\n", | |
" <line x1=\"27\" y1=\"17\" x2=\"52\" y2=\"17\" />\n", | |
" <line x1=\"30\" y1=\"20\" x2=\"56\" y2=\"20\" />\n", | |
" <line x1=\"34\" y1=\"24\" x2=\"59\" y2=\"24\" />\n", | |
" <line x1=\"37\" y1=\"27\" x2=\"62\" y2=\"27\" />\n", | |
" <line x1=\"40\" y1=\"30\" x2=\"66\" y2=\"30\" />\n", | |
" <line x1=\"44\" y1=\"34\" x2=\"69\" y2=\"34\" />\n", | |
" <line x1=\"47\" y1=\"37\" x2=\"73\" y2=\"37\" />\n", | |
" <line x1=\"51\" y1=\"41\" x2=\"76\" y2=\"41\" />\n", | |
" <line x1=\"54\" y1=\"44\" x2=\"80\" y2=\"44\" />\n", | |
" <line x1=\"58\" y1=\"48\" x2=\"83\" y2=\"48\" />\n", | |
" <line x1=\"61\" y1=\"51\" x2=\"87\" y2=\"51\" />\n", | |
" <line x1=\"65\" y1=\"55\" x2=\"90\" y2=\"55\" />\n", | |
" <line x1=\"68\" y1=\"58\" x2=\"93\" y2=\"58\" />\n", | |
" <line x1=\"71\" y1=\"61\" x2=\"97\" y2=\"61\" />\n", | |
" <line x1=\"75\" y1=\"65\" x2=\"100\" y2=\"65\" />\n", | |
" <line x1=\"78\" y1=\"68\" x2=\"104\" y2=\"68\" />\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"106\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"80\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"35\" y1=\"0\" x2=\"106\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"10.000000,0.000000 35.412617,0.000000 106.000852,70.588235 80.588235,70.588235\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"106\" y2=\"70\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"80\" y1=\"71\" x2=\"106\" y2=\"71\" />\n", | |
" <line x1=\"80\" y1=\"73\" x2=\"106\" y2=\"73\" />\n", | |
" <line x1=\"80\" y1=\"74\" x2=\"106\" y2=\"74\" />\n", | |
" <line x1=\"80\" y1=\"76\" x2=\"106\" y2=\"76\" />\n", | |
" <line x1=\"80\" y1=\"77\" x2=\"106\" y2=\"77\" />\n", | |
" <line x1=\"80\" y1=\"78\" x2=\"106\" y2=\"78\" />\n", | |
" <line x1=\"80\" y1=\"80\" x2=\"106\" y2=\"80\" />\n", | |
" <line x1=\"80\" y1=\"81\" x2=\"106\" y2=\"81\" />\n", | |
" <line x1=\"80\" y1=\"82\" x2=\"106\" y2=\"82\" />\n", | |
" <line x1=\"80\" y1=\"84\" x2=\"106\" y2=\"84\" />\n", | |
" <line x1=\"80\" y1=\"85\" x2=\"106\" y2=\"85\" />\n", | |
" <line x1=\"80\" y1=\"86\" x2=\"106\" y2=\"86\" />\n", | |
" <line x1=\"80\" y1=\"88\" x2=\"106\" y2=\"88\" />\n", | |
" <line x1=\"80\" y1=\"89\" x2=\"106\" y2=\"89\" />\n", | |
" <line x1=\"80\" y1=\"90\" x2=\"106\" y2=\"90\" />\n", | |
" <line x1=\"80\" y1=\"92\" x2=\"106\" y2=\"92\" />\n", | |
" <line x1=\"80\" y1=\"93\" x2=\"106\" y2=\"93\" />\n", | |
" <line x1=\"80\" y1=\"95\" x2=\"106\" y2=\"95\" />\n", | |
" <line x1=\"80\" y1=\"96\" x2=\"106\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"80\" y1=\"70\" x2=\"80\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"106\" y1=\"70\" x2=\"106\" y2=\"96\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"80.588235,70.588235 106.000852,70.588235 106.000852,96.000852 80.588235,96.000852\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"93.294544\" y=\"116.000852\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >2</text>\n", | |
" <text x=\"126.000852\" y=\"83.294544\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,126.000852,83.294544)\">1142</text>\n", | |
" <text x=\"35.294118\" y=\"80.706734\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(45,35.294118,80.706734)\">10752701</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"dask.array<asarray, shape=(10752701, 1142, 2), dtype=int8, chunksize=(524288, 61, 2), chunktype=cupy.ndarray>" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# convert to cupy array (move data to device)\n", | |
"c_gene = genotypes.map_blocks(cp.asarray)\n", | |
"c_gene" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 2.44 s, sys: 327 ms, total: 2.76 s\n", | |
"Wall time: 14.3 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"# store data in device memory\n", | |
"_ = wait(c_gene.persist())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 98.24 GB </td> <td> 255.85 MB </td></tr>\n", | |
" <tr><th> Shape </th><td> (10752701, 1142) </td> <td> (524288, 61) </td></tr>\n", | |
" <tr><th> Count </th><td> 1996 Tasks </td><td> 399 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> int64 </td><td> cupy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"5\" x2=\"25\" y2=\"5\" />\n", | |
" <line x1=\"0\" y1=\"11\" x2=\"25\" y2=\"11\" />\n", | |
" <line x1=\"0\" y1=\"17\" x2=\"25\" y2=\"17\" />\n", | |
" <line x1=\"0\" y1=\"23\" x2=\"25\" y2=\"23\" />\n", | |
" <line x1=\"0\" y1=\"29\" x2=\"25\" y2=\"29\" />\n", | |
" <line x1=\"0\" y1=\"35\" x2=\"25\" y2=\"35\" />\n", | |
" <line x1=\"0\" y1=\"40\" x2=\"25\" y2=\"40\" />\n", | |
" <line x1=\"0\" y1=\"46\" x2=\"25\" y2=\"46\" />\n", | |
" <line x1=\"0\" y1=\"52\" x2=\"25\" y2=\"52\" />\n", | |
" <line x1=\"0\" y1=\"58\" x2=\"25\" y2=\"58\" />\n", | |
" <line x1=\"0\" y1=\"64\" x2=\"25\" y2=\"64\" />\n", | |
" <line x1=\"0\" y1=\"70\" x2=\"25\" y2=\"70\" />\n", | |
" <line x1=\"0\" y1=\"76\" x2=\"25\" y2=\"76\" />\n", | |
" <line x1=\"0\" y1=\"81\" x2=\"25\" y2=\"81\" />\n", | |
" <line x1=\"0\" y1=\"87\" x2=\"25\" y2=\"87\" />\n", | |
" <line x1=\"0\" y1=\"93\" x2=\"25\" y2=\"93\" />\n", | |
" <line x1=\"0\" y1=\"99\" x2=\"25\" y2=\"99\" />\n", | |
" <line x1=\"0\" y1=\"105\" x2=\"25\" y2=\"105\" />\n", | |
" <line x1=\"0\" y1=\"111\" x2=\"25\" y2=\"111\" />\n", | |
" <line x1=\"0\" y1=\"117\" x2=\"25\" y2=\"117\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"1\" y1=\"0\" x2=\"1\" y2=\"120\" />\n", | |
" <line x1=\"2\" y1=\"0\" x2=\"2\" y2=\"120\" />\n", | |
" <line x1=\"4\" y1=\"0\" x2=\"4\" y2=\"120\" />\n", | |
" <line x1=\"5\" y1=\"0\" x2=\"5\" y2=\"120\" />\n", | |
" <line x1=\"6\" y1=\"0\" x2=\"6\" y2=\"120\" />\n", | |
" <line x1=\"8\" y1=\"0\" x2=\"8\" y2=\"120\" />\n", | |
" <line x1=\"9\" y1=\"0\" x2=\"9\" y2=\"120\" />\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"120\" />\n", | |
" <line x1=\"12\" y1=\"0\" x2=\"12\" y2=\"120\" />\n", | |
" <line x1=\"13\" y1=\"0\" x2=\"13\" y2=\"120\" />\n", | |
" <line x1=\"14\" y1=\"0\" x2=\"14\" y2=\"120\" />\n", | |
" <line x1=\"16\" y1=\"0\" x2=\"16\" y2=\"120\" />\n", | |
" <line x1=\"17\" y1=\"0\" x2=\"17\" y2=\"120\" />\n", | |
" <line x1=\"19\" y1=\"0\" x2=\"19\" y2=\"120\" />\n", | |
" <line x1=\"20\" y1=\"0\" x2=\"20\" y2=\"120\" />\n", | |
" <line x1=\"21\" y1=\"0\" x2=\"21\" y2=\"120\" />\n", | |
" <line x1=\"23\" y1=\"0\" x2=\"23\" y2=\"120\" />\n", | |
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"120\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1142</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">10752701</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"dask.array<sum-aggregate, shape=(10752701, 1142), dtype=int64, chunksize=(524288, 61), chunktype=cupy.ndarray>" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# clean data\n", | |
"gn = da.sum(c_gene > 0, axis=2)\n", | |
"gn" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(10751847)" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"is_informative = da.any(gn != gn[:, 0, None], axis=1).compute()\n", | |
"cp.count_nonzero(is_informative)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# convert conditional array to host data\n", | |
"# https://github.com/dask/dask/issues/6169\n", | |
"cond = cp.asnumpy(is_informative)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 98.23 GB </td> <td> 255.85 MB </td></tr>\n", | |
" <tr><th> Shape </th><td> (10751847, 1142) </td> <td> (524285, 61) </td></tr>\n", | |
" <tr><th> Count </th><td> 2395 Tasks </td><td> 399 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> int64 </td><td> cupy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"5\" x2=\"25\" y2=\"5\" />\n", | |
" <line x1=\"0\" y1=\"11\" x2=\"25\" y2=\"11\" />\n", | |
" <line x1=\"0\" y1=\"17\" x2=\"25\" y2=\"17\" />\n", | |
" <line x1=\"0\" y1=\"23\" x2=\"25\" y2=\"23\" />\n", | |
" <line x1=\"0\" y1=\"29\" x2=\"25\" y2=\"29\" />\n", | |
" <line x1=\"0\" y1=\"35\" x2=\"25\" y2=\"35\" />\n", | |
" <line x1=\"0\" y1=\"40\" x2=\"25\" y2=\"40\" />\n", | |
" <line x1=\"0\" y1=\"46\" x2=\"25\" y2=\"46\" />\n", | |
" <line x1=\"0\" y1=\"52\" x2=\"25\" y2=\"52\" />\n", | |
" <line x1=\"0\" y1=\"58\" x2=\"25\" y2=\"58\" />\n", | |
" <line x1=\"0\" y1=\"64\" x2=\"25\" y2=\"64\" />\n", | |
" <line x1=\"0\" y1=\"70\" x2=\"25\" y2=\"70\" />\n", | |
" <line x1=\"0\" y1=\"76\" x2=\"25\" y2=\"76\" />\n", | |
" <line x1=\"0\" y1=\"81\" x2=\"25\" y2=\"81\" />\n", | |
" <line x1=\"0\" y1=\"87\" x2=\"25\" y2=\"87\" />\n", | |
" <line x1=\"0\" y1=\"93\" x2=\"25\" y2=\"93\" />\n", | |
" <line x1=\"0\" y1=\"99\" x2=\"25\" y2=\"99\" />\n", | |
" <line x1=\"0\" y1=\"105\" x2=\"25\" y2=\"105\" />\n", | |
" <line x1=\"0\" y1=\"111\" x2=\"25\" y2=\"111\" />\n", | |
" <line x1=\"0\" y1=\"117\" x2=\"25\" y2=\"117\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"1\" y1=\"0\" x2=\"1\" y2=\"120\" />\n", | |
" <line x1=\"2\" y1=\"0\" x2=\"2\" y2=\"120\" />\n", | |
" <line x1=\"4\" y1=\"0\" x2=\"4\" y2=\"120\" />\n", | |
" <line x1=\"5\" y1=\"0\" x2=\"5\" y2=\"120\" />\n", | |
" <line x1=\"6\" y1=\"0\" x2=\"6\" y2=\"120\" />\n", | |
" <line x1=\"8\" y1=\"0\" x2=\"8\" y2=\"120\" />\n", | |
" <line x1=\"9\" y1=\"0\" x2=\"9\" y2=\"120\" />\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"120\" />\n", | |
" <line x1=\"12\" y1=\"0\" x2=\"12\" y2=\"120\" />\n", | |
" <line x1=\"13\" y1=\"0\" x2=\"13\" y2=\"120\" />\n", | |
" <line x1=\"14\" y1=\"0\" x2=\"14\" y2=\"120\" />\n", | |
" <line x1=\"16\" y1=\"0\" x2=\"16\" y2=\"120\" />\n", | |
" <line x1=\"17\" y1=\"0\" x2=\"17\" y2=\"120\" />\n", | |
" <line x1=\"19\" y1=\"0\" x2=\"19\" y2=\"120\" />\n", | |
" <line x1=\"20\" y1=\"0\" x2=\"20\" y2=\"120\" />\n", | |
" <line x1=\"21\" y1=\"0\" x2=\"21\" y2=\"120\" />\n", | |
" <line x1=\"23\" y1=\"0\" x2=\"23\" y2=\"120\" />\n", | |
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"120\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1142</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">10751847</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"dask.array<getitem, shape=(10751847, 1142), dtype=int64, chunksize=(524285, 61), chunktype=cupy.ndarray>" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gn_informative = da.compress(cond, gn, axis=0)\n", | |
"gn_informative" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<tr>\n", | |
"<td>\n", | |
"<table>\n", | |
" <thead>\n", | |
" <tr><td> </td><th> Array </th><th> Chunk </th></tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr><th> Bytes </th><td> 98.23 GB </td> <td> 255.85 MB </td></tr>\n", | |
" <tr><th> Shape </th><td> (10751847, 1142) </td> <td> (524285, 61) </td></tr>\n", | |
" <tr><th> Count </th><td> 4348 Tasks </td><td> 399 Chunks </td></tr>\n", | |
" <tr><th> Type </th><td> float64 </td><td> cupy.ndarray </td></tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</td>\n", | |
"<td>\n", | |
"<svg width=\"75\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n", | |
"\n", | |
" <!-- Horizontal lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"25\" y2=\"0\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"0\" y1=\"5\" x2=\"25\" y2=\"5\" />\n", | |
" <line x1=\"0\" y1=\"11\" x2=\"25\" y2=\"11\" />\n", | |
" <line x1=\"0\" y1=\"17\" x2=\"25\" y2=\"17\" />\n", | |
" <line x1=\"0\" y1=\"23\" x2=\"25\" y2=\"23\" />\n", | |
" <line x1=\"0\" y1=\"29\" x2=\"25\" y2=\"29\" />\n", | |
" <line x1=\"0\" y1=\"35\" x2=\"25\" y2=\"35\" />\n", | |
" <line x1=\"0\" y1=\"40\" x2=\"25\" y2=\"40\" />\n", | |
" <line x1=\"0\" y1=\"46\" x2=\"25\" y2=\"46\" />\n", | |
" <line x1=\"0\" y1=\"52\" x2=\"25\" y2=\"52\" />\n", | |
" <line x1=\"0\" y1=\"58\" x2=\"25\" y2=\"58\" />\n", | |
" <line x1=\"0\" y1=\"64\" x2=\"25\" y2=\"64\" />\n", | |
" <line x1=\"0\" y1=\"70\" x2=\"25\" y2=\"70\" />\n", | |
" <line x1=\"0\" y1=\"76\" x2=\"25\" y2=\"76\" />\n", | |
" <line x1=\"0\" y1=\"81\" x2=\"25\" y2=\"81\" />\n", | |
" <line x1=\"0\" y1=\"87\" x2=\"25\" y2=\"87\" />\n", | |
" <line x1=\"0\" y1=\"93\" x2=\"25\" y2=\"93\" />\n", | |
" <line x1=\"0\" y1=\"99\" x2=\"25\" y2=\"99\" />\n", | |
" <line x1=\"0\" y1=\"105\" x2=\"25\" y2=\"105\" />\n", | |
" <line x1=\"0\" y1=\"111\" x2=\"25\" y2=\"111\" />\n", | |
" <line x1=\"0\" y1=\"117\" x2=\"25\" y2=\"117\" />\n", | |
" <line x1=\"0\" y1=\"120\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Vertical lines -->\n", | |
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
" <line x1=\"1\" y1=\"0\" x2=\"1\" y2=\"120\" />\n", | |
" <line x1=\"2\" y1=\"0\" x2=\"2\" y2=\"120\" />\n", | |
" <line x1=\"4\" y1=\"0\" x2=\"4\" y2=\"120\" />\n", | |
" <line x1=\"5\" y1=\"0\" x2=\"5\" y2=\"120\" />\n", | |
" <line x1=\"6\" y1=\"0\" x2=\"6\" y2=\"120\" />\n", | |
" <line x1=\"8\" y1=\"0\" x2=\"8\" y2=\"120\" />\n", | |
" <line x1=\"9\" y1=\"0\" x2=\"9\" y2=\"120\" />\n", | |
" <line x1=\"10\" y1=\"0\" x2=\"10\" y2=\"120\" />\n", | |
" <line x1=\"12\" y1=\"0\" x2=\"12\" y2=\"120\" />\n", | |
" <line x1=\"13\" y1=\"0\" x2=\"13\" y2=\"120\" />\n", | |
" <line x1=\"14\" y1=\"0\" x2=\"14\" y2=\"120\" />\n", | |
" <line x1=\"16\" y1=\"0\" x2=\"16\" y2=\"120\" />\n", | |
" <line x1=\"17\" y1=\"0\" x2=\"17\" y2=\"120\" />\n", | |
" <line x1=\"19\" y1=\"0\" x2=\"19\" y2=\"120\" />\n", | |
" <line x1=\"20\" y1=\"0\" x2=\"20\" y2=\"120\" />\n", | |
" <line x1=\"21\" y1=\"0\" x2=\"21\" y2=\"120\" />\n", | |
" <line x1=\"23\" y1=\"0\" x2=\"23\" y2=\"120\" />\n", | |
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"120\" />\n", | |
" <line x1=\"25\" y1=\"0\" x2=\"25\" y2=\"120\" style=\"stroke-width:2\" />\n", | |
"\n", | |
" <!-- Colored Rectangle -->\n", | |
" <polygon points=\"0.000000,0.000000 25.412617,0.000000 25.412617,120.000000 0.000000,120.000000\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n", | |
"\n", | |
" <!-- Text -->\n", | |
" <text x=\"12.706308\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >1142</text>\n", | |
" <text x=\"45.412617\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,45.412617,60.000000)\">10751847</text>\n", | |
"</svg>\n", | |
"</td>\n", | |
"</tr>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"dask.array<truediv, shape=(10751847, 1142), dtype=float64, chunksize=(524285, 61), chunktype=cupy.ndarray>" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"m = da.mean(gn_informative, axis=1, keepdims=True)\n", | |
"s = da.std(gn_informative, axis=1, keepdims=True)\n", | |
"gn_scaled = (gn_informative - m) / s\n", | |
"gn_scaled" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# optional small size\n", | |
"# gn_small = gn_scaled[::1000].compute()\n", | |
"# gn_small" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# generate chunked dask arrays of many cupy random arrays\n", | |
"rs = da.random.RandomState(RandomState=cp.random.RandomState)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 6.85 s, sys: 3.1 s, total: 9.95 s\n", | |
"Wall time: 18.7 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"with performance_report(filename=\"dask-zarr-svd-report.html\"):\n", | |
" u, s, v = da.linalg.svd_compressed(gn_scaled, k=10, seed=rs)\n", | |
" v.compute()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:20200501]", | |
"language": "python", | |
"name": "conda-env-20200501-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment