Created
May 24, 2022 13:29
-
-
Save ivirshup/3d30efdcb3080952ee1c890e8f96cbf1 to your computer and use it in GitHub Desktop.
jupyter-scatter + citeseq example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "54dd22df", | |
"metadata": {}, | |
"source": [ | |
"# Setup (from muon tutorial)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "18ddfce4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import scanpy as sc\n", | |
"import muon as mu\n", | |
"from muon import prot as pt\n", | |
"from natsort import natsorted" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "56a327af", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/isaac/github/anndata/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", | |
" utils.warn_names_duplicates(\"var\")\n", | |
"/Users/isaac/github/mudata/mudata/_core/mudata.py:405: UserWarning: var_names are not unique. To make them unique, call `.var_names_make_unique`.\n", | |
" warnings.warn(\n", | |
"/Users/isaac/github/anndata/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", | |
" utils.warn_names_duplicates(\"var\")\n", | |
"/Users/isaac/github/mudata/mudata/_core/mudata.py:405: UserWarning: var_names are not unique. To make them unique, call `.var_names_make_unique`.\n", | |
" warnings.warn(\n" | |
] | |
} | |
], | |
"source": [ | |
"# Directory holding the two 10x HDF5 matrices read below.\n", | |
"# Override it by setting the PBMC_DATA_DIR environment variable; the fallback\n", | |
"# is the directory this notebook was originally run against.\n", | |
"DATA_DIR = os.environ.get(\"PBMC_DATA_DIR\", \"/Users/isaac/data/5k_pbmc_protein_v3\")\n", | |
"\n", | |
"# Filtered matrix: the barcodes kept as cells.\n", | |
"mdata = mu.read_10x_h5(os.path.join(DATA_DIR, \"filtered_feature_bc_matrix.h5\"))\n", | |
"# Raw matrix: passed to pt.pp.dsb as data_raw in a later cell.\n", | |
"mdata_raw = mu.read_10x_h5(os.path.join(DATA_DIR, \"raw_feature_bc_matrix.h5\")) # This can be quite slow" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "da777194", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<pre>MuData object with n_obs × n_vars = 5247 × 33570\n", | |
" var:\t'gene_ids', 'feature_types', 'genome'\n", | |
" 2 modalities\n", | |
" rna:\t5247 x 33538\n", | |
" var:\t'gene_ids', 'feature_types', 'genome'\n", | |
" prot:\t5247 x 32\n", | |
" var:\t'gene_ids', 'feature_types', 'genome'</pre>" | |
], | |
"text/plain": [ | |
"MuData object with n_obs × n_vars = 5247 × 33570\n", | |
" var:\t'gene_ids', 'feature_types', 'genome'\n", | |
" 2 modalities\n", | |
" rna:\t5247 x 33538\n", | |
" var:\t'gene_ids', 'feature_types', 'genome'\n", | |
" prot:\t5247 x 32\n", | |
" var:\t'gene_ids', 'feature_types', 'genome'" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"mdata" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "986a7409", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.9/site-packages/muon/_prot/preproc.py:109: UserWarning: empty_counts_range values are not provided, treating all the non-cells as empty droplets\n", | |
" warn(\n" | |
] | |
} | |
], | |
"source": [ | |
"pt.pp.dsb(mdata, data_raw=mdata_raw)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"id": "b3002231", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"prot = mdata.mod[\"prot\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"id": "8a71793f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[11.682451 , 25.302616 , 8.956743 , ..., 3.5722427, 1.2447473,\n", | |
" 4.7979374],\n", | |
" [41.978897 , 39.361183 , 6.458017 , ..., 4.1822805, 3.488111 ,\n", | |
" 2.703628 ],\n", | |
" [41.68785 , 46.374855 , 11.26968 , ..., 2.5616493, 0.8283367,\n", | |
" 3.9854221],\n", | |
" ...,\n", | |
" [34.96165 , 3.4794345, 31.56558 , ..., 1.8609884, 1.9400594,\n", | |
" 1.8395486],\n", | |
" [ 5.713143 , 17.781412 , 4.512927 , ..., 3.4425242, 4.967757 ,\n", | |
" 3.4117422],\n", | |
" [47.315876 , 47.650482 , 6.7013407, ..., 4.0108767, 2.5449634,\n", | |
" 4.6269016]], dtype=float32)" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"prot.X" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"id": "63d07e93", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sc.tl.pca(prot)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"id": "049494e2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sc.pp.neighbors(prot)\n", | |
"sc.tl.umap(prot, random_state=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"id": "04aecf9f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sc.tl.leiden(prot, resolution=0.5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "450ba97f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rna = mdata.mod[\"rna\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "c169081c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sc.pp.normalize_total(rna)\n", | |
"sc.pp.log1p(rna)\n", | |
"sc.pp.pca(rna)\n", | |
"sc.pp.neighbors(rna, n_neighbors=30)\n", | |
"sc.tl.leiden(rna, resolution=0.5)\n", | |
"sc.tl.umap(rna)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "1d52c5ae", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"AnnData object with n_obs × n_vars = 5247 × 33538\n", | |
" obs: 'leiden'\n", | |
" var: 'gene_ids', 'feature_types', 'genome'\n", | |
" uns: 'log1p', 'pca', 'neighbors', 'leiden', 'umap'\n", | |
" obsm: 'X_pca', 'X_umap'\n", | |
" varm: 'PCs'\n", | |
" obsp: 'distances', 'connectivities'" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"rna" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"id": "82ed7edf", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['CD1A',\n", | |
" 'CD1B',\n", | |
" 'CD1C',\n", | |
" 'CD1D',\n", | |
" 'CD1E',\n", | |
" 'CD2',\n", | |
" 'CD2AP',\n", | |
" 'CD2BP2',\n", | |
" 'CD3D',\n", | |
" 'CD3E',\n", | |
" 'CD3EAP',\n", | |
" 'CD3G',\n", | |
" 'CD4',\n", | |
" 'CD5',\n", | |
" 'CD5L',\n", | |
" 'CD6',\n", | |
" 'CD7',\n", | |
" 'CD8A',\n", | |
" 'CD8B',\n", | |
" 'CD8B2',\n", | |
" 'CD9',\n", | |
" 'CD14',\n", | |
" 'CD19',\n", | |
" 'CD22',\n", | |
" 'CD24',\n", | |
" 'CD27',\n", | |
" 'CD28',\n", | |
" 'CD33',\n", | |
" 'CD34',\n", | |
" 'CD36',\n", | |
" 'CD37',\n", | |
" 'CD38',\n", | |
" 'CD40',\n", | |
" 'CD40LG',\n", | |
" 'CD44',\n", | |
" 'CD44-AS1',\n", | |
" 'CD46',\n", | |
" 'CD47',\n", | |
" 'CD48',\n", | |
" 'CD52',\n", | |
" 'CD53',\n", | |
" 'CD55',\n", | |
" 'CD58',\n", | |
" 'CD59',\n", | |
" 'CD63',\n", | |
" 'CD68',\n", | |
" 'CD69',\n", | |
" 'CD70',\n", | |
" 'CD72',\n", | |
" 'CD74',\n", | |
" 'CD79A',\n", | |
" 'CD79B',\n", | |
" 'CD80',\n", | |
" 'CD81',\n", | |
" 'CD81-AS1',\n", | |
" 'CD82',\n", | |
" 'CD83',\n", | |
" 'CD84',\n", | |
" 'CD86',\n", | |
" 'CD93',\n", | |
" 'CD96',\n", | |
" 'CD99',\n", | |
" 'CD99L2',\n", | |
" 'CD101',\n", | |
" 'CD109',\n", | |
" 'CD151',\n", | |
" 'CD160',\n", | |
" 'CD163',\n", | |
" 'CD163L1',\n", | |
" 'CD164',\n", | |
" 'CD164L2',\n", | |
" 'CD177',\n", | |
" 'CD180',\n", | |
" 'CD200',\n", | |
" 'CD200R1',\n", | |
" 'CD200R1L',\n", | |
" 'CD207',\n", | |
" 'CD209',\n", | |
" 'CD226',\n", | |
" 'CD244',\n", | |
" 'CD247',\n", | |
" 'CD248',\n", | |
" 'CD274',\n", | |
" 'CD276',\n", | |
" 'CD300A',\n", | |
" 'CD300C',\n", | |
" 'CD300E',\n", | |
" 'CD300LB',\n", | |
" 'CD300LD',\n", | |
" 'CD300LF',\n", | |
" 'CD300LG',\n", | |
" 'CD302',\n", | |
" 'CD320']" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"natsorted(rna.var_names[rna.var_names.str.match(r\"CD[0-9]\")])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "b12a2a39", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"rna.var_names_make_unique()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "3c4221be", | |
"metadata": {}, | |
"source": [ | |
"# Plotting" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"id": "07e2c5e9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One plotting table per modality: the leiden labels, two marker columns,\n", | |
"# and both UMAP coordinates (requested via obsm_keys).\n", | |
"rna_plot_df, prot_plot_df = (\n", | |
"    sc.get.obs_df(modality, columns, obsm_keys=[(\"X_umap\", 0), (\"X_umap\", 1)])\n", | |
"    for modality, columns in (\n", | |
"        (rna, [\"leiden\", \"CD4\", \"CD19\"]),\n", | |
"        (prot, [\"leiden\", \"CD4_TotalSeqB\", \"CD19_TotalSeqB\"]),\n", | |
"    )\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"id": "62e49783", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import jscatter" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"id": "cb1ec00c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "f8869d3dd0fd4cba92a68e7f24753612", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"GridBox(children=(HBox(children=(VBox(children=(Button(button_style='primary', icon='arrows', layout=Layout(wi…" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Four linked UMAP panels in two rows: the RNA embedding coloured by CD4 and\n", | |
"# by leiden, then the protein embedding coloured by CD4_TotalSeqB and by leiden.\n", | |
"jscatter.link(\n", | |
"    [\n", | |
"        jscatter.Scatter(data=frame, x=\"X_umap-0\", y=\"X_umap-1\", color_by=column)\n", | |
"        for frame, column in (\n", | |
"            (rna_plot_df, \"CD4\"),\n", | |
"            (rna_plot_df, \"leiden\"),\n", | |
"            (prot_plot_df, \"CD4_TotalSeqB\"),\n", | |
"            (prot_plot_df, \"leiden\"),\n", | |
"        )\n", | |
"    ],\n", | |
"    rows=2,\n", | |
")" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3.bak" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment