Skip to content

Instantly share code, notes, and snippets.

@ivirshup
Created May 24, 2022 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivirshup/3d30efdcb3080952ee1c890e8f96cbf1 to your computer and use it in GitHub Desktop.
Save ivirshup/3d30efdcb3080952ee1c890e8f96cbf1 to your computer and use it in GitHub Desktop.
jupyter-scatter + citeseq example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "54dd22df",
"metadata": {},
"source": [
"# Setup (from muon tutorial)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "18ddfce4",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import scanpy as sc\n",
"import muon as mu\n",
"from muon import prot as pt"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "56a327af",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/isaac/github/anndata/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n",
" utils.warn_names_duplicates(\"var\")\n",
"/Users/isaac/github/mudata/mudata/_core/mudata.py:405: UserWarning: var_names are not unique. To make them unique, call `.var_names_make_unique`.\n",
" warnings.warn(\n",
"/Users/isaac/github/anndata/anndata/_core/anndata.py:1830: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n",
" utils.warn_names_duplicates(\"var\")\n",
"/Users/isaac/github/mudata/mudata/_core/mudata.py:405: UserWarning: var_names are not unique. To make them unique, call `.var_names_make_unique`.\n",
" warnings.warn(\n"
]
}
],
"source": [
"mdata = mu.read_10x_h5(\"/Users/isaac/data/5k_pbmc_protein_v3/filtered_feature_bc_matrix.h5\")\n",
"mdata_raw = mu.read_10x_h5(\"/Users/isaac/data/5k_pbmc_protein_v3/raw_feature_bc_matrix.h5\") # This can be quite slow"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "da777194",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre>MuData object with n_obs × n_vars = 5247 × 33570\n",
" var:\t&#x27;gene_ids&#x27;, &#x27;feature_types&#x27;, &#x27;genome&#x27;\n",
" 2 modalities\n",
" rna:\t5247 x 33538\n",
" var:\t&#x27;gene_ids&#x27;, &#x27;feature_types&#x27;, &#x27;genome&#x27;\n",
" prot:\t5247 x 32\n",
" var:\t&#x27;gene_ids&#x27;, &#x27;feature_types&#x27;, &#x27;genome&#x27;</pre>"
],
"text/plain": [
"MuData object with n_obs × n_vars = 5247 × 33570\n",
" var:\t'gene_ids', 'feature_types', 'genome'\n",
" 2 modalities\n",
" rna:\t5247 x 33538\n",
" var:\t'gene_ids', 'feature_types', 'genome'\n",
" prot:\t5247 x 32\n",
" var:\t'gene_ids', 'feature_types', 'genome'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mdata"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "986a7409",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.9/site-packages/muon/_prot/preproc.py:109: UserWarning: empty_counts_range values are not provided, treating all the non-cells as empty droplets\n",
" warn(\n"
]
}
],
"source": [
"pt.pp.dsb(mdata, data_raw=mdata_raw)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "b3002231",
"metadata": {},
"outputs": [],
"source": [
"prot = mdata.mod[\"prot\"]"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "8a71793f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[11.682451 , 25.302616 , 8.956743 , ..., 3.5722427, 1.2447473,\n",
" 4.7979374],\n",
" [41.978897 , 39.361183 , 6.458017 , ..., 4.1822805, 3.488111 ,\n",
" 2.703628 ],\n",
" [41.68785 , 46.374855 , 11.26968 , ..., 2.5616493, 0.8283367,\n",
" 3.9854221],\n",
" ...,\n",
" [34.96165 , 3.4794345, 31.56558 , ..., 1.8609884, 1.9400594,\n",
" 1.8395486],\n",
" [ 5.713143 , 17.781412 , 4.512927 , ..., 3.4425242, 4.967757 ,\n",
" 3.4117422],\n",
" [47.315876 , 47.650482 , 6.7013407, ..., 4.0108767, 2.5449634,\n",
" 4.6269016]], dtype=float32)"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prot.X"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "63d07e93",
"metadata": {},
"outputs": [],
"source": [
"sc.tl.pca(prot)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "049494e2",
"metadata": {},
"outputs": [],
"source": [
"sc.pp.neighbors(prot)\n",
"sc.tl.umap(prot, random_state=1)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "04aecf9f",
"metadata": {},
"outputs": [],
"source": [
"sc.tl.leiden(prot, resolution=0.5)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "450ba97f",
"metadata": {},
"outputs": [],
"source": [
"rna = mdata.mod[\"rna\"]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "c169081c",
"metadata": {},
"outputs": [],
"source": [
"sc.pp.normalize_total(rna)\n",
"sc.pp.log1p(rna)\n",
"sc.pp.pca(rna)\n",
"sc.pp.neighbors(rna, n_neighbors=30)\n",
"sc.tl.leiden(rna, resolution=0.5)\n",
"sc.tl.umap(rna)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "1d52c5ae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AnnData object with n_obs × n_vars = 5247 × 33538\n",
" obs: 'leiden'\n",
" var: 'gene_ids', 'feature_types', 'genome'\n",
" uns: 'log1p', 'pca', 'neighbors', 'leiden', 'umap'\n",
" obsm: 'X_pca', 'X_umap'\n",
" varm: 'PCs'\n",
" obsp: 'distances', 'connectivities'"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rna"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "82ed7edf",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['CD1A',\n",
" 'CD1B',\n",
" 'CD1C',\n",
" 'CD1D',\n",
" 'CD1E',\n",
" 'CD2',\n",
" 'CD2AP',\n",
" 'CD2BP2',\n",
" 'CD3D',\n",
" 'CD3E',\n",
" 'CD3EAP',\n",
" 'CD3G',\n",
" 'CD4',\n",
" 'CD5',\n",
" 'CD5L',\n",
" 'CD6',\n",
" 'CD7',\n",
" 'CD8A',\n",
" 'CD8B',\n",
" 'CD8B2',\n",
" 'CD9',\n",
" 'CD14',\n",
" 'CD19',\n",
" 'CD22',\n",
" 'CD24',\n",
" 'CD27',\n",
" 'CD28',\n",
" 'CD33',\n",
" 'CD34',\n",
" 'CD36',\n",
" 'CD37',\n",
" 'CD38',\n",
" 'CD40',\n",
" 'CD40LG',\n",
" 'CD44',\n",
" 'CD44-AS1',\n",
" 'CD46',\n",
" 'CD47',\n",
" 'CD48',\n",
" 'CD52',\n",
" 'CD53',\n",
" 'CD55',\n",
" 'CD58',\n",
" 'CD59',\n",
" 'CD63',\n",
" 'CD68',\n",
" 'CD69',\n",
" 'CD70',\n",
" 'CD72',\n",
" 'CD74',\n",
" 'CD79A',\n",
" 'CD79B',\n",
" 'CD80',\n",
" 'CD81',\n",
" 'CD81-AS1',\n",
" 'CD82',\n",
" 'CD83',\n",
" 'CD84',\n",
" 'CD86',\n",
" 'CD93',\n",
" 'CD96',\n",
" 'CD99',\n",
" 'CD99L2',\n",
" 'CD101',\n",
" 'CD109',\n",
" 'CD151',\n",
" 'CD160',\n",
" 'CD163',\n",
" 'CD163L1',\n",
" 'CD164',\n",
" 'CD164L2',\n",
" 'CD177',\n",
" 'CD180',\n",
" 'CD200',\n",
" 'CD200R1',\n",
" 'CD200R1L',\n",
" 'CD207',\n",
" 'CD209',\n",
" 'CD226',\n",
" 'CD244',\n",
" 'CD247',\n",
" 'CD248',\n",
" 'CD274',\n",
" 'CD276',\n",
" 'CD300A',\n",
" 'CD300C',\n",
" 'CD300E',\n",
" 'CD300LB',\n",
" 'CD300LD',\n",
" 'CD300LF',\n",
" 'CD300LG',\n",
" 'CD302',\n",
" 'CD320']"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"natsorted(rna.var_names[rna.var_names.str.match(r\"CD[0-9]\")])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "b12a2a39",
"metadata": {},
"outputs": [],
"source": [
"rna.var_names_make_unique()"
]
},
{
"cell_type": "markdown",
"id": "3c4221be",
"metadata": {},
"source": [
"# Plotting"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "07e2c5e9",
"metadata": {},
"outputs": [],
"source": [
"rna_plot_df = sc.get.obs_df(rna, [\"leiden\", \"CD4\", \"CD19\"], obsm_keys=[(\"X_umap\", 0), (\"X_umap\", 1)])\n",
"prot_plot_df = sc.get.obs_df(prot, [\"leiden\", \"CD4_TotalSeqB\", \"CD19_TotalSeqB\"], obsm_keys=[(\"X_umap\", 0), (\"X_umap\", 1)])"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "62e49783",
"metadata": {},
"outputs": [],
"source": [
"import jscatter"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "cb1ec00c",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f8869d3dd0fd4cba92a68e7f24753612",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"GridBox(children=(HBox(children=(VBox(children=(Button(button_style='primary', icon='arrows', layout=Layout(wi…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"jscatter.link(\n",
" [\n",
" jscatter.Scatter(data=rna_plot_df, x=\"X_umap-0\", y=\"X_umap-1\", color_by=\"CD4\"),\n",
" jscatter.Scatter(data=rna_plot_df, x=\"X_umap-0\", y=\"X_umap-1\", color_by=\"leiden\"),\n",
" jscatter.Scatter(data=prot_plot_df, x=\"X_umap-0\", y=\"X_umap-1\", color_by=\"CD4_TotalSeqB\"),\n",
" jscatter.Scatter(data=prot_plot_df, x=\"X_umap-0\", y=\"X_umap-1\", color_by=\"leiden\"),\n",
" ],\n",
" rows=2\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3.bak"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment