Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@mtmorgan
Last active December 23, 2022 22:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtmorgan/f6607493caaed612e174d350f2d93275 to your computer and use it in GitHub Desktop.
Save mtmorgan/f6607493caaed612e174d350f2d93275 to your computer and use it in GitHub Desktop.
Tabula muris liver discovery & visualization
##
## setup
##
if (!"BiocManager" %in% rownames(installed.packages()))
install.packages("BiocManager", repository = "https://cran.r-project.org")
BiocManager::install(
## only reinstall if newer version available
c("cellxgenedp", "plotly")
)
BiocManager::install(
## github package; experimental!
"mtmorgan/h5ad"
)
##
## quick start
##
library(cellxgenedp)
library(h5ad)
## filter on, e.g., Tissue == 'liver' and Organism = 'Mus', select
## first row, and click 'Quit and download
h5ad_file = cxg()
## navigate the data in R
h5ad <- h5ad(h5ad_file$local_path)
## row (gene) or column (cell) annotations are easy...
## read and visualize the 'n_genes" column (cells)
h5ad |>
column_data(j = "n_genes") |>
unlist() |>
hist(breaks = 40)
## expression values take more work, and is not currently efficient...
## which columns (cells) do we want?
is_age_30m_and_kupffer <-
h5ad |>
column_data(j = c("age", "cell_type")) |>
with(age == "30m" & cell_type == "Kupffer cell")
## which rows (genes) do want?
is_Cd36 <-
h5ad |>
row_data(j = "feature_name") |>
with(feature_name == "Cd36")
selected_cells <-
## this step is slow and could use a lot of memory at the moment...
h5ad |>
## retrieve just the cells of interest...
layer(j = is_age_30m_and_kupffer) |>
## ...then filter for rows of interest
filter(row %in% which(is_Cd36))
## histogram
selected_cells |>
pull(data) |>
hist()
##
## A longer tour
##
## 'discover' the Tabula muris collection, liver dataset, and H5AD
## file in CELLxGENE
##
library(cellxgenedp)
## find the Tabula muris 'collection_id'
tabula_muris_collection_id <-
collections() |>
filter(grepl("tabula muris", name, ignore.case = TRUE)) |>
pull(collection_id)
## find the 10x liver dataset
liver_10x <-
datasets() |>
filter(
collection_id == tabula_muris_collection_id,
facets_filter(tissue, "label", "liver"),
facets_filter(assay, "label", "10x 3' v2"),
is_primary_data == "SECONDARY"
)
liver_10x_dataset_id <- liver_10x |> pull(dataset_id)
## find the 'h5ad' files
liver_h5ad <-
files() |>
filter(
dataset_id == liver_10x_dataset_id,
filetype == "H5AD"
)
## download the file to a local cache for easy reference
liver_h5ad_file <-
liver_h5ad |>
files_download(dry.run = FALSE)
##
## manipulate the file in R
##
library(h5ad)
h5ad <- h5ad(liver_h5ad_file)
## > h5ad
## class: H5AD
## dim: 17985 x 7294
## layer names: X raw
## column_data names: age cell free_annotation method donor_id n_genes
## subtissue n_counts louvain leiden assay_ontology_term_id
## disease_ontology_term_id cell_type_ontology_term_id
## tissue_ontology_term_id development_stage_ontology_term_id
## self_reported_ethnicity_ontology_term_id sex_ontology_term_id
## is_primary_data organism_ontology_term_id suspension_type cell_type
## assay disease organism sex tissue self_reported_ethnicity
## development_stage
## row_data names: n_cells means dispersions dispersions_norm
## highly_variable feature_is_filtered feature_name feature_reference
## feature_biotype
## column_embedding: X_pca X_tsne X_umap
## row_embedding: PCs
## look at the 'n_gene' column
h5ad |>
column_data(j = "n_genes") |>
summarize(
min = min(n_genes, na.rm = TRUE),
max = max(n_genes, na.rm = TRUE)
)
## a basic histogram
h5ad |>
column_data(j = "n_genes") |>
unlist() |>
hist(breaks = 40)
##
## interactive umap visualization (requires 'plotly' package
##
if (!"plotly" %in% rownames(installed.packages()))
BiocManager::install("plotly")
## extract the 'umap'
umap <-
h5ad |>
column_embedding("X_umap", with = "cell_type")
## visualize
umap |>
## randomize points to avoid possible false sense of 'groups' due
## to overplotting
slice(sample(n())) |>
plotly::plot_ly(
x = ~ X_umap_1, y = ~ X_umap_2,
color = ~ cell_type,
colors = "Paired",
type = "scattergl", # greatly speed display
mode = "markers", opacity = 1, marker = list(size = 5L)
) |>
plotly::layout(
legend = list(itemsizing = 'constant', layers = list(opacity = 1))
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment