Skip to content

Instantly share code, notes, and snippets.

@mtmorgan
Created September 7, 2021 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mtmorgan/12a62d166520fd8223f46e7401a89522 to your computer and use it in GitHub Desktop.
Save mtmorgan/12a62d166520fd8223f46e7401a89522 to your computer and use it in GitHub Desktop.
options(width=160)
library(dplyr)
## BiocManager::install("Bioconductor/hca")
library(hca)
## Catalogue of the projects available in the HCA
projects <- projects()
projects

## Pick out a specific project by matching on its title, and show its
## id and title (transposed)
t_cell_projects <- filter(projects, grepl("T cell activation", projectTitle))
t(select(t_cell_projects, projectId, projectTitle))

projectId <- "4a95101c-9ffc-4f30-a809-f04518a23803"

## Expand the species / organ list-columns of the same project;
## multiple organs are represented...
tidyr::unnest(
  t_cell_projects,
  c(genusSpecies, samples.organ, specimens.organ)
)
##
## Restrict queries to a single project
##
project_filter <- filters(projectId = list(is = projectId))
projects(filter = project_filter)

## The project detail record carries a lot more information
project_details <- projects_detail(uuid = projectId)
names(project_details)
str(project_details$samples)

## The information returned can be specified explicitly. The author's
## commented-out exploration code is retained here for reference:
##   projects(filter = project_filter, as = "lol") |>
##       lol_path(lol) |>
##       filter(is_leaf) |>
##       print(n = Inf)
project_columns <- c(
  projectId = "hits[*].entryId",
  "hits[*].donorOrganisms[*].biologicalSex[*]",
  "hits[*].donorOrganisms[*].disease[*]",
  "hits[*].donorOrganisms[*].donorCount",
  "hits[*].samples[*].organ[*]"
)
projects(filter = project_filter, columns = project_columns)
##
## Information about samples in the project
##
## Paths to extract from each sample record; named elements become
## column names of the same name, unnamed paths are passed as-is.
## Assigned with `<-` for consistency with the rest of the script.
sample_columns <- c(
  entryId = "hits[*].entryId",
  sex = "hits[*].donorOrganisms[*].biologicalSex[*]",
  age = "hits[*].donorOrganisms[*].organismAge[*].value",
  "hits[*].donorOrganisms[*].id[*]",
  "hits[*].samples[*].organ",
  "hits[*].samples[*].id"
)
samples(filter = project_filter, columns = sample_columns)
##
## Does the project have any loom files?
##
## Same project as before, additionally restricted to the loom file format
project_file_filter <- filters(
  projectId = list(is = projectId),
  fileFormat = list(is = "loom")
)

## Paths to extract for each file, named by the desired column name
file_columns <- c(
  fileId = "hits[*].files[*].uuid",
  name = "hits[*].files[*].name",
  fileFormat = "hits[*].files[*].format",
  version = "hits[*].files[*].version",
  projectTitle = "hits[*].projects[*].projectTitle[*]",
  projectId = "hits[*].projects[*].projectId[*]",
  url = "hits[*].files[*].url",
  isIntermediate = "hits[*].files[*].isIntermediate",
  workflow = "hits[*].protocols[*].workflow[*]"
)

project_loom_files <- files(project_file_filter, columns = file_columns)
project_loom_files

## Workflow(s) reported for the intermediate files...
project_loom_files |>
  filter(isIntermediate) |>
  tidyr::unnest("workflow") |>
  select(fileId, workflow)

## ...versus the files that are not intermediate
project_loom_files |>
  filter(!isIntermediate) |>
  tidyr::unnest("workflow") |>
  select(fileId, workflow)
## Retrieve the files that are not intermediate; per the original
## author's note the downloads are BiocFileCache'd
loom_file_paths <-
  project_loom_files |>
  filter(!isIntermediate) |>
  files_download()

## Import the first file as a LoomExperiment (like SummarizedExperiment...)
loom_file <- LoomExperiment::import(loom_file_paths[1])
loom_file

## colData contains QC information about each cell...
## 'colData()' is namespace-qualified because this script attaches only
## dplyr and hca; 'LoomExperiment::import()' loads, but does not attach,
## the packages that provide the accessor.
SummarizedExperiment::colData(loom_file)

## ...and also an identifier, 'input_id'
unique(SummarizedExperiment::colData(loom_file)[, "input_id"])

## but
## - no interesting phenotype data!
## - it is unclear what an 'input_id' actually refers to
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment