Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/d975b994cd88ce8cfb9965e062296e83 to your computer and use it in GitHub Desktop.
Save afrendeiro/d975b994cd88ce8cfb9965e062296e83 to your computer and use it in GitHub Desktop.
Re-analyze GeoMx data from Rendeiro et al. 2021 doi:s41586-021-03475-6
"""
Re-analyze GeoMx data from Rendeiro et al. 2021 doi:s41586-021-03475-6
Requirements (Python 3.10+):
pip install \
"anndata" \
"matplotlib>=3.8.3" \
"numpy>=1.26.4" \
"pandas>=2.1.0" \
"scanpy" \
"seaborn_extensions" \
"tqdm>=4.64.1" \
"urlpath>=1.2.0"
"""
from urlpath import URL
import numpy as np
import pandas as pd
import pingouin as pg
from tqdm import tqdm
from anndata import AnnData
import scanpy as sc
import matplotlib.pyplot as plt
from seaborn_extensions import clustermap
base_url = URL("https://zenodo.org/records/4635286/files/data/geomx/")
figkws = dict(bbox_inches="tight", dpi=200)
prefix = "rendeiro_Nature_2021_covid19_paper.geomx"
# Load metadata
meta = pd.read_parquet(base_url / "metadata_matrix.pq").dropna(subset="phenotypes")
meta["patient"] = pd.Categorical(meta["sample_id"].str.split("_").str[0])
meta["location"] = pd.Categorical(meta["location"])
meta["batch"] = pd.Categorical("B" + meta["Batch"].astype(str))
voi = ["phenotypes", "location", "patient", "batch"]
# Load data
df = pd.read_parquet(base_url / "expression_matrix.pq").T.reindex(meta.index)
df = np.log1p(df)
# Visualize
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax.scatter(df.mean(), df.std(), alpha=0.25, s=5)
ax.set(xscale="log", yscale="log", xlabel="Mean", ylabel="Standard deviation")
fig.savefig(f"{prefix}.mean_vs_std.png", **figkws)
g = clustermap(
df,
config="z",
row_colors=meta.reindex(df.index)[voi],
square=False,
)
g.ax_heatmap.set(rasterized=True)
g.fig.savefig(f"{prefix}.all_data_heatmap.png", **figkws)
a = AnnData(df.values, obs=meta)
a.var.index = df.columns
sc.pp.scale(a)
sc.pp.pca(a)
sc.pp.neighbors(a)
sc.tl.umap(a)
fig = sc.pl.umap(a, color=voi, alpha=0.75, s=25, show=False, return_fig=True)
fig.savefig(f"{prefix}.umap.svg", **figkws)
# Differential expression (Mann-Whitney U)
_res = list()
for location in meta["location"].cat.categories:
ctrl = meta.query("location == @location & phenotypes == 'Normal'").index
for phenotypes in meta["phenotypes"].cat.categories[1:]:
test = meta.query("location == @location & phenotypes == @phenotypes").index
for gene in tqdm(df.columns):
r = pg.mwu(df.loc[ctrl, gene], df.loc[test, gene]).assign(
gene=gene, phenotypes=phenotypes, location=location
)
_res.append(r)
diff = pd.concat(_res, ignore_index=True)
diff.to_csv(f"{prefix}.differential_expression_results.csv", index=False)
sel = (
diff.set_index("gene")
.groupby(voi[:2])["p-val"]
.nsmallest(10)
.unstack(2)
.columns.unique()
)
g = clustermap(
df.loc[:, sel],
config="z",
row_colors=meta.reindex(df.index)[voi],
square=False,
figsize=(12, 10),
)
g.ax_heatmap.set(rasterized=True)
g.fig.savefig(
f"{prefix}.diff_expression.diff_top10_p-val.png",
**figkws,
)
sel = (
diff.set_index("gene")
.groupby(voi[:2])["RBC"]
.nlargest(5)
.unstack(2)
.columns.unique()
.tolist()
)
sel += (
diff.set_index("gene")
.groupby(voi[:2])["RBC"]
.nsmallest(5)
.unstack(2)
.columns.unique()
.tolist()
)
g = clustermap(
df.loc[:, sel],
config="z",
row_colors=meta.reindex(df.index)[voi],
square=False,
figsize=(12, 10),
)
g.ax_heatmap.set(rasterized=True)
g.fig.savefig(
f"{prefix}.diff_expression.diff_top10_effectsize.png",
**figkws,
)
@afrendeiro
Copy link
Author

rendeiro_Nature_2021_covid19_paper geomx mean_vs_std
rendeiro_Nature_2021_covid19_paper geomx all_data_heatmap
rendeiro_Nature_2021_covid19_paper geomx umap
rendeiro_Nature_2021_covid19_paper geomx diff_expression diff_top10_p-val
rendeiro_Nature_2021_covid19_paper geomx diff_expression diff_top10_effectsize

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment