Skip to content

Instantly share code, notes, and snippets.

@alexlenail
Last active September 27, 2022 20:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexlenail/6f3a9a334df274cf24a25be1878504a0 to your computer and use it in GitHub Desktop.
Save alexlenail/6f3a9a334df274cf24a25be1878504a0 to your computer and use it in GitHub Desktop.
def pseudobulk_adata(adata, obs_vars):
    """Sum ``adata.X`` over the cells of every ``adata.obs`` group.

    Args:
        adata: object exposing ``obs`` (a DataFrame), ``var.index``, ``X`` and
            label-based row indexing -- presumably an ``anndata.AnnData``;
            confirm against callers.
        obs_vars: column name (or list of names) passed to
            ``adata.obs.groupby``.

    Returns:
        A DataFrame with one row per group key and one column per entry of
        ``adata.var.index``, holding the column-wise sum of ``X`` over the
        cells belonging to that group.
    """
    grouped_cells = dict(adata.obs.groupby(obs_vars).groups)

    totals = {}
    for group_key, cell_names in grouped_cells.items():
        # X.sum(0) may come back as a 2-D matrix; reduce it to a plain vector.
        column_sums = adata[cell_names].X.sum(0)
        totals[group_key] = np.squeeze(np.asarray(column_sums))

    # Built with var names as rows, then transposed so groups become rows.
    per_var = pd.DataFrame(totals, index=adata.var.index)
    return per_var.T
def flat(mtx):
    """Return ``mtx`` as an ndarray with every length-1 axis removed."""
    as_array = np.asarray(mtx)
    return as_array.squeeze()
def _aggregate_cells(X, op):
    """Collapse the rows of matrix ``X`` into one per-column vector using ``op``."""
    if op == 'sum':
        return flat(X.sum(0))
    if X.shape[0] == 0:
        # Mean/median over zero rows is undefined; report NaN instead of
        # dividing by zero.
        return np.full(X.shape[1], np.nan)
    if op == 'mean':
        return flat(X.mean(0))
    # op == 'median': scipy sparse matrices expose no median, so densify first.
    dense = X.toarray() if hasattr(X, 'toarray') else np.asarray(X)
    return flat(np.median(dense, axis=0))


def pseudobulks(adata, by_column, do_pseudobulks_per=None, op='sum'):
    """Aggregate ``adata.X`` per value of ``by_column``, optionally per stratum.

    Args:
        adata: object exposing ``obs`` (a DataFrame), ``var.index``, ``X`` and
            boolean-mask row indexing -- presumably an ``anndata.AnnData``;
            confirm against callers.
        by_column: name of the ``adata.obs`` column whose unique values become
            the columns of each returned DataFrame.
        do_pseudobulks_per: list of ``adata.obs`` column names. One DataFrame
            is produced for every combination of their unique values.
            ``None`` (the default) or an empty list means a single DataFrame
            over all cells.
        op: aggregation applied across cells: ``'sum'``, ``'mean'`` or
            ``'median'``.

    Returns:
        A dict mapping a tuple of values (one per entry of
        ``do_pseudobulks_per``; the empty tuple when there are none) to a
        DataFrame indexed by ``adata.var.index`` with one column per unique
        value of ``by_column``. Combinations that select no cells are still
        present; their columns are NaN for ``'mean'`` / ``'median'``.

    Raises:
        AssertionError: if a column name is missing from ``adata.obs``, ``op``
            is not supported, or ``do_pseudobulks_per`` is not a list of str.
    """
    # Imported locally so the function does not rely on a module-level import.
    from itertools import product

    if do_pseudobulks_per is None:
        do_pseudobulks_per = []

    # Check the argument's type before iterating over it.
    assert isinstance(do_pseudobulks_per, list) and all(
        isinstance(col, str) for col in do_pseudobulks_per
    ), 'do_pseudobulks_per must be a list of column names (str)'
    # Every entry of do_pseudobulks_per must be a column of adata.obs.
    assert all(col in adata.obs.columns for col in do_pseudobulks_per), \
        'every entry of do_pseudobulks_per must be a column of adata.obs'
    assert by_column in adata.obs.columns, 'by_column must be a column of adata.obs'
    assert op in ('sum', 'mean', 'median'), "op must be 'sum', 'mean' or 'median'"

    # Test `op` first so the scan over X only runs when it matters.
    if op == 'sum' and not np.all(np.mod(adata.X.data, 1) == 0):
        print('Warning: non-integer entries in adata.X. Likely not counts matrix. Careful using `sum` op!', flush=True)

    if do_pseudobulks_per:
        level_values = [sorted(adata.obs[col].unique()) for col in do_pseudobulks_per]
        views = {}
        for vals in product(*level_values):
            # A cell belongs to the stratum only if it matches on every column.
            mask = np.logical_and.reduce(
                [(adata.obs[col] == val) for col, val in zip(do_pseudobulks_per, vals)]
            )
            views[vals] = adata[mask]
    else:
        views = {(): adata}

    by_column_values = adata.obs[by_column].unique()

    result = {}
    for criteria, view in views.items():
        per_value = {
            val: _aggregate_cells(view[view.obs[by_column] == val].X, op)
            for val in by_column_values
        }
        result[criteria] = pd.DataFrame(per_value, index=adata.var.index)
    return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment