Last active
September 27, 2022 20:36
-
-
Save alexlenail/6f3a9a334df274cf24a25be1878504a0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pseudobulk_adata(adata, obs_vars):
    """Sum ``adata.X`` over the cells of every ``obs`` group.

    Cells are grouped with ``adata.obs.groupby(obs_vars)``; for each group the
    matching rows of ``adata.X`` are summed column-wise.

    Args:
        adata: AnnData-like object exposing ``obs``, ``var`` and ``X`` and
            supporting ``adata[labels]`` row selection (presumably an
            ``anndata.AnnData`` -- confirm against callers).
        obs_vars: Column name, or list of column names, of ``adata.obs`` to
            group by.

    Returns:
        A ``pandas.DataFrame`` with one row per group (labelled by the group
        key) and one column per entry of ``adata.var.index``.
    """
    groups = dict(adata.obs.groupby(obs_vars).groups)
    group_sums = {
        # ``X.sum(0)`` may come back 2-D (e.g. ``np.matrix``), so flatten it.
        key: np.squeeze(np.asarray(adata[cell_labels].X.sum(0)))
        for key, cell_labels in groups.items()
    }
    # Assembled as variables x groups so each column is keyed by its group,
    # then transposed to groups x variables.
    return pd.DataFrame(group_sums, index=adata.var.index).T
def flat(mtx):
    """Return ``mtx`` as a NumPy array with every length-1 axis removed.

    Handy for turning the ``(1, n)`` result of a matrix ``sum(0)`` into a
    plain length-``n`` vector.

    Args:
        mtx: Anything ``np.asarray`` accepts (ndarray, ``np.matrix``, nested
            lists, ...).

    Returns:
        ``numpy.ndarray`` holding the same values with singleton dimensions
        squeezed out (a single-element input becomes a 0-d array).
    """
    return np.squeeze(np.asarray(mtx))
def _aggregate_cells(mtx, op):
    """Collapse the rows (cells) of ``mtx`` into one 1-D vector using ``op``."""
    if op == 'sum':
        return flat(mtx.sum(0))
    n_cells, n_vars = mtx.shape
    if n_cells == 0:
        # Mean/median over zero cells is undefined; report NaN instead of
        # dividing by zero or emitting a misleading 0.
        return np.full(n_vars, np.nan)
    if op == 'mean':
        return flat(mtx.sum(0)) / n_cells
    # Median needs every value, including the implicit zeros of a sparse
    # matrix, so densify first.
    dense = mtx.toarray() if hasattr(mtx, 'toarray') else np.asarray(mtx)
    return np.median(dense, axis=0)


def pseudobulks(adata, by_column, do_pseudobulks_per=None, op='sum'):
    """Aggregate ``adata.X`` per value of ``by_column``, optionally per subset.

    When ``do_pseudobulks_per`` is non-empty, ``adata`` is first split into one
    view for every combination of the (sorted) unique values of those columns.
    Within each view, cells are grouped by ``by_column`` and aggregated with
    ``op``.

    Args:
        adata: AnnData-like object exposing ``obs``, ``var`` and ``X`` and
            supporting boolean-mask row selection (presumably an
            ``anndata.AnnData`` -- confirm against callers).
        by_column: Name of the ``adata.obs`` column whose unique values become
            the columns of every returned table.
        do_pseudobulks_per: List of ``adata.obs`` column names used to split
            the data before aggregating. ``None`` or ``[]`` means no split.
        op: ``'sum'``, ``'mean'`` or ``'median'`` -- how cells are combined.

    Returns:
        ``dict`` mapping a tuple of values (one per column in
        ``do_pseudobulks_per``; ``()`` when there is no split) to a
        ``pandas.DataFrame`` indexed by ``adata.var.index`` with one column per
        unique ``by_column`` value. For ``'sum'`` a group without cells yields
        zeros; for ``'mean'``/``'median'`` it yields NaN.

    Raises:
        AssertionError: If an argument has the wrong type, names a column that
            is missing from ``adata.obs``, or ``op`` is not supported.
    """
    if do_pseudobulks_per is None:
        do_pseudobulks_per = []

    # Validate the container type first: iterating a bare string below would
    # otherwise check its individual characters as column names.
    assert isinstance(do_pseudobulks_per, list) and all(
        isinstance(col, str) for col in do_pseudobulks_per
    ), '`do_pseudobulks_per` must be a list of column names'
    # Every split column must really be a column of adata.obs.
    assert all(col in adata.obs.columns for col in do_pseudobulks_per), \
        'every entry of `do_pseudobulks_per` must be a column of adata.obs'
    assert by_column in adata.obs.columns, '`by_column` must be a column of adata.obs'
    assert op in ['sum', 'mean', 'median'], "`op` must be 'sum', 'mean' or 'median'"

    # Only scan the matrix when the result matters (summing non-counts).
    if op == 'sum' and not np.all(np.mod(adata.X.data, 1) == 0):
        print('Warning: non-integer entries in adata.X. Likely not counts matrix. Careful using `sum` op!', flush=True)

    if len(do_pseudobulks_per) > 0:
        combinations = product(*[sorted(adata.obs[col].unique()) for col in do_pseudobulks_per])
        views = {
            vals: adata[np.logical_and.reduce(
                [(adata.obs[col] == val) for col, val in zip(do_pseudobulks_per, vals)]
            )]
            for vals in combinations
        }
    else:
        views = {(): adata}

    by_column_values = adata.obs[by_column].unique()

    result = {}
    for criteria, view in views.items():
        per_value = {
            val: _aggregate_cells(view[view.obs[by_column] == val].X, op)
            for val in by_column_values
        }
        result[criteria] = pd.DataFrame(per_value, index=adata.var.index)
    return result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment