Skip to content

Instantly share code, notes, and snippets.

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Session-wide matplotlib defaults, applied in one call.
plt.rcParams.update({
    'figure.figsize': [8, 8],     # default figure size
    'figure.dpi': 240,            # default figure resolution
    'svg.fonttype': 'none',       # SVG font handling option
    'pdf.use14corefonts': True,   # PDF font option
})
import mygene
# Annotate a collection of gene names.
# NOTE(review): only the first lines of the body are visible in this view; the
# function name and the `mygene` import suggest a MyGene.info lookup using
# `map_from`, `fields` and `species` — TODO confirm against the full source.
# NOTE(review): `map_from` and `fields` are mutable list defaults; this is only
# safe if the (unseen) body never mutates them — verify.
def gene_annotations(names, map_from=['symbol', 'alias'], fields=['ensembl.gene','name','summary'], species='human'):
# Wrap the input in a Series so the vectorised `.str` accessor can be used below.
names = pd.Series(names)
print(f"passed {len(names)} symbols")
# Compare against a whitespace-stripped copy to detect names with
# leading/trailing whitespace.
names_stripped = names.str.strip()
if any(names_stripped != names):
from gprofiler import GProfiler
# Module-level g:Profiler client, created once with return_dataframe=True
# (presumably so query results come back as pandas DataFrames — see the
# gprofiler documentation).
gp = GProfiler(return_dataframe=True)
# Orthology lookup helper; the defaults name 'mmusculus' as `organism` and
# 'hsapiens' as `target`.
# NOTE(review): only the query clean-up is visible in this view; how
# `human_to_mouse`, `mouse_to_human` and `returnall` are used is not shown.
def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):
# Accept pandas Index / Series inputs by converting them to plain lists.
if isinstance(query, pd.Index): query = query.tolist()
elif isinstance(query, pd.Series): query = query.values.tolist()
# De-duplicate (np.unique also sorts) and drop entries whose string form is 'nan'.
q = [x for x in np.unique(query).tolist() if str(x) != 'nan']
# Report when de-duplication / NaN removal shrank the query.
if len(q) != len(query): print(f'{len(q)} unique of {len(query)}')
def pseudobulk_adata(adata, obs_vars):
    """Sum ``adata.X`` over groups of cells.

    Cells are grouped with ``adata.obs.groupby(obs_vars)``. For each group the
    selected rows of ``X`` are summed along axis 0 and flattened to 1-D.

    Returns a DataFrame with one row per group and one column per entry of
    ``adata.var.index``.
    """
    groups = dict(adata.obs.groupby(obs_vars).groups)
    totals = {}
    for group_key, cell_labels in groups.items():
        column_sums = adata[cell_labels].X.sum(0)
        # .sum(0) may return a 2-D matrix type; normalise it to a flat ndarray.
        totals[group_key] = np.squeeze(np.asarray(column_sums))
    per_group = pd.DataFrame(totals, index=adata.var.index)
    return per_group.T
def flat(mtx):
    """Convert *mtx* to an ndarray and drop all length-1 axes."""
    as_array = np.asarray(mtx)
    return np.squeeze(as_array)
# Pseudobulk helper taking `by_column`, an optional list `do_pseudobulks_per`
# and an `op` that defaults to 'sum'.
# NOTE(review): only the argument validation is visible in this view; how
# `by_column` and `op` are used is not shown.
# NOTE(review): `do_pseudobulks_per=[]` is a mutable default, and the check
# below uses `assert`, which is stripped under `python -O`.
def pseudobulks(adata, by_column, do_pseudobulks_per=[], op='sum'):
# check that every entry of do_pseudobulks_per is really a column of adata.obs
assert all([col in adata.obs.columns for col in do_pseudobulks_per])
def pct(floatt):
    """Format a fraction as a percentage with one decimal, e.g. 0.5 -> '50.0%'."""
    return f'{floatt:.1%}'
import os
# Ring the terminal bell through the shell's printf (presumably as an audible
# "finished" signal — confirm intent). The string is not raw, so Python turns
# '\a' into the BEL character before the command reaches the shell.
os.system("printf '\a'") # or '\7'
import h5py
import numpy as np
# Read the HDF5 file at `h5_path` into a plain dict (per the function name).
# NOTE(review): the file opening/traversal is not visible in this view — TODO
# confirm against the full source.
def read_h5_to_dict(h5_path):
out_dict = {}
# Nested helper taking (name, node); this matches the callback signature used
# by h5py's `visititems` — presumably it is used that way, TODO confirm.
# `out_dict` is bound as a default argument, so the helper refers to the dict
# created above unless another one is passed explicitly.
def add_h5_node_to_dict(name, node, out_dict=out_dict):
# Take the identifier from the node's own `.name` attribute (for h5py objects
# this is presumably the absolute path inside the file — confirm).
fullname = node.name
import scipy
import scipy.stats
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster import hierarchy
# NOTE(review): only the start of the body is visible in this view. Judging by
# the name and the scipy.cluster.hierarchy imports nearby, this presumably
# reorders the rows and/or columns of `df` by hierarchical clustering with
# optimal leaf ordering — confirm against the full source.
# `method` / `metric` default to 'ward' / 'euclidean'; presumably they are
# forwarded to scipy's `linkage` — TODO confirm.
def sort_df_by_hclust_olo(df, how='both', method='ward', metric='euclidean'):
'''
how={'index', 'columns', 'both'}
'''
# Replace missing values with 0 and rebind the local name before any further work.
df = df.fillna(0)
from scipy.stats import gmean
def geometric_mean(df):
    '''Row-wise geometric mean that ignores zeros and missing values.

    Zeros are treated as missing. Each row's geometric mean is taken over the
    remaining entries, and rows whose result is NaN are reported as 0.

    Adapted from
    https://www.reddit.com/r/learnpython/comments/mq5ea7/pandas_calculate_geometric_mean_while_ignoring/
    '''
    def _row_gmean(row):
        # Only the non-missing entries of the row take part in the mean.
        return gmean(row.dropna())

    zeros_masked = df.replace(0, np.nan)
    per_row = zeros_masked.apply(_row_gmean, axis=1)
    return per_row.fillna(0)
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
import upsetplot
import matplotlib_inline.backend_inline
# Request SVG output from the inline backend.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# NOTE(review): this sets the inline figure format to 'retina' immediately
# after 'svg' was requested above; presumably the later setting wins — confirm
# which format is intended and drop the other.
%config InlineBackend.figure_format = 'retina'
# IPython magic selecting the inline matplotlib backend; only valid inside an
# IPython/Jupyter session, not in a plain .py module.
%matplotlib inline
# Set the default figure size to [6, 3]; this replaces any earlier
# 'figure.figsize' value set in the same session.
plt.rcParams['figure.figsize'] = [6, 3]