Skip to content

Instantly share code, notes, and snippets.

import colorsys
# Adjust a hex colour string. Going by the name and the two *_factor
# parameters this presumably darkens the colour and boosts its saturation.
# NOTE(review): the body is cut off in this view right after the hex -> RGB
# conversion, so the actual adjustment and the return value are not visible.
def darken_and_vivid_hex_color(hex_color, darken_factor=0.2, vivid_factor=0.2):
# Remove the hash at the start if it's there
# (lstrip removes every leading '#', not just one)
hex_color = hex_color.lstrip('#')
# Convert hex to RGB
# Reads three 2-digit pairs at offsets 0, 2 and 4, so a 6-digit "rrggbb"
# string is expected; a 3-digit shorthand such as "abc" would raise
# ValueError on the empty third slice.
rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
# Normalize RGB values to the range [0, 1]
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Global matplotlib defaults, applied as an import-time side effect.
# Default figure size: 8 x 8 (matplotlib measures figsize in inches).
plt.rcParams['figure.figsize'] = [8, 8]
# Default figure resolution in dots per inch.
plt.rcParams['figure.dpi'] = 240
# Per the matplotlib rcParams docs, 'none' keeps SVG text as text instead of
# converting glyphs to paths, so labels stay editable in vector editors.
plt.rcParams['svg.fonttype'] = 'none'
# Per the matplotlib rcParams docs, restricts PDF output to the 14 core PDF
# fonts rather than embedding fonts.
plt.rcParams['pdf.use14corefonts'] = True
import mygene
# Look up annotations for a collection of gene names. Presumably this queries
# mygene (imported just above) using `map_from` as the search scopes and
# `fields` as the returned fields -- TODO confirm; the querying code is cut off
# in this view.
# NOTE(review): `map_from` and `fields` default to list literals, which Python
# creates once and shares between calls; that is only safe if the unseen body
# never mutates them.
def gene_annotations(names, map_from=['symbol', 'alias'], fields=['ensembl.gene','name','summary'], species='human'):
# Wrap the input in a Series so the vectorised .str accessor is available.
names = pd.Series(names)
print(f"passed {len(names)} symbols")
# Whitespace-stripped copy, compared element-wise with the input below to
# detect names carrying leading/trailing whitespace.
names_stripped = names.str.strip()
if any(names_stripped != names):
# NOTE(review): the body of this branch is cut off in this view.
from gprofiler import GProfiler
# Module-level g:Profiler client, created at import time.
# return_dataframe=True presumably makes queries return pandas DataFrames
# rather than lists of dicts -- confirm against the gprofiler-official docs.
gp = GProfiler(return_dataframe=True)
# Ortholog lookup for a list of gene identifiers. Presumably this goes through
# the module-level g:Profiler client, mapping from `organism` to `target`
# (defaults: mouse -> human) -- TODO confirm; only the input normalisation is
# visible in this view.
def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):
# Normalise pandas inputs to a plain Python list.
if isinstance(query, pd.Index): query = query.tolist()
elif isinstance(query, pd.Series): query = query.values.tolist()
# De-duplicate (np.unique also sorts the values) and drop entries whose string
# form is 'nan', i.e. missing values.
q = [x for x in np.unique(query).tolist() if str(x) != 'nan']
# Report how many entries were removed as duplicates or missing values.
if len(q) != len(query): print(f'{len(q)} unique of {len(query)}')
# NOTE(review): the rest of the function is cut off in this view.
def pseudobulk_adata(adata, obs_vars):
    """Sum ``adata.X`` over the observations of each ``obs`` group.

    Observations are grouped with ``adata.obs.groupby(obs_vars)``; for every
    group the rows of ``X`` belonging to it are summed along axis 0.

    Args:
        adata: object exposing ``obs`` (DataFrame), ``var`` (DataFrame),
            ``X`` and label-based row selection via ``adata[labels]``
            (presumably an AnnData -- verify against callers).
        obs_vars: column name, or list of column names, of ``adata.obs`` to
            group by.

    Returns:
        pandas.DataFrame with one row per group and one column per entry of
        ``adata.var.index``.
    """
    group_to_cells = dict(adata.obs.groupby(obs_vars).groups)

    summed_by_group = {}
    for group_label, cell_labels in group_to_cells.items():
        totals = adata[cell_labels].X.sum(0)
        # The sum may come back as a 2-D matrix (e.g. from a sparse X);
        # flatten it to a plain 1-D array.
        summed_by_group[group_label] = np.squeeze(np.asarray(totals))

    # Built as (variables x groups), then transposed to (groups x variables).
    per_variable = pd.DataFrame(summed_by_group, index=adata.var.index)
    return per_variable.T
def flat(mtx):
    """Convert ``mtx`` to a NumPy array and drop every length-1 axis."""
    as_array = np.asarray(mtx)
    return as_array.squeeze()
# Build pseudobulk profiles from `adata`. Presumably observations are grouped
# by `by_column`, optionally within each combination of `do_pseudobulks_per`,
# and aggregated with `op` -- TODO confirm; only the argument check is visible
# in this view.
# NOTE(review): `do_pseudobulks_per` defaults to a list literal shared between
# calls; that is only safe if the unseen body never mutates it.
def pseudobulks(adata, by_column, do_pseudobulks_per=[], op='sum'):
# check that all the entries in do_pseudobulks_per are really columns in adata.obs
# (an assert: this check is skipped when Python runs with -O)
assert all([col in adata.obs.columns for col in do_pseudobulks_per])
def pct(floatt):
    """Render a fraction as a percentage string with one decimal place.

    For example ``0.1234`` becomes ``'12.3%'``.
    """
    return format(floatt, '.1%')
import os
# Ring the terminal bell by having the shell's printf emit the BEL character,
# e.g. as an audible "done" signal after a long-running cell. '\a' is not a
# raw string, so Python already turns it into BEL (0x07) before the shell runs.
os.system("printf '\a'") # or '\7'
import h5py
import numpy as np
# Read the contents of the HDF5 file at `h5_path` into a dict.
# NOTE(review): the body is cut off in this view; how nodes are stored in the
# dict and what is returned are not visible.
def read_h5_to_dict(h5_path):
# Accumulator that the nested callback fills in.
out_dict = {}
# Callback taking (name, node); this matches the signature h5py's visititems()
# expects -- presumably that is how it is invoked, TODO confirm.
# `out_dict` is bound as a default argument, i.e. captured at definition time.
def add_h5_node_to_dict(name, node, out_dict=out_dict):
# node.name is used instead of the `name` argument (for h5py objects this is
# presumably the absolute path within the file -- confirm).
fullname = node.name
import scipy
import scipy.stats
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster import hierarchy
def sort_df_by_hclust_olo(df, how='both', method='ward', metric='euclidean'):
'''
Reorder a DataFrame by hierarchical clustering -- presumably with optimal
leaf ordering, going by the "olo" suffix. NOTE(review): the clustering code
is cut off in this view.

how={'index', 'columns', 'both'}
method, metric: default to 'ward' / 'euclidean'; presumably forwarded to
scipy's linkage -- TODO confirm.
'''
# Missing values are replaced with 0. fillna returns a new object, so the
# caller's frame is not modified by this line.
df = df.fillna(0)
from scipy.stats import gmean
def geometric_mean(df):
    '''Row-wise geometric mean that ignores zeros and missing values.

    Zeros are treated as missing, each row's geometric mean is taken over the
    remaining entries, and rows with no usable entries yield 0.

    Approach from:
    https://www.reddit.com/r/learnpython/comments/mq5ea7/pandas_calculate_geometric_mean_while_ignoring/
    '''
    zeros_as_missing = df.replace(0, np.nan)

    def _row_gmean(row):
        present = row[row.notna()]
        return gmean(present)

    row_means = zeros_as_missing.apply(_row_gmean, axis=1)
    # Rows with no usable entries come out as NaN; report them as 0.
    return row_means.fillna(0)