Alexander Lenail alexlenail

## darken_and_vivid_hex_color.py
import colorsys

def darken_and_vivid_hex_color(hex_color, darken_factor=0.2, vivid_factor=0.2):
    # Remove the hash at the start if it's there
    hex_color = hex_color.lstrip('#')

    # Convert hex to RGB
    rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))

    # Normalize RGB values to the range [0, 1]

## default_notebook_first_cell.py
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults, applied in a single update call.
plt.rcParams.update({
    'figure.figsize': [8, 8],
    'figure.dpi': 240,
    # NOTE(review): 'none' presumably keeps SVG text as text rather than paths — confirm in matplotlib docs
    'svg.fonttype': 'none',
    'pdf.use14corefonts': True,
})

## gene_annotations.py
import mygene

def gene_annotations(names, map_from=['symbol', 'alias'], fields=['ensembl.gene','name','summary'], species='human'):

    names = pd.Series(names)

    print(f"passed {len(names)} symbols")

    names_stripped = names.str.strip()
    if any(names_stripped != names):

## orthologs.py
from gprofiler import GProfiler
gp = GProfiler(return_dataframe=True)

def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):

    if isinstance(query, pd.Index): query = query.tolist()
    elif isinstance(query, pd.Series): query = query.values.tolist()

    q = [x for x in np.unique(query).tolist() if str(x) != 'nan']
    if len(q) != len(query): print(f'{len(q)} unique of {len(query)}')

## pseudobulk_adata.py
def pseudobulk_adata(adata, obs_vars):
    """Sum ``adata.X`` over each group of rows defined by ``obs_vars``.

    ``adata.obs`` is grouped by ``obs_vars``; for every group the matching
    rows of ``adata`` are selected and their ``X`` values are summed along
    axis 0. The result is a DataFrame with one row per group and one column
    per entry of ``adata.var.index``.

    NOTE(review): presumably ``adata`` is an AnnData object — confirm with callers.
    """
    labels_by_group = dict(adata.obs.groupby(obs_vars).groups)

    summed = {}
    for group_key, row_labels in labels_by_group.items():
        totals = adata[row_labels].X.sum(0)
        # X.sum(0) may be a 2-D matrix; flatten it to a plain 1-D array.
        summed[group_key] = np.squeeze(np.asarray(totals))

    per_var = pd.DataFrame(summed, index=adata.var.index)
    return per_var.T

def flat(mtx):
    """Convert ``mtx`` to an ndarray and drop every length-1 axis."""
    dense = np.asarray(mtx)
    return np.squeeze(dense)

def pseudobulks(adata, by_column, do_pseudobulks_per=[], op='sum'):

    # check that all the entries in do_pseudobulks_per are really columns in adata.obs
    assert all([col in adata.obs.columns for col in do_pseudobulks_per])

## pct.py
def pct(floatt):
    """Format a number as a percentage with one decimal place, e.g. 0.1234 -> '12.3%'."""
    return f'{floatt:.1%}'

## beep.py
import os
# Ring the terminal bell by running printf in a shell. Because this is a normal
# (non-raw) string, Python turns '\a' into the BEL character (0x07) before the
# command is handed to the shell.
os.system("printf '\a'") # or '\7'

## read_h5_to_dict.py
import h5py
import numpy as np

def read_h5_to_dict(h5_path):

    out_dict = {}

    def add_h5_node_to_dict(name, node, out_dict=out_dict):

        fullname = node.name

## sort_df_by_hclust_olo.py
import scipy
import scipy.stats
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster import hierarchy

def sort_df_by_hclust_olo(df, how='both', method='ward', metric='euclidean'):
    '''
    how={'index', 'columns', 'both'}
    '''
    df = df.fillna(0)

## geometric_mean.py
from scipy.stats import gmean
def geometric_mean(df):
    """Row-wise geometric mean of ``df``, ignoring zeros and missing values.

    Zeros are first replaced by NaN, then every row has its NaN entries
    dropped before ``gmean`` is applied. Any row whose result is NaN is
    reported as 0.

    Source: https://www.reddit.com/r/learnpython/comments/mq5ea7/pandas_calculate_geometric_mean_while_ignoring/
    """
    def _row_gmean(row):
        # Only the non-missing entries of the row take part in the mean.
        return gmean(row[row.notna()])

    without_zeros = df.replace(0, np.nan)
    row_means = without_zeros.apply(_row_gmean, axis=1)
    return row_means.fillna(0)
	import colorsys

	def darken_and_vivid_hex_color(hex_color, darken_factor=0.2, vivid_factor=0.2):
	# Remove the hash at the start if it's there
	hex_color = hex_color.lstrip('#')

	# Convert hex to RGB
	rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))

	# Normalize RGB values to the range [0, 1]
	import numpy as np
	import pandas as pd

	import matplotlib
	import matplotlib.pyplot as plt

	# Notebook-wide matplotlib defaults, applied in a single update call.
	plt.rcParams.update({
		'figure.figsize': [8, 8],
		'figure.dpi': 240,
		'svg.fonttype': 'none',
		'pdf.use14corefonts': True,
	})
	import mygene

	def gene_annotations(names, map_from=['symbol', 'alias'], fields=['ensembl.gene','name','summary'], species='human'):

	names = pd.Series(names)

	print(f"passed {len(names)} symbols")

	names_stripped = names.str.strip()
	if any(names_stripped != names):
	from gprofiler import GProfiler
	gp = GProfiler(return_dataframe=True)

	def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):

	if isinstance(query, pd.Index): query = query.tolist()
	elif isinstance(query, pd.Series): query = query.values.tolist()

	q = [x for x in np.unique(query).tolist() if str(x) != 'nan']
	if len(q) != len(query): print(f'{len(q)} unique of {len(query)}')
	def pseudobulk_adata(adata, obs_vars):
		"""Sum ``adata.X`` over each group of rows defined by ``obs_vars``.

		``adata.obs`` is grouped by ``obs_vars``; for every group the matching
		rows of ``adata`` are selected and their ``X`` values are summed along
		axis 0. Returns a DataFrame with one row per group and one column per
		entry of ``adata.var.index``.

		NOTE(review): presumably ``adata`` is an AnnData object — confirm with callers.
		"""
		labels_by_group = dict(adata.obs.groupby(obs_vars).groups)

		summed = {}
		for group_key, row_labels in labels_by_group.items():
			totals = adata[row_labels].X.sum(0)
			# X.sum(0) may be a 2-D matrix; flatten it to a plain 1-D array.
			summed[group_key] = np.squeeze(np.asarray(totals))

		return pd.DataFrame(summed, index=adata.var.index).T

	def flat(mtx):
		"""Convert ``mtx`` to an ndarray and drop every length-1 axis."""
		dense = np.asarray(mtx)
		return np.squeeze(dense)

	def pseudobulks(adata, by_column, do_pseudobulks_per=[], op='sum'):

	# check that all the entries in do_pseudobulks_per are really columns in adata.obs
	assert all([col in adata.obs.columns for col in do_pseudobulks_per])
	import h5py
	import numpy as np

	def read_h5_to_dict(h5_path):

	out_dict = {}

	def add_h5_node_to_dict(name, node, out_dict=out_dict):

	fullname = node.name
	import scipy
	import scipy.stats
	from scipy.cluster.hierarchy import dendrogram, linkage
	from scipy.cluster import hierarchy

	def sort_df_by_hclust_olo(df, how='both', method='ward', metric='euclidean'):
	'''
	how={'index', 'columns', 'both'}
	'''
	df = df.fillna(0)
	from scipy.stats import gmean
	def geometric_mean(df):
		"""Row-wise geometric mean of ``df``, ignoring zeros and missing values.

		Zeros are first replaced by NaN, then every row has its NaN entries
		dropped before ``gmean`` is applied. Any row whose result is NaN is
		reported as 0.

		Source: https://www.reddit.com/r/learnpython/comments/mq5ea7/pandas_calculate_geometric_mean_while_ignoring/
		"""
		without_zeros = df.replace(0, np.nan)
		# Only the non-missing entries of each row take part in the mean.
		row_means = without_zeros.apply(lambda row: gmean(row[~row.isna()]), axis=1)
		return row_means.fillna(0)