# Author: Alexander Lenail (alexlenail)

## default_notebook_first_cell.py
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

# Notebook-wide matplotlib defaults, applied in one call.
plt.rcParams.update({
    'figure.figsize': [8, 8],      # default canvas: 8 x 8 (matplotlib measures figsize in inches)
    'figure.dpi': 240,             # high-resolution rendering
    'svg.fonttype': 'none',        # keep SVG text as text instead of converting glyphs to paths
    'pdf.use14corefonts': True,    # restrict PDF output to the 14 core PDF fonts
})

## gene_annotations.py
import mygene

def gene_annotations(names, map_from=['symbol', 'alias'], fields=['ensembl.gene','name','summary'], species='human'):

    names = pd.Series(names)

    print(f"passed {len(names)} symbols")

    names_stripped = names.str.strip()
    if any(names_stripped != names):

## orthologs.py
from gprofiler import GProfiler
gp = GProfiler(return_dataframe=True)

def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):
    '''
    Normalise `query` and report how many distinct identifiers it contains.

    Only the input-cleaning prologue of this function is visible here; the
    remaining parameters are not used in the visible portion.
    NOTE(review): judging by the name and the module-level GProfiler client,
    the rest of the body presumably performs the ortholog lookup -- confirm
    against the full source.
    '''

    # Accept pandas containers as well as plain sequences: convert them to lists.
    if isinstance(query, pd.Index): query = query.tolist()
    elif isinstance(query, pd.Series): query = query.values.tolist()

    # np.unique de-duplicates (and sorts); entries whose string form is 'nan' are dropped.
    q = [x for x in np.unique(query).tolist() if str(x) != 'nan']
    # Tell the user when duplicates or NaNs were removed.
    if len(q) != len(query): print(f'{len(q)} unique of {len(query)}')

## pseudobulk_adata.py
def pseudobulk_adata(adata, obs_vars):
    '''
    Sum `adata.X` over the cells of every group in `adata.obs.groupby(obs_vars)`.

    Returns a DataFrame with one row per group and one column per entry of
    `adata.var.index`.
    `adata` is presumably an AnnData object -- only `.obs`, `.var`, `.X` and
    label-based row indexing are used here.
    '''
    cells_by_group = dict(adata.obs.groupby(obs_vars).groups)

    summed = {}
    for group_key, cell_labels in cells_by_group.items():
        # .sum(0) may hand back a 2-D matrix (e.g. for sparse X); flatten it to 1-D.
        column_totals = adata[cell_labels].X.sum(0)
        summed[group_key] = np.squeeze(np.asarray(column_totals))

    # Groups arrive as columns; transpose so that each group becomes a row.
    return pd.DataFrame(summed, index=adata.var.index).T

def flat(mtx):
    '''Convert `mtx` to a plain ndarray and drop every length-1 axis.'''
    as_array = np.asarray(mtx)
    return as_array.squeeze()

def pseudobulks(adata, by_column, do_pseudobulks_per=[], op='sum'):
    '''
    Only the argument check of this function is visible here.

    NOTE(review): `do_pseudobulks_per=[]` is a mutable default; it is only
    read in the visible portion -- confirm the rest of the body never mutates it.
    NOTE(review): `by_column` and `op` are not used in the visible portion.
    '''

    # check that every entry of do_pseudobulks_per is really a column of adata.obs
    # (an assert is skipped when Python runs with -O)
    assert all([col in adata.obs.columns for col in do_pseudobulks_per])

## pct.py
def pct(floatt):
    '''Format a fraction as a percentage with one decimal place, e.g. 0.5 -> "50.0%".'''
    return f'{floatt:.1%}'

## beep.py
import os
# Ring the terminal bell: Python turns '\a' into the BEL character before the
# command string reaches the shell, whose printf then writes it out.
os.system("printf '\a'") # or '\7'

## read_h5_to_dict.py
import h5py
import numpy as np

def read_h5_to_dict(h5_path):
    '''
    Only the start of this function is visible here: it creates the result
    dict and begins defining a per-node helper.

    NOTE(review): judging by the name, the file at `h5_path` is read with h5py
    into a dict -- confirm against the full source.
    '''

    # Accumulator that the nested helper receives as its default argument.
    out_dict = {}

    # `out_dict=out_dict` binds the accumulator above when the helper is defined.
    # NOTE(review): the (name, node) signature looks like an h5py visitor callback -- confirm.
    def add_h5_node_to_dict(name, node, out_dict=out_dict):

        # Uses the node's own .name attribute rather than the `name` argument.
        fullname = node.name

## sort_df_by_hclust_olo.py
import scipy
import scipy.stats
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster import hierarchy

def sort_df_by_hclust_olo(df, how='both', method='ward', metric='euclidean'):
    '''
    how={'index', 'columns', 'both'}

    Only the first statement of the body is visible here.
    NOTE(review): `method` and `metric` are presumably forwarded to scipy's
    hierarchical clustering, and `how` presumably selects which axis is
    reordered -- confirm against the full source.
    '''
    # Missing values are replaced with 0 on a new frame; the caller's df is not modified by this line.
    df = df.fillna(0)

## geometric_mean.py
from scipy.stats import gmean
def geometric_mean(df):
    '''
    Row-wise geometric mean of `df` that ignores zeros and missing values.

    Zeros are first turned into NaN, each row's geometric mean is taken over
    its remaining entries, and any NaN result is replaced with 0.

    Approach from https://www.reddit.com/r/learnpython/comments/mq5ea7/pandas_calculate_geometric_mean_while_ignoring/
    '''
    def _gmean_of_present(row):
        # Only the non-missing entries of the row take part in the mean.
        return gmean(row.dropna())

    zeros_masked = df.replace(0, np.nan)
    per_row = zeros_masked.apply(_gmean_of_present, axis=1)
    return per_row.fillna(0)

## venn.py
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
import upsetplot

import matplotlib_inline.backend_inline
# Ask the inline backend for SVG figure output.
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
# The two lines below are IPython magics: they only run inside an IPython/Jupyter
# session and are a syntax error in a plain .py file.
# NOTE(review): this sets the inline figure format to 'retina' right after 'svg'
# was selected above -- confirm which of the two is intended.
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Default canvas for this snippet: 6 wide by 3 tall.
plt.rcParams['figure.figsize'] = [6, 3]
	import numpy as np
	import pandas as pd

	import matplotlib
	import matplotlib.pyplot as plt

	# Notebook-wide matplotlib defaults, applied in one call.
	plt.rcParams.update({
		'figure.figsize': [8, 8],      # default canvas: 8 x 8 (matplotlib measures figsize in inches)
		'figure.dpi': 240,             # high-resolution rendering
		'svg.fonttype': 'none',        # keep SVG text as text instead of converting glyphs to paths
		'pdf.use14corefonts': True,    # restrict PDF output to the 14 core PDF fonts
	})
	import mygene

	def gene_annotations(names, map_from=['symbol', 'alias'], fields=['ensembl.gene','name','summary'], species='human'):

	names = pd.Series(names)

	print(f"passed {len(names)} symbols")

	names_stripped = names.str.strip()
	if any(names_stripped != names):
	from gprofiler import GProfiler
	gp = GProfiler(return_dataframe=True)

	def gprofiler_orthologs(query, human_to_mouse=False, mouse_to_human=False, organism='mmusculus', target='hsapiens', returnall=False):

	if isinstance(query, pd.Index): query = query.tolist()
	elif isinstance(query, pd.Series): query = query.values.tolist()

	q = [x for x in np.unique(query).tolist() if str(x) != 'nan']
	if len(q) != len(query): print(f'{len(q)} unique of {len(query)}')
	def pseudobulk_adata(adata, obs_vars):
		'''
		Sum `adata.X` over the cells of every group in `adata.obs.groupby(obs_vars)`.

		Returns a DataFrame with one row per group and one column per entry of
		`adata.var.index`.
		`adata` is presumably an AnnData object -- only `.obs`, `.var`, `.X` and
		label-based row indexing are used here.
		'''
		cells_by_group = adata.obs.groupby(obs_vars).groups

		# .sum(0) may hand back a 2-D matrix (e.g. for sparse X); flatten each to 1-D.
		summed = {
			group_key: np.squeeze(np.asarray(adata[cell_labels].X.sum(0)))
			for group_key, cell_labels in cells_by_group.items()
		}

		# Groups arrive as columns; transpose so that each group becomes a row.
		return pd.DataFrame(summed, index=adata.var.index).T

	def flat(mtx):
		'''Convert `mtx` to a plain ndarray and drop every length-1 axis.'''
		as_array = np.asarray(mtx)
		return as_array.squeeze()

	def pseudobulks(adata, by_column, do_pseudobulks_per=[], op='sum'):

	# check that all the entries in for_each are really columns in adata
	assert all([col in adata.obs.columns for col in do_pseudobulks_per])
	import h5py
	import numpy as np

	def read_h5_to_dict(h5_path):

	out_dict = {}

	def add_h5_node_to_dict(name, node, out_dict=out_dict):

	fullname = node.name
	import scipy
	import scipy.stats
	from scipy.cluster.hierarchy import dendrogram, linkage
	from scipy.cluster import hierarchy

	def sort_df_by_hclust_olo(df, how='both', method='ward', metric='euclidean'):
	'''
	how={'index', 'columns', 'both'}
	'''
	df = df.fillna(0)
	from scipy.stats import gmean
	def geometric_mean(df):
		'''
		Row-wise geometric mean of `df` that ignores zeros and missing values.

		Zeros are first turned into NaN, each row's geometric mean is taken over
		its remaining entries, and any NaN result is replaced with 0.

		Approach from https://www.reddit.com/r/learnpython/comments/mq5ea7/pandas_calculate_geometric_mean_while_ignoring/
		'''
		zeros_masked = df.replace(0, np.nan)
		# Only the non-missing entries of each row take part in the mean.
		per_row = zeros_masked.apply(lambda row: gmean(row.dropna()), axis=1)
		return per_row.fillna(0)
	import matplotlib.pyplot as plt
	from matplotlib_venn import venn2, venn3
	import upsetplot

	import matplotlib_inline.backend_inline
	# Ask the inline backend for SVG figure output.
	matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
	# The two lines below are IPython magics: they only run inside an IPython/Jupyter
	# session and are a syntax error in a plain .py file.
	# NOTE(review): this sets the inline figure format to 'retina' right after 'svg'
	# was selected above -- confirm which of the two is intended.
	%config InlineBackend.figure_format = 'retina'
	%matplotlib inline

	# Default canvas for this snippet: 6 wide by 3 tall.
	plt.rcParams['figure.figsize'] = [6, 3]