Skip to content

Instantly share code, notes, and snippets.

@kaspermunch
Last active July 16, 2024 14:52
Show Gist options
  • Save kaspermunch/eda0b23a2d1dac99b86b99ec52a9003f to your computer and use it in GitHub Desktop.
Save kaspermunch/eda0b23a2d1dac99b86b99ec52a9003f to your computer and use it in GitHub Desktop.
Fisher's exact test from Google Sheet columns
from scipy.stats import fisher_exact
import pandas as pd
from IPython.display import display, Markdown
def read_google_sheet():
    """Download the gene-list spreadsheet and return it as a DataFrame.

    The sheet is fetched as CSV over HTTP on every call. The row at
    position 1 is used as the header, every cell is read as a string, and
    columns whose name starts with ``'Unnamed'`` are dropped (presumably
    the placeholder names pandas gives to columns without a header).
    """
    SHEET_ID = '1JSjSLuto3jqdEnnG7JqzeC_1pUZw76n7XueVAYrUOpk'
    SHEET_NAME = 'Sheet1'
    url = f'https://docs.google.com/spreadsheets/d/{SHEET_ID}/gviz/tq?tqx=out:csv&sheet={SHEET_NAME}'
    sheet = pd.read_csv(url, header=1, dtype='str')

    # Boolean mask, one entry per column: True for columns that are kept.
    keep_column = []
    for column_name in sheet.columns:
        keep_column.append(not column_name.startswith('Unnamed'))
    return sheet.loc[:, keep_column]
def gene_list_names():
    """Return the column names of the gene-list sheet, sorted ascending."""
    names = read_google_sheet().columns.tolist()
    names.sort()
    return names
def gene_list(name):
    """Return the non-null entries of the sheet column called ``name``.

    The sheet is re-read through ``read_google_sheet`` on every call.
    """
    column = read_google_sheet()[name]
    present = column.notna()
    return column[present]
def fisher(a, b, background=(None, None), alternative='greater', format_case=False):
    """Run Fisher's exact test on the overlap of two gene sets and display it.

    Builds the 2x2 contingency table of membership in ``a`` versus
    membership in ``b`` among the background genes, tests it with
    ``scipy.stats.fisher_exact`` and renders the table, the p-value and the
    sorted overlapping genes as notebook output. Nothing is returned.

    Args:
        a: ``(name, genes)`` pair; ``name`` labels the table columns.
        b: ``(name, genes)`` pair; ``name`` labels the table rows.
        background: ``(name, genes)`` pair restricting the test to those
            genes. With the default ``(None, None)`` (or ``None``) the
            union of ``a`` and ``b`` is used; in that case the
            "in neither set" cell of the table is always zero.
        alternative: Passed through unchanged to ``fisher_exact``.
        format_case: If true, labels that are not all-uppercase are
            capitalized in the positive label and lowercased after
            ``'Not '``; all-uppercase names are left untouched.
    """
    (a_name, a_genes), (b_name, b_genes) = a, b
    a_genes = set(a_genes)
    b_genes = set(b_genes)

    # Decide once whether an explicit background was supplied.
    has_background = background is not None and background != (None, None)
    if has_background:
        background_name, background_set = background
        background_set = set(background_set)
    else:
        background_set = a_genes | b_genes

    # Only genes that are part of the background are counted.
    a_in_background = background_set & a_genes
    b_in_background = background_set & b_genes
    overlap = a_in_background & b_in_background

    total = len(background_set)
    n_a = len(a_in_background)
    n_b = len(b_in_background)
    n_both = len(overlap)

    # Rows: in b / not in b.  Columns: in a / not in a.
    table = [[n_both, n_b - n_both],
             [n_a - n_both, total - (n_b + n_a) + n_both]]
    pvalue = fisher_exact(table, alternative=alternative).pvalue

    if has_background:
        display(Markdown(f"Among {len(background_set)} **{background_name}** genes:"))

    if format_case:
        columns = [a_name if a_name.isupper() else a_name.capitalize(),
                   f'Not {a_name if a_name.isupper() else a_name.lower()}']
        index = [b_name if b_name.isupper() else b_name.capitalize(),
                 f'Not {b_name if b_name.isupper() else b_name.lower()}']
    else:
        columns = [a_name, f'not {a_name}']
        index = [b_name, f'not {b_name}']

    # Use the plain constructor: DataFrame.from_records treats ``index``
    # entries that match column names as fields to pull out of the data,
    # which would mangle the table when both sets carry the same name.
    display(pd.DataFrame(table, columns=columns, index=index))

    display(Markdown(f'**{a_name}** x **{b_name}** overlap has **p-value:** {pvalue:.2e}'))
    text = ', '.join(sorted(overlap))
    display(Markdown('**overlap:**<br> ' + text))
# Load the three gene lists from the sheet (one download per list), then test
# whether brain and spermatid genes overlap more than expected among ASD genes.
asd_set, spermatid_set, brain_set = (
    set(gene_list(column_name))
    for column_name in (
        'sfari_all_conf',
        'meritxell_spermatid_expr',
        'hpa_brain_prot',
    )
)
fisher(
    a=('Brain', brain_set),
    b=('spermatid', spermatid_set),
    background=('ASD', asd_set),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment