Skip to content

Instantly share code, notes, and snippets.

@endrebak
Last active December 11, 2019 13:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save endrebak/8a625c3871742671bbbefd08ba948cda to your computer and use it in GitHub Desktop.
Save endrebak/8a625c3871742671bbbefd08ba948cda to your computer and use it in GitHub Desktop.
def table(region, gwas_minus_region, total_gwas):
pos_cols = "Chromosome Start End".split()
total_region = len(region.drop_duplicates(pos_cols))
tp = region.groupby("Trait").size()
fp = (tp - total_region).abs()
fn = gwas_minus_region.groupby("Trait").size()
tn = ((fn + tp + fp) - total_gwas).abs()
df = pd.concat([tp, fn, fp, tn], axis=1)
df.columns = "TP FP TN FN".split()
return df
def analyse_regions(region, gwas, name=None):
# print("Finding the intersection between the GWAS and regions.", file=sys.stderr)
region = gwas.overlap(region)
if not len(region):
print("No regions overlapping the GWAS catalog for {}.".format(name), file=sys.stderr)
return pd.DataFrame()
# need to remove those traits mapping multiple places to same genomic position
drop_duplicates = lambda df: df.drop_duplicates("Chromosome Start End Trait".split())
region = region.apply(drop_duplicates)
gwas_minus_region = gwas.subtract(region).apply(drop_duplicates)
# print("Finding the counts of SNPs in the regions for each of the {} terms.".format(len(region)), file=sys.stderr)
df = table(region.df, gwas_minus_region.df, len(gwas.apply(drop_duplicates)))
from pyranges.statistics import fisher_exact, fdr
fe = fisher_exact(df.TP, df.FP, df.TN, df.FN, pseudocount=0.01)
df.insert(0, "OR", fe.OR.values)
df.insert(0, "P", fe.P.values)
df.insert(0, "FDR", fdr(fe.P.values))
df = df.sort_values("P")
if not (name is None):
df.insert(df.shape[1], "Name", name)
return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment