Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Last active February 4, 2016 09:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/bbcd7c9f76132dfbb566 to your computer and use it in GitHub Desktop.
Save afrendeiro/bbcd7c9f76132dfbb566 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
class DifferentialRegions(object):
    """
    Compute two-tailed empirical p-values for the difference between two variables.

    The observed ("test") distribution is ``a - b`` for every row of ``df``.
    The null distribution is obtained by subtracting random permutations of
    ``b`` from ``a``. The p-value of each row is the fraction of absolute null
    values that are strictly greater than the row's absolute observed
    difference.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both variables.
    a, b : column label
        Labels of the two columns of ``df`` to compare.
    permutations : int, optional
        Number of permutations of ``b`` used to build the null distribution.
        Must be at least 1.
    alpha : float, optional
        Significance threshold (passed to the FDR correction when
        ``correct`` is true).
    correct : bool, optional
        If true, adjust p-values with ``statsmodels``' ``fdrcorrection``.

    Attributes
    ----------
    test : numpy.ndarray
        Observed differences ``a - b``.
    null : numpy.ndarray
        Flat array of ``permutations * len(df)`` permuted differences.
    p_values : numpy.ndarray
        Empirical p-values (corrected ones when ``correct`` is true).
    significant : numpy.ndarray of bool
        Mask of rows called significant.
    significant_features : pandas.DataFrame
        Rows of ``df`` selected by ``significant``.

    Raises
    ------
    ValueError
        If ``permutations`` is smaller than 1.
    """

    def __init__(self, df, a, b, permutations=100, alpha=0.05, correct=True):
        super(DifferentialRegions, self).__init__()

        # An empty null distribution would make every p-value 0/0.
        if permutations < 1:
            raise ValueError(
                "permutations must be at least 1, got %r" % (permutations,)
            )

        # inputs
        self.df = df
        self.a = self.df[a].values
        self.b = self.df[b].values

        # options
        self.permutations = permutations
        self.alpha = alpha
        self.correct = correct

        # Make test distribution
        self.make_test_distribution()

        # Make null distribution
        self.make_null_distribution()

        # Compute p-values
        self.compute_p_values()

        # Return differential regions
        self.significant_features = self.df[self.significant]

    def make_test_distribution(self):
        """
        Generate test distribution from the difference between values of two variables.
        """
        self.test = self.a - self.b

    def make_null_distribution(self):
        """
        Generate null distribution from differences against permutations of ``b``.

        Each permutation contributes ``len(a)`` values; all of them are stored
        in one flat float array.
        """
        # Collect the chunks first and concatenate once: growing an array with
        # np.append inside the loop re-copies everything on every iteration.
        chunks = [
            self.a - np.random.permutation(self.b)
            for _ in range(self.permutations)
        ]
        if chunks:
            self.null = np.concatenate(chunks).astype(float, copy=False)
        else:
            self.null = np.array([])

    def compute_p_values(self):
        """
        Compute two-tailed p-values based on test and null distributions.

        Sets ``self.p_values`` and ``self.significant``. When ``self.correct``
        is true both come from ``fdrcorrection``; otherwise significance is
        ``p_values < alpha``.
        """
        null = np.abs(self.null)
        test = np.abs(self.test)
        total = float(null.shape[0])

        # Count, for every observed value, the null values strictly greater
        # than it. NaNs never compare greater than anything, so they are left
        # out of the ranking but still counted in the denominator.
        ranked = np.sort(null[~np.isnan(null)])
        exceeding = ranked.size - np.searchsorted(ranked, test, side="right")

        # A real array (not a lazy ``map`` object) so that the comparison and
        # the FDR correction below work on Python 3 as well.
        self.p_values = exceeding / total

        if self.correct:
            # Imported here so statsmodels is only required when correcting.
            from statsmodels.stats.multitest import fdrcorrection

            rejected, adjusted = fdrcorrection(self.p_values, self.alpha)
            self.p_values = adjusted
            self.significant = rejected
        else:
            self.significant = self.p_values < self.alpha
# Build a demo table: 10000 rows of random values in columns "a" and "b"
x = pd.DataFrame(data=np.random.rand(10000, 2), columns=["a", "b"])
# Run the permutation test with the default options
diff = DifferentialRegions(df=x, a="a", b="b")
# Select the rows flagged as significant
x.loc[diff.significant]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment