Last active
February 4, 2016 09:40
-
-
Save afrendeiro/bbcd7c9f76132dfbb566 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
class DifferentialRegions(object):
    """
    Compute two-tailed empirical p-values for the difference between values of two variables.

    The test distribution is the per-row difference ``a - b``. The null
    distribution pools ``a - permutation(b)`` over ``permutations`` random
    shuffles of ``b``. A row's p-value is the fraction of pooled null values
    whose absolute value is strictly greater than the absolute observed
    difference of that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both variables, one observation per row.
    a, b : column labels
        Columns of ``df`` to compare.
    permutations : int
        Number of random shuffles of ``b`` pooled into the null distribution.
    alpha : float
        Significance threshold (passed to the FDR procedure when ``correct``
        is true, otherwise compared directly with the raw p-values).
    correct : bool
        If true, adjust p-values with ``statsmodels``' ``fdrcorrection``.

    Attributes
    ----------
    test : numpy.ndarray
        Observed differences ``a - b``.
    null : numpy.ndarray
        Flat array of all permuted differences.
    p_values : numpy.ndarray
        Empirical p-values (FDR-adjusted when ``correct`` is true).
    significant : numpy.ndarray of bool
        Mask of rows called significant at ``alpha``.
    significant_features : pandas.DataFrame
        Rows of ``df`` selected by ``significant``.
    """

    def __init__(self, df, a, b, permutations=100, alpha=0.05, correct=True):
        super(DifferentialRegions, self).__init__()

        # inputs
        self.df = df
        self.a = self.df[a].values
        self.b = self.df[b].values

        # options
        self.permutations = permutations
        self.alpha = alpha
        self.correct = correct

        # Make test distribution
        self.make_test_distribution()

        # Make null distribution
        self.make_null_distribution()

        # Compute p-values
        self.compute_p_values()

        # Keep the differential regions (rows passing the significance call)
        self.significant_features = self.df[self.significant]

    def make_test_distribution(self):
        """
        Generate test distribution from the difference between values of two variables.
        """
        self.test = self.a - self.b

    def make_null_distribution(self):
        """
        Generate null distribution by pooling differences against shuffled copies of ``b``.
        """
        draws = [
            np.ravel(self.a - np.random.permutation(self.b))
            for _ in range(self.permutations)
        ]
        # Concatenate once instead of growing the array inside the loop.
        # The leading empty float array keeps the result a flat float64
        # array, and makes zero permutations yield an empty array.
        self.null = np.concatenate([np.array([])] + draws)

    def compute_p_values(self):
        """
        Compute two-tailed p-values based on test and null distributions.

        Sets ``self.p_values`` and ``self.significant``, both as numpy arrays.
        """
        null = np.abs(self.null)
        test = np.abs(self.test)
        total = float(null.shape[0])

        # NaN never compares greater than anything, so NaNs are left out of
        # the ranked values but still counted in the denominator ``total``.
        ranked = np.sort(null[~np.isnan(null)])

        # Number of null values strictly greater than each test value:
        # side="right" gives the count of ranked values <= the test value.
        exceeding = ranked.shape[0] - np.searchsorted(ranked, test, side="right")

        # A real array (not a lazy ``map``) so the comparison with alpha and
        # the FDR routine below both work element-wise.
        self.p_values = exceeding / total

        if self.correct:
            from statsmodels.stats.multitest import fdrcorrection
            # fdrcorrection returns (rejected mask, adjusted p-values)
            rejected, adjusted = fdrcorrection(self.p_values, self.alpha)
            self.p_values = adjusted
            self.significant = rejected
        else:
            self.significant = self.p_values < self.alpha
# Toy input: 10000 rows of uniform random numbers in two columns, "a" and "b"
x = pd.DataFrame(data=np.random.rand(10000, 2), columns=["a", "b"])
# Run the permutation test of column "a" against column "b" with the default options
diff = DifferentialRegions(df=x, a="a", b="b")
# Subset the input to the rows flagged as significant
# (a bare expression: shown in an interactive session, not stored)
x[diff.significant]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment