Skip to content

Instantly share code, notes, and snippets.

@lacava
Forked from rspeare/p_values_for_logreg.py
Created March 6, 2019 15:06
Show Gist options
  • Save lacava/38159f5ecc14737d5272cbaf28f88a37 to your computer and use it in GitHub Desktop.
Save lacava/38159f5ecc14737d5272cbaf28f88a37 to your computer and use it in GitHub Desktop.
P values for sklearn logistic regression
from sklearn import linear_model
import numpy as np
import scipy.stats as stat
class LogisticReg:
"""
Wrapper Class for Logistic Regression which has the usual sklearn instance
in an attribute self.model, and pvalues, z scores and estimated
errors for each coefficient in
self.z_scores
self.p_values
self.sigma_estimates
as well as the negative hessian of the log Likelihood (Fisher information)
self.F_ij
"""
def __init__(self,*args,**kwargs):#,**kwargs):
self.model = linear_model.LogisticRegression(*args,**kwargs)#,**args)
def fit(self,X,y):
self.model.fit(X,y)
#### Get p-values for the fitted model ####
denom = (2.0*(1.0+np.cosh(self.model.decision_function(X))))
denom = np.tile(denom,(X.shape[1],1)).T
F_ij = np.dot((X/denom).T,X) ## Fisher Information Matrix
Cramer_Rao = np.linalg.inv(F_ij) ## Inverse Information Matrix
sigma_estimates = np.sqrt(np.diagonal(Cramer_Rao))
z_scores = self.model.coef_[0]/sigma_estimates # z-score for eaach model coefficient
p_values = [stat.norm.sf(abs(x))*2 for x in z_scores] ### two tailed test for p-values
self.z_scores = z_scores
self.p_values = p_values
self.sigma_estimates = sigma_estimates
self.F_ij = F_ij
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment