Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
Last active April 12, 2019 20:16
  • Star 7 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save nkt1546789/9fbbf2f450779bde60c3 to your computer and use it in GitHub Desktop.
A wrapper class for PU (positive-unlabeled) classification in Python, following the method proposed by Elkan and Noto (2008).
import numpy as np
from numpy import random
from sklearn import base
class PUWrapper(object):
    """Positive-unlabeled (PU) wrapper around a probabilistic classifier.

    The wrapped classifier is trained to separate labeled (``s == 1``)
    from unlabeled examples.  After training, ``c_`` is set to the
    average probability the classifier assigns to the labeled class on
    the labeled examples, and ``predict`` uses ``0.5 * c_`` as its
    decision threshold instead of ``0.5``.

    Parameters
    ----------
    trad_clf : object
        Classifier exposing ``fit(X, s)`` and ``predict_proba(X)``; column
        1 of ``predict_proba`` is read as the labeled-class probability.
    n_fold : int, default 5
        Number of groups the labeled examples are split into when
        averaging the predicted probabilities.

    Attributes
    ----------
    c_ : float
        Mean over the non-empty groups of the per-group mean predicted
        probability.  Set by ``fit``.
    """

    def __init__(self, trad_clf, n_fold=5):
        self._trad_clf = trad_clf
        self._n_fold = n_fold

    def fit(self, X, s):
        """Fit the wrapped classifier on ``(X, s)`` and estimate ``c_``.

        Note that the classifier is fit once on all of the data and is
        not refit per group, so ``c_`` is estimated on examples the
        classifier has already seen.

        Parameters
        ----------
        X : array-like
            Feature matrix; must support boolean-mask row indexing.
        s : array-like
            Label indicator per row; entries equal to 1 mark labeled
            (positive) examples.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``s`` contains no entry equal to 1, because ``c_`` cannot
            be estimated without labeled examples.
        """
        # Coerce so that a plain list for ``s`` still yields an element-wise mask.
        labeled = np.asarray(s) == 1
        n_labeled = int(np.count_nonzero(labeled))
        if n_labeled == 0:
            raise ValueError(
                "PUWrapper.fit needs at least one labeled example (s == 1)"
            )

        self._trad_clf.fit(X, s)
        X_labeled = X[labeled]

        # Floor division keeps the group ids integral on Python 3; true
        # division would produce fractional ids that never equal ``k``
        # and silently drop most examples from the estimate.
        group_ids = np.arange(n_labeled) * self._n_fold // n_labeled
        group_ids = group_ids[random.permutation(n_labeled)]

        group_means = []
        for k in range(self._n_fold):
            members = group_ids == k
            # With fewer labeled examples than groups some groups are
            # empty; averaging an empty slice would give NaN.
            if not members.any():
                continue
            scores = self._trad_clf.predict_proba(X_labeled[members])[:, 1]
            group_means.append(np.mean(scores))

        self.c_ = np.mean(group_means)
        return self

    def predict_proba(self, X):
        """Return the wrapped classifier's ``predict_proba(X)`` unchanged.

        The values are NOT rescaled by ``c_``; only ``predict`` applies
        the ``c_`` adjustment (through its threshold).
        """
        return self._trad_clf.predict_proba(X)

    def predict(self, X):
        """Return a boolean array: True where column 1 of ``predict_proba``
        is at least ``0.5 * c_``.
        """
        scores = self.predict_proba(X)[:, 1]
        return scores >= (0.5 * self.c_)
@guicho271828
Copy link

Why did you split the examples into n_fold groups, take the mean of each group, and then take the mean of those means? The result should be the same as the mean over the whole data.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment