Skip to content

Instantly share code, notes, and snippets.

@IamPhytan
Last active October 25, 2020 01:45
Show Gist options
Save IamPhytan/bfaa9c53d98953cecebf87cc7630bfd3 to your computer and use it in GitHub Desktop.
sklearn Snippets

Snippets from Scikit-Learn

Theory

Priors

Source

# Set the fitted class priors: estimate them from the training labels when
# the user supplied none, otherwise take the user-provided values as-is.
# NOTE: the original paste had the `else:` branch over-indented, which is a
# SyntaxError — it must align with the `if`.
if self.priors is None:  # estimate priors from sample
    # Map labels to non-negative ints, then count occurrences per class;
    # dividing by the total sample count yields empirical frequencies.
    _, y_t = np.unique(y, return_inverse=True)  # non-negative ints
    self.priors_ = np.bincount(y_t) / float(len(y))
else:
    self.priors_ = np.asarray(self.priors)

Data dimensions

Source

# Sample and feature counts come from the 2-D design matrix X
# (rows are samples, columns are features); the class count comes
# from the labels seen during fit (self.classes_).
n_samples, n_features = X.shape
n_classes = len(self.classes_)

Predict

Source

def decision_function(self, X):
    """
    Predict confidence scores for samples.

    The confidence score for a sample is the signed distance of that
    sample to the hyperplane.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Samples.

    Returns
    -------
    array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
        Confidence scores per (sample, class) combination. In the binary
        case, confidence score for self.classes_[1] where >0 means this
        class would be predicted.
    """
    # Refuse to score before fit; then coerce the input to a validated
    # 2-D array (CSR is accepted without densification).
    check_is_fitted(self)
    X = check_array(X, accept_sparse='csr')

    # The fitted weight matrix fixes how many features each sample must have.
    expected = self.coef_.shape[1]
    if X.shape[1] != expected:
        raise ValueError("X has %d features per sample; expecting %d"
                         % (X.shape[1], expected))

    # Linear score: X @ coef.T + intercept, dense even for sparse X.
    scores = safe_sparse_dot(X, self.coef_.T, dense_output=True)
    scores = scores + self.intercept_
    # Binary problems carry a single column; flatten it to 1-D.
    if scores.shape[1] == 1:
        return scores.ravel()
    return scores

def predict(self, X):
    """
    Predict class labels for samples in X.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Samples.

    Returns
    -------
    C : array, shape [n_samples]
        Predicted class label per sample.
    """
    # One score per sample (binary) or one per (sample, class) pair.
    scores = self.decision_function(X)
    if scores.ndim == 1:
        # Binary: a strictly positive score selects classes_[1],
        # anything else selects classes_[0].
        class_idx = (scores > 0).astype(int)
    else:
        # Multiclass: pick the highest-scoring class in each row.
        class_idx = scores.argmax(axis=1)
    return self.classes_[class_idx]

Joint likelihood

https://github.com/scikit-learn/scikit-learn/blob/df61e9ed98b0777cc0962be6e2d161f4c30110fd/sklearn/naive_bayes.py#L448

def _joint_log_likelihood(self, X):
    joint_log_likelihood = []
    for i in range(np.size(self.classes_)):
        jointi = np.log(self.class_prior_[i])
        n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :]))
        n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /
                             (self.sigma_[i, :]), 1)
        joint_log_likelihood.append(jointi + n_ij)

    joint_log_likelihood = np.array(joint_log_likelihood).T
    return joint_log_likelihood
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment