Skip to content

Instantly share code, notes, and snippets.

@IamPhytan
Last active October 25, 2020 01:45
Show Gist options
Save IamPhytan/bfaa9c53d98953cecebf87cc7630bfd3 to your computer and use it in GitHub Desktop.
sklearn Snippets

Snippets from Scikit-Learn

Theory

Priors

Source

# Set the fitted class priors: estimate them from the training labels when
# the user supplied none, otherwise take the user-provided values as-is.
# NOTE: the original paste had the `else:` branch over-indented, which is a
# SyntaxError — it must align with the `if`.
if self.priors is None:  # estimate priors from sample
    # Map labels to non-negative ints, then count occurrences per class;
    # dividing by the total sample count yields empirical frequencies.
    _, y_t = np.unique(y, return_inverse=True)  # non-negative ints
    self.priors_ = np.bincount(y_t) / float(len(y))
else:
    self.priors_ = np.asarray(self.priors)

Data dimensions

Source

# Sample and feature counts come from the 2-D design matrix X
# (rows are samples, columns are features); the class count comes
# from the labels seen during fit (self.classes_).
n_samples, n_features = X.shape
n_classes = len(self.classes_)

Predict

Source

def decision_function(self, X):
    """
    Predict confidence scores for samples.

    The confidence score for a sample is the signed distance of that
    sample to the hyperplane.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Samples.

    Returns
    -------
    array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
        Confidence scores per (sample, class) combination. In the binary
        case, confidence score for self.classes_[1] where >0 means this
        class would be predicted.
    """
    # Refuse to score before fit; then coerce the input to a validated
    # 2-D array (CSR is accepted without densification).
    check_is_fitted(self)
    X = check_array(X, accept_sparse='csr')

    # The fitted weight matrix fixes how many features each sample must have.
    expected = self.coef_.shape[1]
    if X.shape[1] != expected:
        raise ValueError("X has %d features per sample; expecting %d"
                         % (X.shape[1], expected))

    # Linear score: X @ coef.T + intercept, dense even for sparse X.
    scores = safe_sparse_dot(X, self.coef_.T, dense_output=True)
    scores = scores + self.intercept_
    # Binary problems carry a single column; flatten it to 1-D.
    if scores.shape[1] == 1:
        return scores.ravel()
    return scores

def predict(self, X):
    """
    Predict class labels for samples in X.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Samples.

    Returns
    -------
    C : array, shape [n_samples]
        Predicted class label per sample.
    """
    # One score per sample (binary) or one per (sample, class) pair.
    scores = self.decision_function(X)
    if scores.ndim == 1:
        # Binary: a strictly positive score selects classes_[1],
        # anything else selects classes_[0].
        class_idx = (scores > 0).astype(int)
    else:
        # Multiclass: pick the highest-scoring class in each row.
        class_idx = scores.argmax(axis=1)
    return self.classes_[class_idx]

Joint likelihood

https://github.com/scikit-learn/scikit-learn/blob/df61e9ed98b0777cc0962be6e2d161f4c30110fd/sklearn/naive_bayes.py#L448

def _joint_log_likelihood(self, X):
    joint_log_likelihood = []
    for i in range(np.size(self.classes_)):
        jointi = np.log(self.class_prior_[i])
        n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.sigma_[i, :]))
        n_ij -= 0.5 * np.sum(((X - self.theta_[i, :]) ** 2) /
                             (self.sigma_[i, :]), 1)
        joint_log_likelihood.append(jointi + n_ij)

    joint_log_likelihood = np.array(joint_log_likelihood).T
    return joint_log_likelihood
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment