kingjr/block_ridge.py

## block_ridge.py
from sklearn.linear_model import RidgeCV
import numpy as np
from scipy import linalg


class BlockRidgeCV(RidgeCV):
    def __init__(self, alphas, blocks, fit_intercept=False, **kwargs):
        super().__init__(**kwargs)
        self.fit_intercept = False
        self.alphas = alphas
        self.blocks = blocks

    def fit(self, X, Y):
        assert X.shape[1] == len(self.blocks)
        self.coef_ = block_ridge(X, Y, self.blocks, alphas)
        self.intercept_ = 0
        return self

def block_ridge(X, Y, blocks, alphas, tol=1e-15, retrain=True):
    from scipy import linalg
    from itertools import product
    from tqdm import tqdm

    # We'll optimize the regularization via (nested) CV
    train, test = slice(0, None, 2), slice(1, None, 2)

    # Eigen vectors of X @ Y
    U, s, V = linalg.svd(X, full_matrices=False)
    UY = U[train].T @ Y[train]

    # Get rid off zero-ish columns
    idx = s > tol
    s_nnz = s[idx][:, None]

    # Compute error for each combination of alpha regularization
    alpha_list = [alphas] * len(set(blocks))
    alpha_combinations = [c for c in product(*alpha_list)]
    for jdx, unique_alphas in enumerate(tqdm(alpha_combinations)):

        # build regularizer:
        # [alpha_1, alpha_1, alpha_1, ... alpha_2, alpha_2 ...]
        alpha_block = np.zeros(X.shape[1])
        for block, alpha in zip(np.unique(blocks), unique_alphas):
            col = np.where(blocks==block)[0]
            alpha_block[col] = alpha

        # Similar to sklearn
        d = np.zeros((s.size, 1), dtype=X.dtype)
        d[idx] = s_nnz / (s_nnz ** 2 + alpha_block[idx, None])
        dUY = d * UY
        beta = V.T @ dUY
        err = np.sum((Y[test] - X[test] @ beta)**2, 0)

        # Optimize regularization block for each dimension of Y independently
        # note this is diff from sklearn
        if jdx == 0:
            best_beta = beta
            best_err = err
            best_alpha = alpha_block[None, :] * np.ones((Y.shape[1], X.shape[1]))
        else:
            for col, (beta_col, err_col, best_err_col) in enumerate(zip(beta, err, best_err)):
                if err_col < best_err_col:
                    best_beta[col] = beta_col
                    best_err[col] = err_col
                    best_alpha[col] = alpha_block

    # retrain on all data with optimized regularization
    if retrain:
        UY = U.T @ Y

        for col in range(Y.shape[1]):
            d = np.zeros((s.size, 1), dtype=X.dtype)
            d[idx] = s_nnz / (s_nnz ** 2 + best_alpha[col, idx, None])  # FIXME
            dUY = d * UY[:, [col]]
            best_beta[:, [col]] = V.T @ dUY
    return best_beta

n = 1000
n_x = 200
n_y = 100

X = np.random.randn(n, n_x)
A = np.random.randn(n_y, n_x) * 1e3
A[2:] = 0
B = np.random.randn(n_y, n_x)
B[:2] = 0

W = A+B
Y = X @ W.T + np.random.randn(n, n_y)


alphas = np.logspace(-1, 3, 6)
blocks = np.r_[np.ones(2), np.ones(X.shape[1]-2)]
betas = block_ridge(X, Y, blocks, alphas)
	from sklearn.linear_model import RidgeCV
	import numpy as np
	from scipy import linalg


	class BlockRidgeCV(RidgeCV):
	def __init__(self, alphas, blocks, fit_intercept=False, **kwargs):
	super().__init__(**kwargs)
	self.fit_intercept = False
	self.alphas = alphas
	self.blocks = blocks

	def fit(self, X, Y):
	assert X.shape[1] == len(self.blocks)
	self.coef_ = block_ridge(X, Y, self.blocks, alphas)
	self.intercept_ = 0
	return self

	def block_ridge(X, Y, blocks, alphas, tol=1e-15, retrain=True):
	from scipy import linalg
	from itertools import product
	from tqdm import tqdm

	# We'll optimize the regularization via (nested) CV
	train, test = slice(0, None, 2), slice(1, None, 2)

	# Eigen vectors of X @ Y
	U, s, V = linalg.svd(X, full_matrices=False)
	UY = U[train].T @ Y[train]

	# Get rid off zero-ish columns
	idx = s > tol
	s_nnz = s[idx][:, None]

	# Compute error for each combination of alpha regularization
	alpha_list = [alphas] * len(set(blocks))
	alpha_combinations = [c for c in product(*alpha_list)]
	for jdx, unique_alphas in enumerate(tqdm(alpha_combinations)):

	# build regularizer:
	# [alpha_1, alpha_1, alpha_1, ... alpha_2, alpha_2 ...]
	alpha_block = np.zeros(X.shape[1])
	for block, alpha in zip(np.unique(blocks), unique_alphas):
	col = np.where(blocks==block)[0]
	alpha_block[col] = alpha

	# Similar to sklearn
	d = np.zeros((s.size, 1), dtype=X.dtype)
	d[idx] = s_nnz / (s_nnz ** 2 + alpha_block[idx, None])
	dUY = d * UY
	beta = V.T @ dUY
	err = np.sum((Y[test] - X[test] @ beta)**2, 0)

	# Optimize regularization block for each dimension of Y independently
	# note this is diff from sklearn
	if jdx == 0:
	best_beta = beta
	best_err = err
	best_alpha = alpha_block[None, :] * np.ones((Y.shape[1], X.shape[1]))
	else:
	for col, (beta_col, err_col, best_err_col) in enumerate(zip(beta, err, best_err)):
	if err_col < best_err_col:
	best_beta[col] = beta_col
	best_err[col] = err_col
	best_alpha[col] = alpha_block

	# retrain on all data with optimized regularization
	if retrain:
	UY = U.T @ Y

	for col in range(Y.shape[1]):
	d = np.zeros((s.size, 1), dtype=X.dtype)
	d[idx] = s_nnz / (s_nnz ** 2 + best_alpha[col, idx, None]) # FIXME
	dUY = d * UY[:, [col]]
	best_beta[:, [col]] = V.T @ dUY
	return best_beta

	n = 1000
	n_x = 200
	n_y = 100

	X = np.random.randn(n, n_x)
	A = np.random.randn(n_y, n_x) * 1e3
	A[2:] = 0
	B = np.random.randn(n_y, n_x)
	B[:2] = 0

	W = A+B
	Y = X @ W.T + np.random.randn(n, n_y)


	alphas = np.logspace(-1, 3, 6)
	blocks = np.r_[np.ones(2), np.ones(X.shape[1]-2)]
	betas = block_ridge(X, Y, blocks, alphas)