Forked from avidale/inequality-constraints-in-linear-regression.ipynb
Created
January 18, 2021 11:16
-
-
Save JoshuaC3/3ce811a70a4f05548fbc37933341ddc0 to your computer and use it in GitHub Desktop.
inequality constraints in linear regression.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "inequality constraints in linear regression.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/avidale/6668635c318aceebe0142de013a4cf77/inequality-constraints-in-linear-regression.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "oVlDkAHybHpj", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"We want to fit a linear model $\\hat{Y}=\\alpha + X\\beta$ with a linear constrant on $\\beta$: $A\\beta \\leq B$, where $A$ is a matrix, $B$ is a vector, and vector inequality is element-wize. \n", | |
"\n", | |
"We do it by coordinate descent, recalculating constraints for every coefficient on each step" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vjl-v4PIazpI", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 71 | |
}, | |
"outputId": "93b0ec26-f658-4639-ad94-d29d33845ba2" | |
}, | |
"source": [ | |
"from sklearn.linear_model.base import LinearModel\n", | |
"from sklearn.base import RegressorMixin\n", | |
"from sklearn.utils import check_X_y\n", | |
"import numpy as np\n", | |
"\n", | |
"class ConstrainedLinearRegression(LinearModel, RegressorMixin):\n", | |
"\n", | |
" def __init__(self, A, B, fit_intercept=True, normalize=False, copy_X=True, tol=1e-15, lr=1.0):\n", | |
" self.A = A\n", | |
" self.B = B\n", | |
" self.fit_intercept = fit_intercept\n", | |
" self.normalize = normalize\n", | |
" self.copy_X = copy_X\n", | |
" self.tol = tol\n", | |
" self.lr = lr\n", | |
"\n", | |
" def fit(self, X, y, initial_beta=None):\n", | |
" X, y = check_X_y(\n", | |
" X, y, \n", | |
" accept_sparse=['csr', 'csc', 'coo'], \n", | |
" y_numeric=True, multi_output=False\n", | |
" )\n", | |
" X, y, X_offset, y_offset, X_scale = self._preprocess_data(\n", | |
" X, y, \n", | |
" fit_intercept=self.fit_intercept, \n", | |
" normalize=self.normalize, \n", | |
" copy=self.copy_X\n", | |
" )\n", | |
" if initial_beta is not None:\n", | |
" # providing initial_beta may be useful, \n", | |
" # if initial solution does not respect the constraints. \n", | |
" beta = initial_beta\n", | |
" else:\n", | |
" beta = np.zeros(X.shape[1]).astype(float)\n", | |
" prev_beta = beta + 1\n", | |
" hessian = np.dot(X.transpose(), X)\n", | |
" while not (np.abs(prev_beta - beta)<self.tol).all():\n", | |
" prev_beta = beta.copy()\n", | |
" for i in range(len(beta)):\n", | |
" grad = np.dot(np.dot(X,beta) - y, X)\n", | |
" max_coef = np.inf\n", | |
" min_coef = -np.inf\n", | |
" for a_row, b_value in zip(self.A, self.B):\n", | |
" if a_row[i] == 0:\n", | |
" continue\n", | |
" zero_out = beta.copy()\n", | |
" zero_out[i] = 0\n", | |
" bound = (b_value - np.dot(zero_out, a_row)) / a_row[i] \n", | |
" if a_row[i] > 0:\n", | |
" max_coef = np.minimum(max_coef, bound)\n", | |
" elif a_row[i] < 0:\n", | |
" min_coef = np.maximum(min_coef, bound)\n", | |
" assert min_coef <= max_coef, \"the constraints are inconsistent\"\n", | |
" beta[i] = np.minimum(\n", | |
" max_coef, \n", | |
" np.maximum(\n", | |
" min_coef,\n", | |
" beta[i] - (grad[i] / hessian[i,i]) * self.lr\n", | |
" )\n", | |
" )\n", | |
" self.coef_ = beta\n", | |
" self._set_intercept(X_offset, y_offset, X_scale)\n", | |
" return self " | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:144: FutureWarning: The sklearn.linear_model.base module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.linear_model. Anything that cannot be imported from sklearn.linear_model is now part of the private API.\n", | |
" warnings.warn(message, FutureWarning)\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "E2uDGUElguDR", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"For example, we can constrain our model that \n", | |
"$$1 \\beta_1 + 1 \\beta_2 + ... + 1 \\beta_13 \\leq -7$$\n", | |
"and\n", | |
"$$-1 \\beta_1 -1 \\beta_2 - ... - 1 \\beta_13 \\leq 11 $$\n", | |
"which is a fancy way of saying that the sum of coefficients should be between -11 and -7.\n", | |
"\n", | |
"And voila! Our model respects this constraint. It is your bro." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "i1LGD8agc65t", | |
"colab_type": "code", | |
"outputId": "8ef33477-3746-492f-f2dd-9e0ee92d621b", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
} | |
}, | |
"source": [ | |
"from sklearn.datasets import load_boston\n", | |
"from sklearn.linear_model import LinearRegression\n", | |
"X, y = load_boston(return_X_y=True)\n", | |
"model = ConstrainedLinearRegression(\n", | |
" A=np.concatenate([np.ones((1, 13)), -np.ones((1, 13))]),\n", | |
" B=np.array([-7, 11])\n", | |
")\n", | |
"model.fit(X, y)\n", | |
"print(model.intercept_)\n", | |
"print(model.coef_)\n", | |
"print(model.coef_.sum())" | |
], | |
"execution_count": 2, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"34.18314035303051\n", | |
"[-1.06490237e-01 4.66708281e-02 8.69665528e-03 2.66974122e+00\n", | |
" -1.48959727e+01 3.86142746e+00 -2.07481327e-03 -1.43212060e+00\n", | |
" 2.98392331e-01 -1.25258469e-02 -9.19338378e-01 9.47601379e-03\n", | |
" -5.25881977e-01]\n", | |
"-11.0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "3Yvw8BdIgu_J", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"And this is the baseline: unconstrained linear regression. It does not respect the constraint. It is not your bro. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "CdAPpPrPc-a_", | |
"colab_type": "code", | |
"outputId": "0ab1335b-b3d8-49db-bc48-c6ce619772e9", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
} | |
}, | |
"source": [ | |
"model = LinearRegression()\n", | |
"model.fit(X, y)\n", | |
"print(model.intercept_)\n", | |
"print(model.coef_)\n", | |
"print(model.coef_.sum())" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"36.459488385090125\n", | |
"[-1.08011358e-01 4.64204584e-02 2.05586264e-02 2.68673382e+00\n", | |
" -1.77666112e+01 3.80986521e+00 6.92224640e-04 -1.47556685e+00\n", | |
" 3.06049479e-01 -1.23345939e-02 -9.52747232e-01 9.31168327e-03\n", | |
" -5.24758378e-01]\n", | |
"-13.9603981374292\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "MWATYySJCvlq", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"### The impact of \"learning rate\"\n", | |
"\n", | |
"For some inputs, coordinate descent can stuck in a suboptimal corner. To prevent it, we can decrease the learning rate." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "OdgtL5wShmZ2", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"X = np.array([[ 6.76395100e-03, 1.05127870e-02, 1.38131920e-02, 8.91216900e-03],\n", | |
"[ 3.11930700e-03, -1.18833000e-04, -5.56112100e-03,-1.40301670e-02],\n", | |
"[ 1.22898000e-02, 1.48354070e-02, 1.76495830e-02,1.46773540e-02],\n", | |
"[ 8.20155400e-03, 1.03513650e-02, 9.93534000e-03,1.05970970e-02],\n", | |
"[ 5.84342900e-03, 5.10907300e-03, 7.89939000e-03,1.37747080e-02],\n", | |
"[-1.82564300e-03, -1.21698960e-02, -2.41190460e-02,-2.72551080e-02],\n", | |
"[ 2.49304000e-05, -3.81050000e-03, -7.14523700e-03,-1.21222690e-02],\n", | |
"[ 2.20000000e-02, 3.61578590e-02, 5.27356990e-02,6.43562460e-02],\n", | |
"[ 1.04000000e-02, 1.26722980e-02, 1.50119400e-02,2.13071580e-02],\n", | |
"[ 1.13000000e-02, 1.55012380e-02, 2.93593550e-02,5.01564280e-02],\n", | |
"[ 4.31893300e-03, 8.90650800e-03, 1.62781490e-02,2.58799160e-02],\n", | |
"[-4.10587000e-05, -3.94980900e-03, -5.08411500e-03,-9.01910500e-03],\n", | |
"[-2.98000000e-04, -5.12142700e-03, -1.46469070e-02,-2.54150210e-02],\n", | |
"[ 3.04000000e-03, -3.39805700e-03, -9.74336200e-03,-1.52240690e-02],\n", | |
"[ 9.17000000e-03, 1.30487420e-02, 2.33631520e-02,2.75515380e-02],\n", | |
"[ 1.67858300e-03, -6.15091800e-03, -1.59148770e-02,-2.34790290e-02],\n", | |
"[ 7.63690800e-03, 8.28053100e-03, 5.80860500e-03,7.27193000e-04],\n", | |
"[ 1.28940800e-02, -2.54326700e-03, -1.46813370e-02,-2.31787980e-02],\n", | |
"[ 1.77329260e-02, 2.27723310e-02, 3.30301960e-02,4.45734430e-02],\n", | |
"[ 1.02051920e-02, 1.15040150e-02, 9.76096600e-03,4.02489100e-03],\n", | |
"[ 6.66632800e-03, 1.91852410e-02, 2.52234550e-02,3.31616260e-02],\n", | |
"[ 4.08310700e-03, 4.35669700e-03, -6.53384000e-04,-7.84317600e-03],\n", | |
"[ 7.79128400e-03, 9.49587000e-03, 1.26496340e-02,1.46050810e-02],\n", | |
"[ 8.28355200e-03, 7.66034500e-03, 9.65023700e-03,6.15344600e-03],\n", | |
"[ 8.79511300e-03, 1.07991700e-02, 8.79172400e-03,4.14202900e-03],\n", | |
"[ 9.26750300e-03, 1.95014440e-02, 2.70481780e-02,2.18226380e-02],\n", | |
"[ 5.34313000e-04, -4.78709000e-04, -4.36123200e-03,-1.08921810e-02],\n", | |
"[ 7.35792400e-03, 1.13625030e-02, 1.09614140e-02,8.92494200e-03],\n", | |
"[ 4.28727900e-03, 1.61191700e-03, 2.38737700e-03,7.29546800e-03],\n", | |
"[ 5.54352700e-03, 3.69901700e-03, -4.00165900e-03,-5.07037900e-03],\n", | |
"[ 4.55277800e-03, 9.19459000e-03, 8.14547700e-03,-2.54201800e-03],\n", | |
"[ 5.42147000e-03, 1.34941860e-02, 1.90196850e-02,2.05033210e-02],\n", | |
"[ 9.42743600e-03, 1.94949930e-02, 1.10851060e-02,8.66207300e-03],\n", | |
"[ 7.85459900e-03, 1.68533700e-03, 8.06887000e-04,-2.86475900e-03],\n", | |
"[-5.04015600e-03, -7.08023830e-02, -1.09417595e-01,-1.16886912e-01],\n", | |
"[ 4.09176120e-02, 7.53775410e-02, 5.31236080e-02,1.80014280e-02]])\n" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "YpYy0TPUB-rz", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"y = np.array([ 0.00981752, -0.00945781, 0.01503509, 0.01026975, 0.01106711,\n", | |
"-0.02300287, -0.00972671, 0.05655607, 0.01859124, 0.03931067,\n", | |
"0.02070886, -0.00702471, -0.01954932, -0.01169865, 0.02418464,\n", | |
"-0.01865414, 0.00299641, -0.0170793 , 0.0386599 , 0.00638496,\n", | |
"0.02901831, -0.00440659, 0.01331681, 0.00749591, 0.00638459,\n", | |
"0.02274299, -0.00743834, 0.00976891, 0.00508093, -0.00349541,\n", | |
"0.00221714, 0.01862392, 0.01185967, -0.00043719, -0.09747239,\n", | |
"0.03918195])" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "GnnzfxOiFACV", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"The constraints below enforce that the sum of all coefficients is between 0 and 1, and each individual coefficient is between 0 and 1 as well. \n", | |
"\n", | |
"We run the optimization with different learning rates, and see that with learning rate of 1 the optimization gets stuck in a suboptimal corner. However, with learning rate of 1e-4 it converges to a much better solution (and that further decreasing lr yields a negligible improvement).\n", | |
"\n", | |
"\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZSR5oIhLCChP", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 136 | |
}, | |
"outputId": "d9172978-6a1b-4f50-f02f-54e739bd194e" | |
}, | |
"source": [ | |
"A = np.concatenate([-np.identity(X.shape[1]), np.identity(X.shape[1]), np.ones((1, X.shape[1])), -np.ones((1, X.shape[1]))])\n", | |
"B = np.concatenate((np.zeros(X.shape[1]), np.ones(X.shape[1]), np.array([1, 0])))\n", | |
"\n", | |
"for lr in [1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]:\n", | |
" model = ConstrainedLinearRegression(\n", | |
" A=A,\n", | |
" B=B,\n", | |
" lr=lr,\n", | |
" )\n", | |
" model.fit(X, y)\n", | |
" print(lr, ((model.predict(X) - y)**2).sum()**0.5, model.coef_, model.coef_.sum())" | |
], | |
"execution_count": 32, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"1 0.12359713317487613 [1. 0. 0. 0.] 1.0\n", | |
"0.1 0.06453175639689608 [0.44664155 0.22728538 0.18483899 0.14123408] 1.0\n", | |
"0.01 0.057848955909340485 [0.4026343 0.22291547 0.19637926 0.17807097] 1.0\n", | |
"0.001 0.0571191533538363 [0.39817337 0.22207123 0.19708779 0.1826676 ] 1.0\n", | |
"0.0001 0.05706354120505105 [0.39777213 0.22207764 0.19722112 0.18292911] 1.0\n", | |
"1e-05 0.05705461221555 [0.39771485 0.22206381 0.19725026 0.18297108] 1.0\n", | |
"1e-06 0.05705397400955906 [0.3977102 0.22206391 0.19725191 0.18297398] 1.0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "BIL-Kqh0HMMh", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"Unconstrained linear regression gives a solution with much much better predictive power. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "KUOnF0bzDlVL", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "2a839f4f-1239-4dfb-992a-04d13ef03c41" | |
}, | |
"source": [ | |
"model = LinearRegression()\n", | |
"model.fit(X, y)\n", | |
"print(((model.predict(X) - y)**2).sum()**0.5, model.coef_, model.coef_.sum())\n", | |
"old_coef = model.coef_" | |
], | |
"execution_count": 36, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0.0046915108138479355 [0.18638468 0.2053277 0.07690013 0.64233345] 1.1109459580593493\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "bvn2O29RH0_W", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"We can use the solution of the unconstrained model (after normalization) as a seed for the constrained model. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "1Hx7W7j8HLgo", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
}, | |
"outputId": "de2f7b2d-4899-4edc-9f13-b07317d4a502" | |
}, | |
"source": [ | |
"model = ConstrainedLinearRegression(\n", | |
" A=A,\n", | |
" B=B,\n", | |
" lr=1e-4,\n", | |
")\n", | |
"model.fit(X, y, initial_beta=old_coef / sum(old_coef))\n", | |
"print(((model.predict(X) - y)**2).sum()**0.5, model.coef_, model.coef_.sum())" | |
], | |
"execution_count": 39, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"0.01568685482946043 [0.16777115 0.1848224 0.06922041 0.57818605] 1.0\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "znP6Ubp1Hki1", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment