{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dask\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.distributed import Client, LocalCluster\n",
    "\n",
    "# processes=False keeps the workers as threads inside this process, which is\n",
    "# convenient for a local experiment and avoids serializing the arrays\n",
    "cluster = LocalCluster(processes=False)\n",
    "client = Client(cluster)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(0)\n",
    "n_features = 1000\n",
    "n_rows = 10000\n",
    "\n",
    "# ten lazy chunks of (features, labels); labels are mapped from {0, 1} to {-1, +1}\n",
    "data = [(dask.delayed(np.random.randn)(n_rows, n_features),\n",
    "         (dask.delayed(np.random.randint)(0, 2, n_rows) - 0.5) * 2)\n",
    "        for _ in range(10)]\n"
   ]
  },
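  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check of one lazily built chunk, assuming only the `data` list above:\n",
    "computing it should give a `(10000, 1000)` feature block and labels in `{-1, +1}`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# materialize the first chunk to inspect shapes and label values\n",
    "A0, b0 = dask.compute(*data[0])\n",
    "print(A0.shape, b0.shape, np.unique(b0))"
   ]
  },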
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numba import njit\n",
    "from sklearn.utils.extmath import row_norms\n",
    "# note: in scikit-learn >= 0.22 this helper lives in sklearn.linear_model._sag\n",
    "from sklearn.linear_model.sag import get_auto_step_size\n",
    "\n",
    "\n",
    "@njit\n",
    "def deriv_logistic(p, y):\n",
    "    # derivative of the logistic loss, written to avoid overflow in exp\n",
    "    # same as in lightning (with minus sign)\n",
    "    p *= y\n",
    "    if p > 0:\n",
    "        phi = 1. / (1 + np.exp(-p))\n",
    "    else:\n",
    "        exp_t = np.exp(p)\n",
    "        phi = exp_t / (1. + exp_t)\n",
    "    return (phi - 1) * y\n",
    "\n",
    "\n",
    "@dask.delayed(nout=3)\n",
    "@njit\n",
    "def _chunk_saga(\n",
    "        A, b, n_samples, f_deriv, x, memory_gradient, gradient_average, step_size):\n",
    "    # one SAGA pass over a single chunk; copy the state so the task does not\n",
    "    # mutate arrays that other delayed tasks may still reference\n",
    "    x = x.copy()\n",
    "    gradient_average = gradient_average.copy()\n",
    "    memory_gradient = memory_gradient.copy()\n",
    "\n",
    "    # visit the rows of this chunk in a random order\n",
    "    idx = np.arange(memory_gradient.size)\n",
    "    np.random.shuffle(idx)\n",
    "\n",
    "    # .. inner iteration ..\n",
    "    for i in idx:\n",
    "        grad_i = f_deriv(np.dot(x, A[i]), b[i])\n",
    "\n",
    "        # .. update coefficients ..\n",
    "        delta = (grad_i - memory_gradient[i]) * A[i]\n",
    "        x -= step_size * (delta + gradient_average)\n",
    "\n",
    "        # .. update memory terms ..\n",
    "        gradient_average += (grad_i - memory_gradient[i]) * A[i] / n_samples\n",
    "        memory_gradient[i] = grad_i\n",
    "\n",
    "    return x, memory_gradient, gradient_average\n",
    "\n",
    "\n",
    "def full_saga(data, max_iter=100, callback=None):\n",
    "    n_samples = 0\n",
    "    for A, b in data:\n",
    "        n_samples += A.shape[0]\n",
    "    n_features = data[0][0].shape[1]\n",
    "    data = dask.persist(*data)\n",
    "\n",
    "    # per-chunk gradient memory plus the shared running average and coefficients\n",
    "    memory_gradients = [dask.delayed(np.zeros)(A.shape[0]) for (A, b) in data]\n",
    "    gradient_average = dask.delayed(np.zeros)(n_features)\n",
    "    x = dask.delayed(np.zeros)(n_features)\n",
    "\n",
    "    # conservative step size: a fraction of the smallest auto step size over the chunks\n",
    "    steps = [dask.delayed(get_auto_step_size)(\n",
    "                 dask.delayed(row_norms)(A, squared=True).max(), 0, 'log', False)\n",
    "             for (A, b) in data]\n",
    "    step_size = 0.3 * dask.delayed(np.min)(steps)\n",
    "\n",
    "    for _ in range(max_iter):\n",
    "        # chain one SAGA pass per chunk; each pass consumes the previous x\n",
    "        for i, (A, b) in enumerate(data):\n",
    "            x, memory_gradients[i], gradient_average = _chunk_saga(\n",
    "                A, b, n_samples, deriv_logistic, x, memory_gradients[i],\n",
    "                gradient_average, step_size)\n",
    "        if callback is not None:\n",
    "            cb = dask.delayed(callback)(x, data)\n",
    "        else:\n",
    "            cb = None\n",
    "        # persist once per outer iteration so the task graph does not grow without bound\n",
    "        x, memory_gradients, gradient_average, step_size, cb = dask.persist(\n",
    "            x, memory_gradients, gradient_average, step_size, cb)\n",
    "        if callback:\n",
    "            print(cb.compute())\n",
    "\n",
    "    return x\n"
   ]
  },
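  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For orientation, each `_chunk_saga` call performs one SAGA sweep over its chunk.\n",
    "With step size $\\gamma$, fresh loss derivative $g_i = \\ell'(x^\\top a_i, b_i)$, stored derivative $m_i$, and\n",
    "$\\bar g$ the running average of the stored per-row gradients, every inner step of the code is\n",
    "\n",
    "$$x \\leftarrow x - \\gamma\\left[(g_i - m_i)\\,a_i + \\bar g\\right], \\qquad\n",
    "\\bar g \\leftarrow \\bar g + \\frac{(g_i - m_i)\\,a_i}{n}, \\qquad\n",
    "m_i \\leftarrow g_i,$$\n",
    "\n",
    "so a full gradient is never recomputed; only the memory terms are refreshed."
   ]
  },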
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@njit\n",
    "def grad_chunk(x, A, b):\n",
    "    # contribution of one chunk to the full logistic-loss gradient\n",
    "    grad = np.zeros(x.size)\n",
    "    for i in range(A.shape[0]):\n",
    "        grad += A[i] * deriv_logistic(np.dot(x, A[i]), b[i])\n",
    "    return grad\n",
    "\n",
    "\n",
    "def callback(x, data):\n",
    "    # norm of the full gradient; it should shrink toward zero as SAGA converges\n",
    "    grad = np.zeros(x.size)\n",
    "    for A, b in data:\n",
    "        grad += grad_chunk(x, A, b)\n",
    "    return np.linalg.norm(grad)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "out = full_saga(data, callback=callback)"
   ]
  },
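  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`full_saga` returns a persisted dask Delayed, so a final `compute()` pulls the fitted\n",
    "coefficients back as a NumPy array; a minimal sketch using only the `out` value above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# materialize the coefficient vector and look at its size and magnitude\n",
    "coef = out.compute()\n",
    "print(coef.shape, np.linalg.norm(coef))"
   ]
  },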
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}