{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import dask\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dask.distributed import Client, LocalCluster\n",
    "\n",
    "# processes=False keeps the workers as threads inside this process, which is\n",
    "# convenient for a local experiment and avoids serializing the arrays\n",
    "cluster = LocalCluster(processes=False)\n",
    "client = Client(cluster)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(0)\n",
    "n_features = 1000\n",
    "n_rows = 10000\n",
    "\n",
    "# ten lazy chunks of (features, labels); labels are mapped from {0, 1} to {-1, +1}\n",
    "data = [(dask.delayed(np.random.randn)(n_rows, n_features),\n",
    "         (dask.delayed(np.random.randint)(0, 2, n_rows) - 0.5) * 2)\n",
    "        for _ in range(10)]\n"
   ]
  },
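  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sanity check of one lazily built chunk, assuming only the `data` list above:\n",
    "computing it should give a `(10000, 1000)` feature block and labels in `{-1, +1}`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# materialize the first chunk to inspect shapes and label values\n",
    "A0, b0 = dask.compute(*data[0])\n",
    "print(A0.shape, b0.shape, np.unique(b0))"
   ]
  },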
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numba import njit\n",
    "from sklearn.utils.extmath import row_norms\n",
    "# note: in scikit-learn >= 0.22 this helper lives in sklearn.linear_model._sag\n",
    "from sklearn.linear_model.sag import get_auto_step_size\n",
    "\n",
    "\n",
    "@njit\n",
    "def deriv_logistic(p, y):\n",
    "    # derivative of the logistic loss, written to avoid overflow in exp\n",
    "    # same as in lightning (with minus sign)\n",
    "    p *= y\n",
    "    if p > 0:\n",
    "        phi = 1. / (1 + np.exp(-p))\n",
    "    else:\n",
    "        exp_t = np.exp(p)\n",
    "        phi = exp_t / (1. + exp_t)\n",
    "    return (phi - 1) * y\n",
    "\n",
    "\n",
    "@dask.delayed(nout=3)\n",
    "@njit\n",
    "def _chunk_saga(\n",
    "        A, b, n_samples, f_deriv, x, memory_gradient, gradient_average, step_size):\n",
    "    # one SAGA pass over a single chunk; copy the state so the task does not\n",
    "    # mutate arrays that other delayed tasks may still reference\n",
    "    x = x.copy()\n",
    "    gradient_average = gradient_average.copy()\n",
    "    memory_gradient = memory_gradient.copy()\n",
    "\n",
    "    # visit the rows of this chunk in a random order\n",
    "    idx = np.arange(memory_gradient.size)\n",
    "    np.random.shuffle(idx)\n",
    "\n",
    "    # .. inner iteration ..\n",
    "    for i in idx:\n",
    "        grad_i = f_deriv(np.dot(x, A[i]), b[i])\n",
    "\n",
    "        # .. update coefficients ..\n",
    "        delta = (grad_i - memory_gradient[i]) * A[i]\n",
    "        x -= step_size * (delta + gradient_average)\n",
    "\n",
    "        # .. update memory terms ..\n",
    "        gradient_average += (grad_i - memory_gradient[i]) * A[i] / n_samples\n",
    "        memory_gradient[i] = grad_i\n",
    "\n",
    "    return x, memory_gradient, gradient_average\n",
    "\n",
    "\n",
    "def full_saga(data, max_iter=100, callback=None):\n",
    "    n_samples = 0\n",
    "    for A, b in data:\n",
    "        n_samples += A.shape[0]\n",
    "    n_features = data[0][0].shape[1]\n",
    "    data = dask.persist(*data)\n",
    "\n",
    "    # per-chunk gradient memory plus the shared running average and coefficients\n",
    "    memory_gradients = [dask.delayed(np.zeros)(A.shape[0]) for (A, b) in data]\n",
    "    gradient_average = dask.delayed(np.zeros)(n_features)\n",
    "    x = dask.delayed(np.zeros)(n_features)\n",
    "\n",
    "    # conservative step size: a fraction of the smallest auto step size over the chunks\n",
    "    steps = [dask.delayed(get_auto_step_size)(\n",
    "                 dask.delayed(row_norms)(A, squared=True).max(), 0, 'log', False)\n",
    "             for (A, b) in data]\n",
    "    step_size = 0.3 * dask.delayed(np.min)(steps)\n",
    "\n",
    "    for _ in range(max_iter):\n",
    "        # chain one SAGA pass per chunk; each pass consumes the previous x\n",
    "        for i, (A, b) in enumerate(data):\n",
    "            x, memory_gradients[i], gradient_average = _chunk_saga(\n",
    "                A, b, n_samples, deriv_logistic, x, memory_gradients[i],\n",
    "                gradient_average, step_size)\n",
    "        if callback is not None:\n",
    "            cb = dask.delayed(callback)(x, data)\n",
    "        else:\n",
    "            cb = None\n",
    "        # persist once per outer iteration so the task graph does not grow without bound\n",
    "        x, memory_gradients, gradient_average, step_size, cb = dask.persist(\n",
    "            x, memory_gradients, gradient_average, step_size, cb)\n",
    "        if callback:\n",
    "            print(cb.compute())\n",
    "\n",
    "    return x\n"
   ]
  },
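  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For orientation, each `_chunk_saga` call performs one SAGA sweep over its chunk.\n",
    "With step size $\\gamma$, fresh loss derivative $g_i = \\ell'(x^\\top a_i, b_i)$, stored derivative $m_i$, and\n",
    "$\\bar g$ the running average of the stored per-row gradients, every inner step of the code is\n",
    "\n",
    "$$x \\leftarrow x - \\gamma\\left[(g_i - m_i)\\,a_i + \\bar g\\right], \\qquad\n",
    "\\bar g \\leftarrow \\bar g + \\frac{(g_i - m_i)\\,a_i}{n}, \\qquad\n",
    "m_i \\leftarrow g_i,$$\n",
    "\n",
    "so a full gradient is never recomputed; only the memory terms are refreshed."
   ]
  },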
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "@njit\n",
    "def grad_chunk(x, A, b):\n",
    "    # contribution of one chunk to the full logistic-loss gradient\n",
    "    grad = np.zeros(x.size)\n",
    "    for i in range(A.shape[0]):\n",
    "        grad += A[i] * deriv_logistic(np.dot(x, A[i]), b[i])\n",
    "    return grad\n",
    "\n",
    "\n",
    "def callback(x, data):\n",
    "    # norm of the full gradient; it should shrink toward zero as SAGA converges\n",
    "    grad = np.zeros(x.size)\n",
    "    for A, b in data:\n",
    "        grad += grad_chunk(x, A, b)\n",
    "    return np.linalg.norm(grad)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "out = full_saga(data, callback=callback)"
   ]
  },
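  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`full_saga` returns a persisted dask Delayed, so a final `compute()` pulls the fitted\n",
    "coefficients back as a NumPy array; a minimal sketch using only the `out` value above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# materialize the coefficient vector and look at its size and magnitude\n",
    "coef = out.compute()\n",
    "print(coef.shape, np.linalg.norm(coef))"
   ]
  },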
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}