(Example) Optimization Using Theano
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import abc\n",
"import numpy as np\n",
"import theano\n",
"import theano.tensor as T"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def warp(f):\n",
" if isinstance(f, (int, long, float, complex)): # only one num\n",
" return lambda k: f\n",
" elif isinstance(f, type(lambda:0)):\n",
" try:\n",
" return f\n",
" except:\n",
" raise ValueError, \"error while computing: gd_step(k) function should follow prototype: int -> float\"\n",
" else:\n",
" raise ValueError, \"gd_step must be either a float or int -> float\"\n",
"\n",
"class methods(object):\n",
" class OptimizationMethod(object):\n",
" __metaclass__ = abc.ABCMeta\n",
" @abc.abstractmethod\n",
" def __call__(self, param, dparam, t):\n",
" \"\"\"\n",
" Sould return list of updated version of param given param, \n",
" derivateve of loss wrt that param and step number \n",
" \"\"\"\n",
" pass\n",
" \n",
" class GD(OptimizationMethod):\n",
" def __init__(self, gd_step):\n",
" self.step = warp(gd_step)\n",
" \n",
" def __call__(self, param, dparam, t):\n",
" return [(param, param - self.step(t)*dparam)]\n",
" \n",
" class Momentum(OptimizationMethod):\n",
" \"\"\"\n",
" v_{t+1} = alpha * dparam_t + beta*v_t\n",
" param_{t+1} = param_t - v_{t+1}\n",
" \"\"\"\n",
" def __init__(self, alpha, beta):\n",
" self.alpha = warp(alpha)\n",
" self.beta = warp(beta)\n",
" self.__inited = False\n",
" \n",
" def __call__(self, param, dparam, t):\n",
" if not self.__inited:\n",
" self.v_t = theano.shared(name='v_t_%s' % param.name, value = np.zeros_like(param.get_value()))\n",
" self.__inited = True\n",
" \n",
" return [(self.v_t, self.alpha(t) * dparam + self.beta(t) * self.v_t), \n",
" (param, param - self.v_t)]\n",
" \n",
" class RMSprop(OptimizationMethod):\n",
" \"\"\"\n",
" r_t = (1-gamma)*df^2 + gamma*r_{t-1}\n",
" v_{t+1} = alpha/sqrt(r_t) * df + beta*v_t\n",
" param = param - v_{t+1}\n",
" \"\"\"\n",
" def __init__(self, alpha, beta, gamma, e = 1e-3):\n",
" self.alpha = warp(alpha)\n",
" self.beta = warp(beta) # momentum = 0 by default\n",
" self.gamma = warp(gamma)\n",
" self.e = e\n",
" self.__inited = False\n",
" \n",
" def __call__(self, param, dparam, t):\n",
" if not self.__inited:\n",
" self.v_t = theano.shared(name='v_t_%s' % param.name, value = np.zeros_like(param.get_value()))\n",
" self.r_t = theano.shared(name='r_t_%s' % param.name, value = np.zeros_like(param.get_value()))\n",
" self.__inited = True\n",
" \n",
" return [(self.r_t, (1-self.gamma(t)) * dparam**2 + self.gamma(t)*self.r_t),\n",
" (self.v_t, self.alpha(t)/(T.sqrt(self.r_t + self.e)) * dparam + self.beta(t) * self.v_t), \n",
" (param, param - self.v_t)]\n",
" \n",
" \n",
" class Adadelta(OptimizationMethod):\n",
" \"\"\"\n",
" g_t = (1-gamma)*df^2 + gamma*g_{t-1}\n",
" d = alpha*sqrt(s_t + e)/sqrt(e + g_t) * df\n",
" s_t = (1-gamma)*d^2 + gamma*s_{t-1}\n",
" w = w - d\n",
" \"\"\"\n",
" def __init__(self, alpha, gamma, e = 0.001):\n",
" self.alpha = warp(alpha)\n",
" self.gamma = warp(gamma)\n",
" self.e = e\n",
" self.__inited = False\n",
" \n",
" def __call__(self, param, dparam, t):\n",
" if not self.__inited:\n",
" self.g_t = theano.shared(name='g_t_%s' % param.name, value = np.zeros_like(param.get_value()))\n",
" self.s_t = theano.shared(name='s_t_%s' % param.name, value = np.zeros_like(param.get_value()))\n",
" self.__inited = True\n",
" \n",
" d = self.alpha(t) * T.sqrt(self.s_t + self.e) / T.sqrt(self.g_t + self.e) * dparam\n",
" \n",
" return [(self.g_t, (1-self.gamma(t)) * dparam**2 + self.gamma(t)*self.g_t),\n",
" (self.s_t, (1-self.gamma(t)) * d**2 + self.gamma(t)*self.s_t), \n",
" (param, param - d)]"
]
},
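{
"cell_type": "markdown",
"metadata": {},
"source": [
"Minimal usage sketch: any `OptimizationMethod` instance maps `(param, dparam, t)` to a list of Theano update pairs, so it can be handed straight to `theano.function`. The toy quadratic loss and the names `w`, `k`, `loss`, `train` below are illustrative assumptions, not part of the least-squares test further down."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sketch: drive a single shared parameter with methods.GD on a toy loss\n",
"w = theano.shared(name='w', value=np.array([1.0, 2.0]))\n",
"k = theano.shared(name='k', value=1)  # step counter passed as t\n",
"loss = T.sum(w**2)\n",
"dw = T.grad(loss, w)\n",
"\n",
"updates = methods.GD(1e-1)(w, dw, k) + [(k, k + 1)]\n",
"train = theano.function([], loss, updates=updates)\n",
"\n",
"for _ in range(5):\n",
"    print train()  # the loss should shrink towards zero"
]
},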
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from collections import OrderedDict\n",
"import time\n",
"\n",
"flattened = lambda l: [item for sublist in l for item in sublist]\n",
"\n",
"class Objective:\n",
" def __init__(self, exp_f, inputs, params, updates, dist = np.random.rand):\n",
" self.inputs = OrderedDict()\n",
" for name, t in inputs:\n",
" self.inputs[name] = t(name=name)\n",
" \n",
" self.w = OrderedDict()\n",
" for name, shape in params:\n",
" self.w[name] = theano.shared(name=name, value=dist(*shape))\n",
" \n",
" exp = exp_f(**dict(self.inputs, **self.w))\n",
" dparams = T.grad(exp, self.w.values())\n",
" \n",
" self.ks = theano.shared(name='k', value=1)\n",
" \n",
" self.updates = (\n",
" flattened([updater(param, dparam, self.ks) \n",
" for param, updater, dparam \n",
" in zip(self.w.values(), updates, dparams)])\n",
" + [(self.ks, self.ks+1)]\n",
" )\n",
" \n",
" self.f = theano.function(self.inputs.values(), exp, updates = self.updates) \n",
" \n",
" def step(self, data):\n",
" return self.f(**data)\n",
" \n",
" def run(self, data, steps = None, tol = None, verbose=False, batch_n = None, max_time=None): \n",
" if steps == None and tol == None:\n",
" raise ValueError, \"Either steps or tol must be specified\"\n",
" \n",
" if steps == None:\n",
" steps = np.inf\n",
" \n",
" if tol == None:\n",
" tol = 0\n",
" \n",
" N = data.items()[0][1].shape[0]\n",
"\n",
" if batch_n == None:\n",
" batch_n = 1\n",
" \n",
" step_n = 0\n",
" f_vals = []\n",
" \n",
" start = time.clock()\n",
" while step_n < steps and (step_n < 3 or abs(f_vals[-2] - f_vals[-1]) > tol):\n",
" tmp_vals = []\n",
" for batch_ids in np.split(np.arange(N, dtype=np.int32), batch_n):\n",
" val = self.step(dict(\n",
" [(key, val[batch_ids]) for key, val in data.items()]\n",
" ))\n",
"\n",
" tmp_vals.append(val)\n",
" \n",
" f_vals.append(np.mean(tmp_vals))\n",
" step_n += 1\n",
" \n",
" if max_time != None:\n",
" if time.clock() - start > max_time:\n",
" break\n",
" if verbose:\n",
" print val, ' ', \n",
" \n",
" duration = time.clock() - start\n",
" \n",
" print 'Done in %d steps [%.3f CPU sec] with final resudal = %f' % (step_n, duration, f_vals[-2] - f_vals[-1])\n",
" \n",
" return dict([(name, w.get_value()) for name, w in self.w.items()])"
]
},
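{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, `Objective.run` forms its mini-batches with `np.split`, which only works when `batch_n` divides the number of rows evenly. The toy values below are illustrative only."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sketch of the batching used inside Objective.run: six rows, three batches\n",
"N, batch_n = 6, 3\n",
"for batch_ids in np.split(np.arange(N, dtype=np.int32), batch_n):\n",
"    print batch_ids  # -> [0 1], [2 3], [4 5]"
]
},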
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|| w_hat - w_0 ||_2 = 1.01620878162\n",
"Done in 665 steps [0.140 CPU sec] with final resudal = 0.000000\n",
"|| w_hat - w* ||_2 = 0.139634245283\n"
]
}
],
"source": [
"if __name__ == '__main__':\n",
" def test_least_squares():\n",
" N = 100\n",
" M = 10\n",
" X_real = np.random.rand(N, M)\n",
" w_real = np.random.rand(M, 1)\n",
" err = 0.1\n",
" y_real = X_real.dot(w_real) + err*np.random.normal(size=N).reshape((-1, 1))\n",
"\n",
" f = lambda X, y, w: T.sum( (y - X.dot(w))**2 )\n",
"\n",
" opt = Objective(f,\n",
" inputs = [('X', T.matrix), ('y', T.matrix)], \n",
" params = [('w', (M, 1))],\n",
"# updates=[methods.GD(1e-3)],\n",
"# updates= [methods.Adadelta(1e-2, 1e-2)],\n",
"# updates= [methods.RMSprop(1e-2, 1e-2, 1e-2)],\n",
" updates= [methods.Momentum(1e-3, 1e-3)],\n",
" )\n",
"\n",
" print '|| w_hat - w_0 ||_2 = ', np.linalg.norm((w_real - opt.w['w'].get_value()))\n",
"\n",
" params = opt.run(data = {'X': X_real, 'y': y_real}, \n",
" steps = 10e6,\n",
" tol = 10e-8,\n",
" max_time=20) # sec\n",
"\n",
" print '|| w_hat - w* ||_2 = ', np.linalg.norm((w_real - params['w']))\n",
" \n",
" test_least_squares()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}