Last active
November 7, 2015 02:13
-
-
Save MInner/5ea6cd3ea2ead984075e to your computer and use it in GitHub Desktop.
(Example) Optimization Using Theano
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import abc\n", | |
"import numpy as np\n", | |
"import theano\n", | |
"import theano.tensor as T" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
def warp(f):
    """Normalize a step-size spec into a schedule function ``int -> float``.

    A bare number becomes a constant schedule (the step index is ignored);
    a callable is returned unchanged.  The name is kept as ``warp`` (likely
    a typo for ``wrap``) because the optimizer classes below call it.

    Parameters
    ----------
    f : number or callable
        Either a constant step size or a schedule ``step_index -> float``.

    Returns
    -------
    callable
        A function of one integer argument producing the step size.

    Raises
    ------
    ValueError
        If ``f`` is neither a number nor callable.
    """
    if isinstance(f, (int, float, complex)):  # constant schedule
        # NOTE: the original also accepted Python 2 ``long``; on Python 3
        # all integers are ``int`` so the check is equivalent there.
        return lambda k: f
    if callable(f):
        # The original wrapped this in try/except, but merely *returning*
        # the callable cannot raise — errors can only surface later when
        # the schedule is actually invoked.  Accepting any callable also
        # generalizes beyond plain lambdas (bound methods, partials, ...).
        return f
    raise ValueError("gd_step must be either a float or int -> float")
"\n", | |
class methods(object):
    """Namespace grouping gradient-based update rules for theano shared vars.

    Each inner class is a callable producing a list of theano update pairs
    ``(shared_var, new_expression)`` to be passed to ``theano.function``.
    """

    class OptimizationMethod(object):
        # NOTE(review): ``__metaclass__`` is Python-2-only syntax (the
        # notebook kernel is python2); under Python 3 this attribute is
        # ignored and the class is NOT actually abstract.
        __metaclass__ = abc.ABCMeta

        @abc.abstractmethod
        def __call__(self, param, dparam, t):
            """
            Should return a list of theano update pairs for ``param`` given
            the parameter, the derivative of the loss wrt that parameter,
            and the (symbolic) step number ``t``.
            """
            pass

    class GD(OptimizationMethod):
        """Plain gradient descent: param_{t+1} = param_t - step(t) * dparam."""

        def __init__(self, gd_step):
            # ``gd_step`` may be a constant or a schedule ``int -> float``.
            self.step = warp(gd_step)

        def __call__(self, param, dparam, t):
            return [(param, param - self.step(t)*dparam)]

    class Momentum(OptimizationMethod):
        """
        v_{t+1} = alpha * dparam_t + beta*v_t
        param_{t+1} = param_t - v_{t+1}
        """

        def __init__(self, alpha, beta):
            self.alpha = warp(alpha)
            self.beta = warp(beta)
            # Velocity buffer is created lazily on the first call, once the
            # parameter's shape is known.
            self.__inited = False

        def __call__(self, param, dparam, t):
            if not self.__inited:
                # NOTE(review): one Momentum instance owns exactly one
                # velocity buffer — reusing the same instance for several
                # parameters would share (and clobber) ``v_t``.
                self.v_t = theano.shared(name='v_t_%s' % param.name, value = np.zeros_like(param.get_value()))
                self.__inited = True

            # NOTE(review): theano applies update pairs simultaneously from
            # the OLD values, so ``param`` is decremented by the previous
            # ``v_t``, not the freshly computed one — this lags the
            # docstring formula (param_t - v_{t+1}) by one step; confirm
            # whether that is intended.
            return [(self.v_t, self.alpha(t) * dparam + self.beta(t) * self.v_t),
                    (param, param - self.v_t)]

    class RMSprop(OptimizationMethod):
        """
        r_t = (1-gamma)*df^2 + gamma*r_{t-1}
        v_{t+1} = alpha/sqrt(r_t) * df + beta*v_t
        param = param - v_{t+1}
        """

        def __init__(self, alpha, beta, gamma, e = 1e-3):
            self.alpha = warp(alpha)
            self.beta = warp(beta) # momentum = 0 by default
            self.gamma = warp(gamma)
            # ``e`` guards against division by zero before ``r_t`` warms up.
            self.e = e
            self.__inited = False

        def __call__(self, param, dparam, t):
            if not self.__inited:
                # Lazily created per-parameter accumulators (velocity and
                # running mean of squared gradients).
                self.v_t = theano.shared(name='v_t_%s' % param.name, value = np.zeros_like(param.get_value()))
                self.r_t = theano.shared(name='r_t_%s' % param.name, value = np.zeros_like(param.get_value()))
                self.__inited = True

            # NOTE(review): code uses sqrt(r_t + e) while the docstring says
            # alpha/sqrt(r_t); the epsilon-inside-sqrt form is a common
            # variant but does not match the stated formula exactly.
            # As in Momentum, all three updates use the OLD shared values.
            return [(self.r_t, (1-self.gamma(t)) * dparam**2 + self.gamma(t)*self.r_t),
                    (self.v_t, self.alpha(t)/(T.sqrt(self.r_t + self.e)) * dparam + self.beta(t) * self.v_t),
                    (param, param - self.v_t)]

    class Adadelta(OptimizationMethod):
        """
        g_t = (1-gamma)*df^2 + gamma*g_{t-1}
        d = alpha*sqrt(s_t + e)/sqrt(e + g_t) * df
        s_t = (1-gamma)*d^2 + gamma*s_{t-1}
        w = w - d
        """

        def __init__(self, alpha, gamma, e = 0.001):
            self.alpha = warp(alpha)
            self.gamma = warp(gamma)
            # ``e`` keeps both square roots well-defined at t = 0.
            self.e = e
            self.__inited = False

        def __call__(self, param, dparam, t):
            if not self.__inited:
                # Running averages of squared gradients (g_t) and of squared
                # updates (s_t), created lazily with the parameter's shape.
                self.g_t = theano.shared(name='g_t_%s' % param.name, value = np.zeros_like(param.get_value()))
                self.s_t = theano.shared(name='s_t_%s' % param.name, value = np.zeros_like(param.get_value()))
                self.__inited = True

            # Symbolic update step; reused in two of the update pairs below.
            d = self.alpha(t) * T.sqrt(self.s_t + self.e) / T.sqrt(self.g_t + self.e) * dparam

            # All pairs evaluate against the OLD g_t/s_t (simultaneous
            # theano update semantics).
            return [(self.g_t, (1-self.gamma(t)) * dparam**2 + self.gamma(t)*self.g_t),
                    (self.s_t, (1-self.gamma(t)) * d**2 + self.gamma(t)*self.s_t),
                    (param, param - d)]
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from collections import OrderedDict\n", | |
"import time\n", | |
"\n", | |
def flattened(l):
    """Concatenate a list of lists into a single flat list."""
    result = []
    for sublist in l:
        result.extend(sublist)
    return result
"\n", | |
class Objective:
    """Compile a symbolic loss into a theano function and iterate updates.

    NOTE(review): this class is Python-2-only as written (``print``
    statements, ``raise X, "msg"`` syntax, list-returning ``dict.items()``,
    and ``time.clock`` which was removed in Python 3.8).
    """

    def __init__(self, exp_f, inputs, params, updates, dist = np.random.rand):
        # ``inputs``: list of (name, theano_tensor_constructor) pairs,
        # e.g. ('X', T.matrix).  Order is preserved so positional call
        # order of the compiled function is deterministic.
        self.inputs = OrderedDict()
        for name, t in inputs:
            self.inputs[name] = t(name=name)

        # ``params``: list of (name, shape) pairs; each becomes a shared
        # variable initialized by ``dist(*shape)`` (uniform by default).
        self.w = OrderedDict()
        for name, shape in params:
            self.w[name] = theano.shared(name=name, value=dist(*shape))

        # Build the scalar loss expression from named inputs and params.
        exp = exp_f(**dict(self.inputs, **self.w))
        dparams = T.grad(exp, self.w.values())

        # Symbolic step counter, incremented once per compiled-function call.
        self.ks = theano.shared(name='k', value=1)

        # One updater per parameter; each returns a list of update pairs,
        # flattened into a single updates list for theano.function.
        self.updates = (
            flattened([updater(param, dparam, self.ks)
                       for param, updater, dparam
                       in zip(self.w.values(), updates, dparams)])
            + [(self.ks, self.ks+1)]
        )

        self.f = theano.function(self.inputs.values(), exp, updates = self.updates)

    def step(self, data):
        """Run one update step; ``data`` maps input names to arrays."""
        return self.f(**data)

    def run(self, data, steps = None, tol = None, verbose=False, batch_n = None, max_time=None):
        """Iterate ``step`` until a step/tolerance/time budget is exhausted.

        Parameters: ``data`` name->array dict (all arrays sharing a leading
        sample axis); ``steps`` max iterations; ``tol`` stop when successive
        mean losses differ by less; ``batch_n`` number of equal batches the
        samples are split into per iteration; ``max_time`` CPU-seconds cap.
        Returns a dict of fitted parameter values.
        """
        # NOTE(review): ``== None`` comparisons below should be ``is None``.
        if steps == None and tol == None:
            raise ValueError, "Either steps or tol must be specified"

        if steps == None:
            steps = np.inf

        if tol == None:
            tol = 0

        # Sample count taken from the first input array (Python 2:
        # items() returns a list, so [0] indexing works).
        N = data.items()[0][1].shape[0]

        if batch_n == None:
            batch_n = 1

        step_n = 0
        f_vals = []

        start = time.clock()
        # Run at least 3 steps before testing the tolerance criterion.
        while step_n < steps and (step_n < 3 or abs(f_vals[-2] - f_vals[-1]) > tol):
            tmp_vals = []
            # np.split requires N to divide evenly by batch_n.
            for batch_ids in np.split(np.arange(N, dtype=np.int32), batch_n):
                # NOTE(review): the comprehension variable ``val`` shadows the
                # outer ``val`` (it leaks in Python 2) — harmless here because
                # the assignment overwrites it afterwards, but fragile.
                val = self.step(dict(
                    [(key, val[batch_ids]) for key, val in data.items()]
                ))

                tmp_vals.append(val)

            # Track the mean loss across batches for convergence testing.
            f_vals.append(np.mean(tmp_vals))
            step_n += 1

            if max_time != None:
                if time.clock() - start > max_time:
                    break
            if verbose:
                print val, ' ',

        duration = time.clock() - start

        # NOTE(review): "resudal" is a typo for "residual"; also, if the
        # loop ran fewer than 2 iterations (steps <= 1), f_vals[-2] raises
        # IndexError here.
        print 'Done in %d steps [%.3f CPU sec] with final resudal = %f' % (step_n, duration, f_vals[-2] - f_vals[-1])

        return dict([(name, w.get_value()) for name, w in self.w.items()])
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"|| w_hat - w_0 ||_2 = 1.01620878162\n", | |
"Done in 665 steps [0.140 CPU sec] with final resudal = 0.000000\n", | |
"|| w_hat - w* ||_2 = 0.139634245283\n" | |
] | |
} | |
], | |
"source": [ | |
if __name__ == '__main__':
    def test_least_squares():
        """Smoke-test: fit noisy linear data y = X.w + eps by momentum GD."""
        N = 100   # samples
        M = 10    # features
        X_real = np.random.rand(N, M)
        w_real = np.random.rand(M, 1)
        err = 0.1  # noise standard deviation
        y_real = X_real.dot(w_real) + err*np.random.normal(size=N).reshape((-1, 1))

        # Sum-of-squares loss expression over symbolic X, y, w.
        f = lambda X, y, w: T.sum( (y - X.dot(w))**2 )

        opt = Objective(f,
                inputs = [('X', T.matrix), ('y', T.matrix)],
                params = [('w', (M, 1))],
                # Alternative optimizers, kept for easy switching:
#                updates=[methods.GD(1e-3)],
#                updates= [methods.Adadelta(1e-2, 1e-2)],
#                updates= [methods.RMSprop(1e-2, 1e-2, 1e-2)],
                updates= [methods.Momentum(1e-3, 1e-3)],
            )

        # Distance from the randomly initialized w to the true w.
        print '|| w_hat - w_0 ||_2 = ', np.linalg.norm((w_real - opt.w['w'].get_value()))

        # NOTE(review): steps=10e6 is 1e7 and tol=10e-8 is 1e-7 — probably
        # meant as 1e6 / 1e-8; confirm intent.
        params = opt.run(data = {'X': X_real, 'y': y_real},
                steps = 10e6,
                tol = 10e-8,
                max_time=20) # sec

        # Distance from the fitted w to the true w (should be small).
        print '|| w_hat - w* ||_2 = ', np.linalg.norm((w_real - params['w']))

    test_least_squares()
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment