Skip to content

Instantly share code, notes, and snippets.

@hannes-brt
Created December 17, 2012 02:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hannes-brt/4315484 to your computer and use it in GitHub Desktop.
Save hannes-brt/4315484 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "Cluster_Training_Demo"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"from IPython.parallel import Client\n",
"\n",
"#c = Client(profile='GPU_pig1_ssh')\n",
"c = Client(profile='GPU_clusters')\n",
"num_c_nodes = len(c.ids)\n",
"\n",
"print c.ids\n",
"\n",
"lview = c.load_balanced_view()\n",
"lview.block = True\n",
"dview = c[:]\n",
"cnodes = c\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[0, 1, 2, 3, 4, 5, 6, 7]\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"@dview.remote(block=False)\n",
"def getpid():\n",
" import os\n",
" return os.getpid()\n",
"\n",
"c_pid = getpid()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%px %load_ext autoreload\n",
"%px %autoreload 2\n",
"\n",
"def assignGPUID(c):\n",
" for i in c.ids:\n",
" c[i].execute('import gnumpy as gnp; gnp._useGPUid='+str(i%4))\n",
" c[i].execute('import learningUtil_test; import AE_class;')\n",
" print 'import gnumpy as gnp; gnp._useGPUid='+str(i%4)\n",
"\n",
" return\n",
"\n",
"assignGPUID(c)\n",
"%px reload(learningUtil_test)\n",
"%px reload(AE_class)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[stdout:0] \n",
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"[stdout:1] \n",
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"[stdout:2] \n",
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"[stdout:3] \n",
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n",
"import gnumpy as gnp; gnp._useGPUid=0"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"import gnumpy as gnp; gnp._useGPUid=1\n",
"import gnumpy as gnp; gnp._useGPUid=2\n",
"import gnumpy as gnp; gnp._useGPUid=3\n",
"import gnumpy as gnp; gnp._useGPUid=0\n",
"import gnumpy as gnp; gnp._useGPUid=1\n",
"import gnumpy as gnp; gnp._useGPUid=2\n",
"import gnumpy as gnp; gnp._useGPUid=3\n"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[0:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[1:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[2:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[3:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[4:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[5:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[6:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[7:26]: \u001b[0m<module 'learningUtil_test' from '/home/jimmy/notebooks/learningUtil_test.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[0:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[1:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[2:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[3:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[4:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[5:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[6:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
},
{
"output_type": "display_data",
"text": [
"\u001b[0;31mOut[7:27]: \u001b[0m<module 'AE_class' from '/home/jimmy/notebooks/AE_class.pyc'>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load dataset\n",
"==============="
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"## load mnist from skdata\n",
"from skdata import mnist\n",
"from util import show_filters\n",
"import time \n",
"\n",
"# -- load and prepare the data set (even download if necessary)\n",
"dtype = 'float32'\n",
"n_examples = 60000\n",
"#n_examples = 10000\n",
"n_test = 10000\n",
"#n_test = 1000\n",
"n_classes = 10 # -- denoted L in the math expressions\n",
"\n",
"img_shape = (28, 28)\n",
"\n",
"data_view = mnist.views.OfficialVectorClassification(x_dtype=dtype)\n",
"X = data_view.train.x[:n_examples]\n",
"y = data_view.train.y[:n_examples]\n",
"\n",
"#arrange the labels\n",
"Y = 1.0*(y[:,newaxis] == arange(0,n_classes))\n",
"\n",
"X_test = data_view.test.x[:n_test]\n",
"y_test = data_view.test.y[:n_test]\n",
"\n",
"#set up cross-validation set\n",
"#n_CV = 10000\n",
"\n",
"#X_CV = data_view.train.x[-n_CV:]\n",
"#y_CV = data_view.train.y[-n_CV:]\n",
"#Y_CV = 1.0*(y_CV[:,newaxis] == arange(0,n_classes))\n",
"\n",
"\n",
"m,D = X.shape;\n",
"\n",
"# show dataset\n",
"show_filters(X,img_shape,(10,10));\n"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Utility Func\n",
"=================="
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import gnumpy as gnp\n",
"gnp.free_reuse_cache()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def g_grid_param(dict_list):\n",
" if len(dict_list) < 1:\n",
" yield {}\n",
" return\n",
" for dict_param in g_grid_param(dict_list[1:]):\n",
" key, values = dict_list[0]\n",
" for value in values:\n",
" dict_param[key] = value\n",
" yield dict_param\n",
" "
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import learningUtil_test\n",
"import AE_class\n",
"reload(AE_class)\n",
"reload(learningUtil_test)\n",
"\n",
"\n",
"def genClusterJobs_AE(genDataFunc, batch_size, step_size, decay, \n",
" momentum_schedule, momentum_values, \n",
" momentum_bilinear, nesterov_momentum, \n",
" epoch, print_i, **kwargs):\n",
" experiment_list = list()\n",
" def_AE_param = {'Arch':(D, 1000), \n",
" 'actFunc':learningUtil_test.relu_gpu, 'actGradA':learningUtil_test.reluGradient_gpu,\n",
" 'tiedWeights':True,\n",
" 'linearOutput':True, \n",
" 'useGPU':True,\n",
" 'bias_offset': 0.}\n",
" \n",
" g = g_grid_param(kwargs.items())\n",
" \n",
" for params in g:\n",
" model_param = def_AE_param\n",
"\n",
" for key in params:\n",
" model_param[key] = params[key]\n",
" \n",
" model = AE_class.AE1(**model_param)\n",
" experiment = {'genDataFunc': genDataFunc, 'batch_size': batch_size, 'step_size': step_size, \n",
" 'loops': epoch, 'print_i':print_i, 'Model':model, \n",
" 'decay':decay, \n",
" 'momentum_schedule':momentum_schedule, 'momentum_values':momentum_values,\n",
" 'momentum_bilinear':momentum_bilinear, 'nesterov_momentum':nesterov_momentum }\n",
" experiment_list.append(experiment)\n",
" \n",
" return experiment_list"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def encodeData(X, model):\n",
" m, D = X.shape\n",
" R = np.zeros((m,model.Arch[1]))\n",
" batch = m/1000\n",
" for i in range(batch):\n",
" R[i*1000:(i+1)*1000] = model.encode(X[i*1000:(i+1)*1000])\n",
" \n",
" return R"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create cluster jobs\n",
"=========================="
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"n_epoch = 100\n",
"step_size = 0.003\n",
"decay = 0.99\n",
"momentum_schedule = (20, 70, 85, 100 )\n",
"momentum_values = (0.5, 0.99, 0.99, 0.5, 0.5)\n",
"\n",
"\n",
"momentum_bilinear = False\n",
"nesterov_momentum = True\n",
"batch_size = 100\n",
"print_i = 1\n",
"\n",
"\n",
"clusterJob_AE = genClusterJobs_AE(learningUtil_test.genDataMNIST, batch_size, step_size, decay, \n",
" momentum_schedule, momentum_values, momentum_bilinear, nesterov_momentum, \n",
" n_epoch, print_i,\n",
" Arch = ((D, 1000,),(D, 3000,)), \n",
" actFunc = (learningUtil_test.relu_gpu, ),\n",
" actGradA = (learningUtil_test.reluGradient_gpu, ),\n",
" probFunc = (learningUtil_test.sigmoid_gpu, ),\n",
" linearOutput = (True,),\n",
" tiedWeights = (True, ),\n",
" Input_dropout = (0.,0.1,0.2,0.5),\n",
" )"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Train AE\n",
"========================"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"clusterJob_AE_results = lview.map_async(learningUtil_test.runExperiment, clusterJob_AE)\n",
"\n",
"print 'Experiments submitted'"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"if clusterJob_AE_results.ready() is True:\n",
" print 'Experiment run time: ', clusterJob_AE_results.elapsed\n",
" #print clusterJob_AE_results.metadata[0]['stdout']\n",
"else:\n",
" print 'Experiment still running ', clusterJob_AE_results.progress, '/', len(clusterJob_AE_results), ' run time: ', clusterJob_AE_results.elapsed\n",
"for i in range(len(clusterJob_AE_results)):\n",
" print '###################'\n",
" if clusterJob_AE_results.metadata[i]['completed'] == None :\n",
" if len(clusterJob_AE_results.metadata[i]['stdout'].splitlines()) > 2:\n",
" print clusterJob_AE_results.metadata[i]['stdout'].splitlines()[-2]\n",
" else:\n",
" print 'initilizing'\n",
" else:\n",
" print clusterJob_AE_results.metadata[i]['stdout'].splitlines()[-2]"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"figure(figsize=(2,2));\n",
"\n",
"if clusterJob_AE_results.ready() is True:\n",
" plt.figure();\n",
" for i in range(len(clusterJob_AE_results)):\n",
" result = clusterJob_AE_results[i]\n",
" cost = result['cost']\n",
" print 'avg cost: ', cost[-1000:].mean()\n",
" plt.plot(np.arange(cost.shape[0]), np.log(1.+cost))\n",
" plt.title('struct denoising cost, avg. %0.2f'%cost[-1000:].mean(), fontsize=20)\n",
" #legend()\n",
" "
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Analysis\n",
"==================="
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"SVM\n",
"========================"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"clusterJob_SVM = learningUtil_test.genClusterJobs_SVM(clusterJob_AE_results, learningUtil_test.genDataMNIST)\n",
"clusterJob_SVM_results = lview.map_async(learningUtil_test.runLinearSVM, clusterJob_SVM)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"if clusterJob_SVM_results.ready() == False:\n",
" print 'cluster jobs still running'\n",
"else:\n",
" print 'totel time:', clusterJob_SVM_results.elapsed\n",
" \n",
"for i in range(len(clusterJob_SVM_results)):\n",
" print '###################' \n",
" if clusterJob_SVM_results.metadata[i]['completed'] == None :\n",
" print 'job still running'\n",
" else:\n",
" print clusterJob_SVM_results.metadata[i]['stdout']"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment