Skip to content

Instantly share code, notes, and snippets.

@zonca
Created August 16, 2017 02:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zonca/aaed55502c4b16535fe947791d02ac32 to your computer and use it in GitHub Desktop.
Save zonca/aaed55502c4b16535fe947791d02ac32 to your computer and use it in GitHub Desktop.
Comet Spawner and Jupyterhub Config
import os
import re
import shlex

from batchspawner import SlurmSpawner, run_command
from tornado import gen
from traitlets import Unicode, default
class CometSpawner(SlurmSpawner):
    """SlurmSpawner that lets each user pick Comet job resources on a form.

    The spawn form offers a partition, GPU/core counts, a job duration, a
    Singularity image and an optional account.  ``options_from_form`` turns
    the submission into values that ``submit_batch_script`` substitutes into
    ``batch_script``.  Everything taken from the form is checked against the
    choices the form offers before it is placed in the job script, because
    the script is executed by a shell on the cluster.
    """

    # Default for the ``{other}`` placeholder of the batch script.
    # NOTE(review): presumably get_req_subvars() exposes req_* traits without
    # the prefix -- confirm against the installed batchspawner.
    req_other = Unicode('', config=True,
        help="Additional resources requested"
    )
    # Default for the ``{image}`` placeholder (same assumption as above).
    req_image = Unicode('', config=True,
        help="Singularity image"
    )

    # (value, label) pairs.  Each table drives both the HTML form and the
    # validation in options_from_form, so the two cannot drift apart.
    _QUEUE_CHOICES = (
        ("compute", "standard node"),
        ("shared", "shared node"),
        ("gpu", "full GPU node"),
        ("gpu-shared", "shared GPU node"),
    )
    _GPU_CHOICES = (("1", "1"), ("2", "2"), ("3", "3"), ("4", "4"))
    _CORE_CHOICES = (("1", "1"), ("2", "2"), ("6", "6"), ("12", "12"))
    _RUNTIME_CHOICES = (
        ("1:00:00", "1 hour"),
        ("2:00:00", "2 hours"),
        ("5:00:00", "5 hours"),
        ("8:00:00", "8 hours"),
        ("12:00:00", "12 hours"),
        ("24:00:00", "24 hours"),
    )
    _IMAGE_CHOICES = (
        ("ubuntu_anaconda_cmb_jupyterhub.img",
         "Ubuntu 16.04 Anaconda 4.4 Python 3.6"),
        ("centos_anaconda_jupyterhub.img",
         "Centos 7 Anaconda 4.4 Python 3.6"),
    )
    # The account is free text, so restrict it to characters that cannot
    # start a new line or a shell construct inside the job script.
    _ACCOUNT_RE = re.compile(r'[A-Za-z0-9_.-]+')

    @staticmethod
    def _select_html(name, label, choices):
        """Render one labelled ``<select>`` from ``(value, label)`` pairs."""
        options = "\n".join(
            '<option value="{}">{}</option>'.format(value, text)
            for value, text in choices
        )
        return (
            '<label for="{0}">{1}</label>\n'
            '<select id="{0}" name="{0}">\n'
            '{2}\n'
            '</select>'
        ).format(name, label, options)

    @staticmethod
    def _first(formdata, name, fallback=''):
        """Return the stripped first value of form field *name*.

        *fallback* is used when the field is absent or has no values.
        """
        values = formdata.get(name) or [fallback]
        return values[0].strip()

    @classmethod
    def _choice(cls, formdata, name, choices):
        """Return the submitted value of *name* if it is an offered choice.

        A missing field falls back to the first choice, which is what a
        browser submits for an untouched ``<select>``.

        Raises:
            ValueError: the submitted value is not one of *choices*.
        """
        allowed = [value for value, _ in choices]
        picked = cls._first(formdata, name, allowed[0])
        if picked not in allowed:
            raise ValueError(
                "Invalid value {!r} for option {!r}".format(picked, name)
            )
        return picked

    def _options_form_default(self):
        """Create a form for the user to choose the configuration for the SLURM job"""
        fields = [
            self._select_html("queue", "Comet node type", self._QUEUE_CHOICES),
            self._select_html(
                "gpus", "Number of GPUs (only for shared GPU node)",
                self._GPU_CHOICES),
            self._select_html(
                "cores", "Number of cores (only for shared node)",
                self._CORE_CHOICES),
            self._select_html("runtime", "Job duration", self._RUNTIME_CHOICES),
            self._select_html("image", "Container image", self._IMAGE_CHOICES),
            '<label for="account">Account (leave empty for default)</label>\n'
            '<input id="account" name="account">',
        ]
        return "\n" + "\n".join(fields) + "\n"

    def options_from_form(self, formdata):
        """Parse the form and add options to the SLURM job script

        Args:
            formdata: mapping of field name to a list of submitted strings.

        Returns:
            dict with ``queue``, ``runtime``, ``image`` and ``other``;
            ``other`` holds zero or more extra ``#SBATCH`` lines.

        Raises:
            ValueError: a field holds a value the form does not offer, or
                the account contains characters outside ``[A-Za-z0-9_.-]``.
        """
        options = {
            'queue': self._choice(formdata, 'queue', self._QUEUE_CHOICES),
            'runtime': self._choice(formdata, 'runtime', self._RUNTIME_CHOICES),
            'image': self._choice(formdata, 'image', self._IMAGE_CHOICES),
        }

        directives = []
        account = self._first(formdata, 'account')
        if account:
            if not self._ACCOUNT_RE.fullmatch(account):
                raise ValueError("Invalid account name {!r}".format(account))
            directives.append("#SBATCH --account={}".format(account))
        # NOTE(review): the GPU count is applied to both "gpu" and
        # "gpu-shared" although the form label says it is only for shared
        # GPU nodes -- behaviour kept as it was; confirm the intent.
        if options['queue'].startswith('gpu'):
            gpus = self._choice(formdata, 'gpus', self._GPU_CHOICES)
            directives.append("#SBATCH --gres='gpu:{}'".format(gpus))
        if options['queue'] == "shared":
            cores = self._choice(formdata, 'cores', self._CORE_CHOICES)
            directives.append("#SBATCH --ntasks-per-node={}".format(cores))
        options['other'] = "\n".join(directives)
        return options

    @default('req_keepvars')
    def _req_keepvars_default(self):
        """Return shell ``export`` lines for every variable of ``get_env()``.

        Values are shell-quoted so that spaces or metacharacters in an
        environment value cannot break or alter the job script.
        """
        return "\n".join(
            "export {}={};".format(name, shlex.quote(str(value)))
            for name, value in self.get_env().items()
        )

    @gen.coroutine
    def submit_batch_script(self):
        """Fill in the batch script, submit it and record the job id.

        Returns:
            The parsed job id, or ``''`` when submission or parsing failed
            (the same value is stored in ``self.job_id``).
        """
        subvars = self.get_req_subvars()
        cmd = self.batch_submit_cmd.format(**subvars)
        subvars['cmd'] = self.cmd_formatted_for_batch()
        # Choices made on the spawn form override the configured defaults.
        if hasattr(self, 'user_options'):
            subvars.update(self.user_options)
        script = self.batch_script.format(**subvars)
        self.log.info('Spawner submitting job using %s', cmd)
        # The script embeds the exported environment and the tunnel key, so
        # keep it out of the info-level log.
        self.log.debug('Spawner submitted script:\n%s', script)
        out = yield run_command(cmd, input=script, env=self.get_env())

        self.job_id = ''
        if isinstance(out, str):
            self.log.info('Job submitted. cmd: %s output: %s', cmd, out)
            try:
                self.job_id = self.parse_job_id(out)
            except Exception:
                self.log.exception(
                    'Could not parse a job id from submit output: %s', out)
        else:
            # NOTE(review): a non-string result is presumably the exit
            # status of the submit command -- confirm against run_command.
            # The previous handler concatenated it to a str and raised.
            self.log.error('Job submission failed with exit code %s', out)
        return self.job_id

    def user_env(self, env):
        """get user environment"""
        env['USER'] = self.user.name
        return env

    @default('req_homedir')
    def _req_homedir_default(self):
        """Return the placeholder home directory; edit it for the deployment."""
        return "/home/SET_GATEWAY_USER_HERE/"
# Ansible managed
# Configuration file for jupyterhub.
# get_config is neither defined nor imported in this file; presumably
# JupyterHub injects it when it loads the configuration -- confirm.
c = get_config()
# 'CONF_IP' looks like a placeholder to be replaced at deploy time.
c.JupyterHub.ip = u'CONF_IP'
c.JupyterHub.port = 8000
c.JupyterHub.cookie_secret_file = u'/srv/jupyterhub/cookie_secret'
# Plain file path rather than a database URL.
# NOTE(review): presumably JupyterHub treats a value without '://' as a
# SQLite file -- confirm for the installed version.
c.JupyterHub.db_url = u'/srv/jupyterhub/jupyterhub.sqlite'
# Left empty here.
c.JupyterHub.proxy_auth_token = u''
# NOTE(review): this suggests the hub itself runs without SSL (TLS
# presumably terminated in front of it) -- confirm.
c.JupyterHub.confirm_no_ssl = True
# No groups are pre-loaded.
c.JupyterHub.load_groups = {
}
# Log users in through Globus OAuth (needs the third-party oauthenticator
# package).
from oauthenticator.globus import GlobusOAuthenticator
c.JupyterHub.authenticator_class = GlobusOAuthenticator
# The host part is redacted (xxx-xxx-xxx-xxx); replace it with the hub's
# public host name.
c.GlobusOAuthenticator.oauth_callback_url = 'https://xxx-xxx-xxx-xxx.compute.cloud.sdsc.edu/hub/oauth_callback'
# 'CONF' looks like a placeholder for the real OAuth credentials.
c.GlobusOAuthenticator.client_id = 'CONF'
c.GlobusOAuthenticator.client_secret = 'CONF'
c.Authenticator.admin_users = {'zonca'}
# Empty whitelist.
# NOTE(review): presumably this means any authenticated user may log in --
# confirm against the Authenticator documentation.
c.Authenticator.whitelist = set()
# Hub-managed service that runs cull_idle_servers.py with admin rights.
# NOTE(review): --cull-every and --timeout are presumably in seconds
# (6000 s = 100 min, 36000 s = 10 h) -- confirm against the script.
c.JupyterHub.services = [
{
'name': 'cull_idle_servers',
'admin': True,
'command': ['/opt/conda/bin/python3', '/srv/jupyterhub/cull_idle_servers.py', '--cull-every=6000', '--timeout=36000']
},
]
import sys
# Make /etc/jupyterhub importable so the comet_spawner module can be loaded
# from there.
sys.path.append("/etc/jupyterhub")
from comet_spawner import CometSpawner
c.JupyterHub.spawner_class = CometSpawner
# These are set on SlurmSpawner, the base class of CometSpawner.
# queue and runtime are defaults only: values chosen on the spawn form
# replace them when the batch script is rendered.
c.SlurmSpawner.req_nprocs = '2'
c.SlurmSpawner.req_queue = 'compute'
c.SlurmSpawner.req_runtime = '12:00:00'
c.SlurmSpawner.req_memory = '4gb'
# Substituted for {host} in the ssh commands configured further down.
c.SlurmSpawner.req_host = 'comet.sdsc.edu'
# Job script template. CometSpawner.submit_batch_script renders it with
# str.format, so {queue}, {runtime}, {other}, {keepvars}, {image} and {cmd}
# are placeholders, and any literal brace added to the script must be
# doubled.
# The script writes the tunnelbot private key ('CONF' is a placeholder) to a
# mktemp file, opens an ssh tunnel forwarding local port 8081 back to the
# hub host, then runs {cmd}.
# NOTE(review): the key file is never removed, and "StrictHostKeyChecking no"
# turns off host key verification for the tunnel.
c.SlurmSpawner.batch_script = '''#!/bin/bash
#SBATCH --job-name="jupyterhub"
#SBATCH --output="jupyterhub.%j.%N.out"
#SBATCH --partition={queue}
#SBATCH --nodes=1
###SBATCH --ntasks-per-node=24 # needs to be modified for shared queues
#SBATCH --time={runtime}
{other}
{keepvars}
module load singularity
IMAGE=/oasis/scratch/comet/zonca/temp_project/{image}
export PYTHONPATH=;
# create tunnelbot private SSH key
TUNNELBOT_RSA_PATH=$(mktemp)
echo "-----BEGIN RSA PRIVATE KEY-----
CONF
-----END RSA PRIVATE KEY-----" > $TUNNELBOT_RSA_PATH
chmod 600 $TUNNELBOT_RSA_PATH
# create tunnel from Comet to Jupyterhub
ssh -o "StrictHostKeyChecking no" -i $TUNNELBOT_RSA_PATH -N -f -L 8081:localhost:8081 tunnelbot@xxx-xxx-xxx-xxx.compute.cloud.sdsc.edu
{cmd}
'''
# Slurm is driven remotely: submit, query and cancel all go through ssh as
# the zoncatest account on {host}.
c.SlurmSpawner.batch_submit_cmd = 'ssh zoncatest@{host} sbatch'
# squeue prints "%T %B" for the job.
# NOTE(review): presumably job state and executing host -- confirm against
# the squeue manual.
c.SlurmSpawner.batch_query_cmd = '''ssh zoncatest@{host} 'squeue -h -j {job_id} -o "%T %B"' '''
c.SlurmSpawner.batch_cancel_cmd = '''ssh zoncatest@{host} scancel {job_id}'''
# Regex replacement that appends .sdsc.edu to the first captured group.
# NOTE(review): presumably turns the short node name into a fully qualified
# host name -- confirm against batchspawner.
c.SlurmSpawner.state_exechost_exp = r'\1.sdsc.edu'
# $IMAGE is left for the shell to expand; the batch script template sets it.
c.SlurmSpawner.cmd = ["singularity", "exec", "$IMAGE", "/opt/conda/bin/jupyterhub-singleuser"]
# NOTE(review): the timeouts are presumably in seconds (7200 s = 2 h),
# generous to allow for time spent waiting in the queue -- confirm.
c.SlurmSpawner.start_timeout = 7200
c.SlurmSpawner.startup_poll_interval = 5.0
c.SlurmSpawner.http_timeout = 7200
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment