Skip to content

Instantly share code, notes, and snippets.

@ericmjonas
Created October 15, 2016 18:09
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericmjonas/0b44db5873d309960613e337b27e7662 to your computer and use it in GitHub Desktop.
Save ericmjonas/0b44db5873d309960613e337b27e7662 to your computer and use it in GitHub Desktop.
A fabfile to launch an AWS gpu instance with additional local storage and tensorflow set up
"""
Fabric file to help with launching EC2 P2 instances and
getting GPU support set up. Also installs latest
anaconda and then tensorflow. Use:
fab launch
# wait until you can ssh into the instance with
fab -R mygpu ssh
# install everything
fab -R mygpu basic_setup cuda_setup anaconda_setup tf_setup
# when you're done, terminate
fab -R mygpu terminate
Took inspiration from:
https://aws.amazon.com/blogs/aws/new-p2-instance-type-for-amazon-ec2-up-to-16-gpus/
"""
from fabric.api import local, env, run, put, cd, task, sudo, settings, warn_only, lcd, path, get
from fabric.contrib import project
import boto3
# AMI id passed as ImageId when launching the instance.
tgt_ami = 'ami-b04e92d0'
# AWS region used for every boto3 EC2 resource created in this file.
region = 'us-west-2'
# Value of the EC2 "Name" tag; launch() writes it, and get_target_instance()
# and terminate() use it to find the instance again.
unique_instance_name = 'p2_instance'
# Name of the EC2 key pair passed as KeyName at launch.
my_aws_key = 'ec2-us-west-2'
# Fabric role name under which the discovered hosts are registered
# (selected on the command line with ``fab -R mygpu ...``).
instance_name = "mygpu"
def tags_to_dict(d):
    """Convert an EC2 tag list into a plain ``{key: value}`` dict.

    d -- iterable of ``{'Key': ..., 'Value': ...}`` dicts, as found on a
         boto3 ``Instance.tags`` attribute, or None (boto3 reports None
         rather than an empty list for an instance without tags).

    Returns an empty dict when ``d`` is None or empty.
    """
    if not d:
        return {}
    return {tag['Key']: tag['Value'] for tag in d}
def get_target_instance():
    """Build the Fabric role definition for the running GPU instance(s).

    Scans every EC2 instance in ``region`` and collects an
    ``ec2-user@<public dns>`` host string for each one that is in the
    ``running`` state and whose ``Name`` tag equals
    ``unique_instance_name``.

    Returns a dict ``{instance_name: [host strings]}`` suitable for
    ``env.roledefs.update``; the list is empty when nothing matches.
    """
    ec2 = boto3.resource('ec2', region_name=region)
    hosts = []
    for inst in ec2.instances.all():
        if inst.state['Name'] != 'running':
            continue
        # ``tags`` is None for an untagged instance, and an instance may
        # carry tags without a "Name" key; neither should abort the scan.
        tags = tags_to_dict(inst.tags or [])
        if tags.get('Name') == unique_instance_name:
            hosts.append('ec2-user@{}'.format(inst.public_dns_name))
    print("found {}".format(hosts))
    return {instance_name: hosts}
# Runs at import time: every ``fab`` invocation queries EC2 and registers the
# matching hosts under the ``instance_name`` role so ``-R mygpu`` resolves.
env.roledefs.update(get_target_instance())
@task
def launch():
    """Start one p2.xlarge instance and tag it so other tasks can find it.

    Creates a single instance from ``tgt_ami`` with key pair
    ``my_aws_key``, waits until EC2 reports it as running, then sets its
    ``Name`` tag to ``unique_instance_name``.
    """
    # Root device mapping: a size-50 'standard' volume restored from the
    # given snapshot and deleted together with the instance.
    root_volume = {
        'DeviceName': '/dev/xvda',
        'Ebs': {
            'VolumeSize': 50,
            'DeleteOnTermination': True,
            'VolumeType': 'standard',
            'SnapshotId': 'snap-c87f35ec',
        },
    }
    name_tag = {'Key': 'Name', 'Value': unique_instance_name}

    ec2 = boto3.resource('ec2', region_name=region)
    created = ec2.create_instances(
        ImageId=tgt_ami,
        MinCount=1,
        MaxCount=1,
        KeyName=my_aws_key,
        InstanceType='p2.xlarge',
        BlockDeviceMappings=[root_volume],
    )
    inst = created[0]
    print(inst)

    # Block until the instance is running, then refresh its attributes
    # before tagging.
    inst.wait_until_running()
    inst.reload()
    inst.create_tags(Resources=[inst.instance_id], Tags=[name_tag])
@task
def ssh():
    """Open a local ``ssh -A`` session to the host Fabric is currently targeting."""
    target = env.host_string
    local("ssh -A " + target)
@task
def basic_setup():
    """Update the remote host and install the base build tooling via yum.

    Runs a full ``yum update``, installs the 'Development Tools' group,
    a few editors/compilers, and the kernel-devel package matching the
    currently running kernel.
    """
    run("sudo yum update -q -y")
    run("sudo yum groupinstall 'Development Tools' -q -y")
    # The yum package providing the g++ compiler is named "gcc-c++";
    # "g++" is not a package name and was never actually installed.
    run("sudo yum install -q -y emacs tmux gcc gcc-c++")
    # Headers for the running kernel (`uname -r`); presumably needed so the
    # NVIDIA driver installer in cuda_setup can build its kernel module.
    run("sudo yum install -y kernel-devel-`uname -r`")
@task
def cuda_setup():
    """Install the NVIDIA 352.99 driver and the CUDA 7.5 toolkit on the remote host.

    Downloads both installers into the remote working directory, runs
    them as root, applies a fixed set of ``nvidia-smi`` settings, and
    registers the CUDA library directories with the dynamic linker.
    """
    driver_installer = "NVIDIA-Linux-x86_64-352.99.run"
    toolkit_installer = "cuda_7.5.18_linux.run"

    run("wget http://us.download.nvidia.com/XFree86/Linux-x86_64/352.99/" + driver_installer)
    run("wget http://developer.download.nvidia.com/compute/cuda/7.5/Prod/local_installers/" + toolkit_installer)
    for installer in (driver_installer, toolkit_installer):
        run("chmod +x " + installer)

    # still requires a few prompts
    sudo("./" + driver_installer + " --silent")
    # Don't install driver, just install CUDA and sample
    sudo("./" + toolkit_installer + " --silent --toolkit --samples")

    # GPU settings applied via nvidia-smi (see the nvidia-smi documentation
    # for the meaning of each flag).
    for smi_args in ("-pm 1", "-acp 0", "--auto-boost-permission=0", "-ac 2505,875"):
        sudo("nvidia-smi " + smi_args)

    # Make the CUDA shared libraries visible to the dynamic linker.
    for lib_dir in ("/usr/local/cuda/lib/", "/usr/local/cuda/lib64/"):
        sudo('echo "{}" >> /etc/ld.so.conf'.format(lib_dir))
    sudo('ldconfig')
@task
def anaconda_setup():
    """Install Anaconda2 4.2.0 into ``$HOME/anaconda`` and add common packages.

    Downloads and runs the installer on the remote host, appends the
    Anaconda ``bin`` directory to PATH in ``.bash_profile``, upgrades all
    conda packages, and installs a fixed set of scientific libraries.
    """
    installer = "Anaconda2-4.2.0-Linux-x86_64.sh"
    run("wget https://repo.continuum.io/archive/" + installer)
    run("chmod +x " + installer)
    run("./" + installer + " -b -p $HOME/anaconda")

    # Later commands rely on this PATH entry to find conda and pip.
    run('echo "export PATH=$HOME/anaconda/bin:$PATH" >> .bash_profile')

    run("conda upgrade -q -y --all")
    extra_packages = ["pandas", "scikit-learn", "scikit-image", "matplotlib", "seaborn"]
    run("conda install -q -y " + " ".join(extra_packages))
# Wheel installed by tf_setup; per the URL, a Linux GPU build of
# TensorFlow 0.11.0rc0 for CPython 2.7.
TF_URL = "https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.11.0rc0-cp27-none-linux_x86_64.whl"
@task
def tf_setup():
    """pip-install the TensorFlow wheel referenced by ``TF_URL`` on the remote host."""
    pip_command = "pip install --ignore-installed --upgrade " + TF_URL
    run(pip_command)
@task
def terminate():
    """Terminate every running instance tagged ``Name == unique_instance_name``.

    Only instances in the ``running`` state within ``region`` are
    considered; instances in any other state are left untouched.
    """
    ec2 = boto3.resource('ec2', region_name=region)
    for inst in ec2.instances.all():
        if inst.state['Name'] != 'running':
            continue
        # ``tags`` is None for an untagged instance and may lack a "Name"
        # key; such instances are skipped instead of raising.
        tags = tags_to_dict(inst.tags or [])
        if tags.get('Name') == unique_instance_name:
            inst.terminate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment