Skip to content

Instantly share code, notes, and snippets.

@ilkkapeltola
Last active May 29, 2018 11:41
Show Gist options
  • Save ilkkapeltola/d7f0f4e400134455921fefb7dffba90e to your computer and use it in GitHub Desktop.
Save ilkkapeltola/d7f0f4e400134455921fefb7dffba90e to your computer and use it in GitHub Desktop.
Launch Hue in a sandbox
import os
import configparser
import platform
import json
import subprocess as sp
import tempfile
# Load settings from config.ini in the current directory. A missing file or
# a missing key is not an error: every setting falls back to the default
# given below.
config = configparser.ConfigParser()
config.read('config.ini')
settings = config['DEFAULT']

chromepath = settings.get('chromepath', fallback='not set')
# The private key file is looked up as <keyname>.ppk on Windows and
# <keyname>.pem everywhere else.
key_extension = '.ppk' if platform.system() == 'Windows' else '.pem'
keyname = settings.get('keyname', fallback='not-set')
profile = settings.get('profile', fallback='sandbox')
region = settings.get('region', fallback='eu-central-1')
instance_type = settings.get('instance_type', fallback='m4.large')
# An empty subnet means "not configured".
subnet = settings.get('subnet', fallback="")
instances = settings.getint('instances', fallback=1)
# The application list is stored in config.ini as a JSON string.
apps = json.loads(settings['apps']) if 'apps' in settings else [{'Name': 'HIVE'}, {'Name': 'Hue'}]
keyfile = keyname + key_extension
release_label = settings.get('release_label', fallback='emr-5.13.0')
# Browser profile directory inside the system temp directory. Built with
# os.path.join so the separator is correct on every platform (a hard-coded
# backslash only produces a valid path on Windows).
profiledir = os.path.join(tempfile.gettempdir(), "chrome_emr_socks_session")
# Verify that the SSH client used for tunneling is available on PATH.
# `cmd` is the platform's executable-lookup command and `tunneling` is the
# program looked up with it.
cmd = "where" if platform.system() == "Windows" else "which"
tunneling = "putty" if platform.system() == "Windows" else "ssh"

# The lookup command reports a missing program through a non-zero exit
# status, not by raising, so the status has to be inspected explicitly.
try:
    lookup_status = sp.call([cmd, tunneling])
except OSError as err:
    # The lookup command itself could not be started.
    raise Exception(tunneling + " is not found") from err
if lookup_status != 0:
    raise Exception(tunneling + " is not found")
# Ask for the Chrome executable until an existing file is given. The check
# is only performed on Windows; other platforms (Mac, for example) are not
# handled yet and skip the prompt entirely.
running_on_windows = platform.system() == "Windows"
while running_on_windows and not os.path.isfile(chromepath):
    print("Chrome not found.")
    missing_marker = 'not found'
    chromepath = input('Full path to chrome [' + missing_marker + ']')

# Offer the stored (or default) cluster name; pressing Enter keeps it.
cluster_name = settings.get('clustername', fallback='Unnamed test cluster')
typed_name = input('Name your cluster ['+ cluster_name +']: ')
cluster_name = typed_name or cluster_name
# Write the effective settings back to config.ini so the next run starts
# from the same values. Every value must be a string, hence str()/json.dumps.
config['DEFAULT'].update({
    'chromepath': chromepath,
    'clustername': cluster_name,
    'keyname': keyname,
    'region': region,
    'subnet': subnet,
    'instances': str(instances),
    'instance_type': instance_type,
    'apps': json.dumps(apps),
    'profile': profile,
    'release_label': release_label,
})
with open('config.ini', 'w') as configfile:
    config.write(configfile)

# The key file (<keyname> + platform extension) must exist in the current
# working directory; stop here if it does not.
key_is_present = os.path.isfile(keyfile)
if not key_is_present:
    raise Exception("keyfile " + keyfile + " not found in current directory.\n\nCheck config.ini" )
import boto3
import random

# All AWS calls go through the named credentials profile from the settings.
session = boto3.Session(profile_name=profile)

# If no subnet was configured, choose a random subnet from the first VPC
# in the list.
if subnet == "":
    # Query EC2 in the same region the EMR client uses; without an explicit
    # region the lookup would use the profile's default region and could
    # return a subnet that does not exist where the cluster is launched.
    ec2 = session.resource('ec2', region_name=region)
    first_vpc = next(iter(ec2.vpcs.all()), None)
    if first_vpc is None:
        raise Exception("no VPC found in region " + region + "; set 'subnet' in config.ini")
    subnets = list(first_vpc.subnets.all())
    if not subnets:
        raise Exception("no subnet found in VPC " + first_vpc.id + "; set 'subnet' in config.ini")
    subnet = random.choice(subnets).id
# Launch the EMR cluster in the configured region and remember its id
# (job_flow_id) for the status polling and termination that follow.
client = session.client('emr', region_name=region)

# Master and slave nodes share one instance type; the cluster is placed in
# the chosen subnet and uses the configured EC2 key pair name.
instance_settings = {
    'MasterInstanceType': instance_type,
    'SlaveInstanceType': instance_type,
    'InstanceCount': instances,
    'KeepJobFlowAliveWhenNoSteps': True,
    'TerminationProtected': False,
    'Ec2SubnetId': subnet,
    'Ec2KeyName': keyname,
}

response = client.run_job_flow(
    Name=cluster_name,
    ReleaseLabel=release_label,
    Instances=instance_settings,
    Applications=apps,
    VisibleToAllUsers=True,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
)
job_flow_id = response['JobFlowId']
import time

# Poll the cluster every 20 seconds until it has left its start-up states.
# States in which the cluster is still coming up:
pending_states = ('STARTING', 'BOOTSTRAPPING')
# States that mean the cluster will never become usable:
failed_states = ('TERMINATING', 'TERMINATED', 'TERMINATED_WITH_ERRORS')

start = time.time()
cluster_info = client.describe_cluster(ClusterId=job_flow_id)
while cluster_info['Cluster']['Status']['State'] in pending_states:
    elapsed = time.time() - start
    print(("waiting... it has been %2.0f " % elapsed) + "seconds. Might take about 500 seconds." )
    time.sleep(20)
    cluster_info = client.describe_cluster(ClusterId=job_flow_id)

# A cluster that failed to start must not be reported as "done!": stop with
# the reason reported by EMR instead of failing later on a missing DNS name.
cluster_status = cluster_info['Cluster']['Status']
if cluster_status['State'] in failed_states:
    reason = cluster_status.get('StateChangeReason', {}).get('Message', 'no reason given')
    raise RuntimeError("cluster " + job_flow_id + " did not start (state "
                       + cluster_status['State'] + "): " + reason)

print("done!")
master_public_dns_name = cluster_info['Cluster']['MasterPublicDnsName']
# Open the SSH tunnel to the master node as user "hadoop", forwarding local
# port 8157 (the port the browser is later pointed at as a SOCKS proxy).
# The process handle is kept in process_id so the tunnel can be killed later.
if platform.system() == 'Windows':
    # Runs "echo y | plink.exe ..." through cmd.exe.
    # NOTE(review): the piped "y" presumably answers plink's host-key prompt - confirm.
    process_id = sp.Popen(['cmd.exe', '/c', 'echo', 'y', '|'
                           , 'plink.exe', '-i', keyfile, '-N', '-D', '8157', 'hadoop@' + master_public_dns_name], shell=True)
else:
    # Pass the argument list directly (no shell): with shell=True on POSIX
    # only the first list item is executed as the command, so ssh would be
    # started without any arguments. Without a shell the option value must
    # not carry literal quote characters either.
    process_id = sp.Popen(['ssh', '-o', 'StrictHostKeyChecking=no'
                           , '-i', keyfile, '-N', '-D', '8157', 'hadoop@' + master_public_dns_name])
# Open Chrome through the SOCKS proxy on local port 8157, with a dedicated
# profile directory, on port 8888 of the master node. The call blocks until
# the browser process exits.
command = [
    chromepath,  # e.g. 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    '--proxy-server=socks5://127.0.0.1:8157',
    '--user-data-dir=' + profiledir,  # e.g. 'C:\\Users\\ILKKAP~1\\AppData\\Local\\Temp\\chrome_emr_socks_session'
    'http://' + master_public_dns_name + ':8888',
]
# Discard the browser's output with DEVNULL. Using PIPE with call() is unsafe:
# nothing reads the pipes, so the browser can block forever once a pipe
# buffer fills up.
sp.call(command, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
# After the browser has closed, shut everything down unless the user
# answers exactly 'no'.
should_stop = input("Terminating the cluster unless you type 'no'")
if should_stop == 'no':
    # The tunnel and the cluster are both left running in this case.
    print("ok, but you need to take care of stopping the clusters yourself!")
else:
    print("stopping everything")
    # Stop the SSH tunnel first, then terminate the cluster.
    process_id.kill()
    response = client.terminate_job_flows(JobFlowIds=[job_flow_id])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment