Skip to content

Instantly share code, notes, and snippets.

@ilkkapeltola ilkkapeltola/launch_hue.py
Last active May 29, 2018

Embed
What would you like to do?
Launch Hue in a sandbox
import os
import configparser
import platform
import json
import subprocess as sp
import tempfile
# Load settings from config.ini. A missing file or a missing option is fine:
# config.read() skips files it cannot open, and every option below has a
# fallback value.
config = configparser.ConfigParser()
config.read('config.ini')
settings = config['DEFAULT']

chromepath = settings.get('chromepath', 'not set')
# The key file is looked up as <keyname>.ppk on Windows and <keyname>.pem elsewhere.
key_extension = '.ppk' if platform.system() == 'Windows' else '.pem'
keyname = settings.get('keyname', 'not-set')
profile = settings.get('profile', 'sandbox')
region = settings.get('region', 'eu-central-1')
instance_type = settings.get('instance_type', 'm4.large')
# An empty subnet means "pick one automatically" further down the script.
subnet = settings.get('subnet', "")
instances = settings.getint('instances', fallback=1)
# The application list is stored in config.ini as a JSON string.
apps = json.loads(settings['apps']) if 'apps' in settings else [{'Name': 'HIVE'}, {'Name': 'Hue'}]
keyfile = keyname + key_extension
release_label = settings.get('release_label', 'emr-5.13.0')
# Dedicated Chrome profile directory under the system temp dir. os.path.join
# picks the right separator; a hard-coded backslash only works on Windows.
profiledir = os.path.join(tempfile.gettempdir(), "chrome_emr_socks_session")
# Make sure the SSH client needed for the tunnel is installed before doing
# anything expensive.
#
# shutil.which() returns None when the executable is not on PATH. Spawning
# `where`/`which` through subprocess.call() only yields a non-zero exit
# status in that case and never raises, so wrapping it in try/except
# detected nothing.
import shutil

# NOTE(review): on Windows the tunnel is later opened with plink.exe, not
# putty itself - consider checking for "plink" here instead.
tunneling = "putty" if platform.system() == "Windows" else "ssh"
if shutil.which(tunneling) is None:
    raise Exception(tunneling + " is not found")
# Keep asking for the Chrome executable until the given path is an existing
# file. The check is only enforced on Windows; on any other platform the
# configured value is used as-is.
on_windows = platform.system() == "Windows"
while on_windows and not os.path.isfile(chromepath):
    print("Chrome not found.")
    chromepath = input('Full path to chrome [not found]')

# Offer the stored cluster name as the default; an empty answer keeps it.
cluster_name = settings.get('clustername', 'Unnamed test cluster')
typed_name = input('Name your cluster [' + cluster_name + ']: ')
if typed_name:
    cluster_name = typed_name
# Write the effective settings back to config.ini so the next run starts
# from them. configparser stores strings only, hence str() and json.dumps().
defaults = config['DEFAULT']
for option, value in (
    ('chromepath', chromepath),
    ('clustername', cluster_name),
    ('keyname', keyname),
    ('region', region),
    ('subnet', subnet),
    ('instances', str(instances)),
    ('instance_type', instance_type),
    ('apps', json.dumps(apps)),
    ('profile', profile),
    ('release_label', release_label),
):
    defaults[option] = value

with open('config.ini', 'w') as configfile:
    config.write(configfile)

# The key file is resolved relative to the current working directory.
# This runs after config.ini is written, so the file the error message
# points at already exists.
expected_keyfile = keyname + key_extension
if not os.path.isfile(expected_keyfile):
    raise Exception("keyfile " + expected_keyfile + " not found in current directory.\n\nCheck config.ini")
import random

import boto3

session = boto3.Session(profile_name=profile)

# If no subnet was configured, pick a random subnet from the first VPC in
# the list. The lookup is pinned to `region` - the same region the EMR
# client uses - so the chosen subnet cannot come from whatever region
# happens to be the profile's default.
if subnet == "":
    ec2 = session.resource('ec2', region_name=region)
    vpcs = list(ec2.vpcs.all())
    if not vpcs:
        raise RuntimeError("no VPC found in region " + region + "; set 'subnet' in config.ini")
    subnets = list(vpcs[0].subnets.all())
    if not subnets:
        raise RuntimeError("the first VPC in region " + region + " has no subnets; set 'subnet' in config.ini")
    subnet = random.choice(subnets).id
# Create the EMR cluster and remember its id for polling and termination.
client = session.client('emr', region_name=region)

# Master and worker nodes use the same instance type. The cluster is asked
# to stay alive without steps and is not termination-protected, so the
# script can terminate it at the end.
instance_config = {
    'MasterInstanceType': instance_type,
    'SlaveInstanceType': instance_type,
    'InstanceCount': instances,
    'KeepJobFlowAliveWhenNoSteps': True,
    'TerminationProtected': False,
    'Ec2SubnetId': subnet,
    'Ec2KeyName': keyname,
}
response = client.run_job_flow(
    Name=cluster_name,
    ReleaseLabel=release_label,
    Instances=instance_config,
    Applications=apps,
    VisibleToAllUsers=True,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
)
job_flow_id = response['JobFlowId']
import time

# Poll the cluster every 20 seconds until it has finished coming up.
# BOOTSTRAPPING is treated as "still starting" too, so the loop does not
# stop while the cluster is still being set up.
pending_states = ('STARTING', 'BOOTSTRAPPING')
# States that mean the cluster is going away and will never serve Hue.
failed_states = ('TERMINATING', 'TERMINATED', 'TERMINATED_WITH_ERRORS')

start = time.time()
cluster_info = client.describe_cluster(ClusterId=job_flow_id)
cluster_state = cluster_info['Cluster']['Status']['State']
while cluster_state in pending_states:
    elapsed = time.time() - start
    print(("waiting... it has been %2.0f " % elapsed) + "seconds. Might take about 500 seconds.")
    time.sleep(20)
    cluster_info = client.describe_cluster(ClusterId=job_flow_id)
    cluster_state = cluster_info['Cluster']['Status']['State']

# Fail loudly instead of printing "done!" and trying to tunnel into a
# cluster that never came up.
if cluster_state in failed_states:
    reason = cluster_info['Cluster']['Status'].get('StateChangeReason', {}).get('Message', 'no reason reported')
    raise RuntimeError("cluster " + job_flow_id + " ended up in state " + cluster_state + ": " + reason)

print("done!")
master_public_dns_name = cluster_info['Cluster']['MasterPublicDnsName']
# Open the SSH tunnel: a SOCKS proxy on local port 8157 through the master
# node. `process_id` holds the Popen object so the tunnel can be killed later.
tunnel_target = 'hadoop@' + master_public_dns_name
if platform.system() == 'Windows':
    # Runs through the shell because of the pipe; `echo y |` presumably
    # answers plink's host-key confirmation prompt.
    process_id = sp.Popen(['cmd.exe', '/c', 'echo', 'y', '|',
                           'plink.exe', '-i', keyfile, '-N', '-D', '8157', tunnel_target],
                          shell=True)
else:
    # No shell here: on POSIX, Popen(list, shell=True) executes only the
    # first list item, so ssh would be started without any arguments.
    # Without a shell the option must not carry literal quotes either,
    # hence StrictHostKeyChecking=no.
    process_id = sp.Popen(['ssh', '-o', 'StrictHostKeyChecking=no',
                           '-i', keyfile, '-N', '-D', '8157', tunnel_target])
# Open Hue (port 8888 on the master node) in a Chrome instance that uses the
# SOCKS tunnel and its own profile directory. sp.call() blocks until that
# Chrome process exits.
command = [
    chromepath,  # e.g. 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    '--proxy-server=socks5://127.0.0.1:8157',
    '--user-data-dir=' + profiledir,  # e.g. 'C:\\Users\\ILKKAP~1\\AppData\\Local\\Temp\\chrome_emr_socks_session'
    'http://' + master_public_dns_name + ':8888',
]
# Discard Chrome's output. With sp.PIPE nothing ever reads the pipes, so
# Chrome could block once the OS pipe buffer fills up and call() would
# never return.
sp.call(command, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
# Clean up by default: only the exact answer 'no' keeps the tunnel and the
# cluster alive.
answer = input("Terminating the cluster unless you type 'no'")
if answer == 'no':
    print("ok, but you need to take care of stopping the clusters yourself!")
else:
    print("stopping everything")
    # Close the SSH tunnel first, then terminate the cluster.
    process_id.kill()
    response = client.terminate_job_flows(JobFlowIds=[job_flow_id])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.