Last active
May 29, 2018 11:41
-
-
Save ilkkapeltola/d7f0f4e400134455921fefb7dffba90e to your computer and use it in GitHub Desktop.
Launch Hue in a sandbox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import configparser
import json
import os
import platform
import shutil
import subprocess as sp
import tempfile
# Read the persisted settings. ConfigParser.read() silently skips a missing
# config.ini, in which case every option below takes its built-in default.
config = configparser.ConfigParser()
config.read('config.ini')
settings = config['DEFAULT']

# Load settings from config.ini, falling back to defaults where unset.
chromepath = settings.get('chromepath', fallback='not set')
# The key file extension differs per platform: .ppk on Windows, .pem elsewhere.
key_extension = '.ppk' if platform.system() == 'Windows' else '.pem'
keyname = settings.get('keyname', fallback='not-set')
profile = settings.get('profile', fallback='sandbox')
region = settings.get('region', fallback='eu-central-1')
instance_type = settings.get('instance_type', fallback='m4.large')
subnet = settings.get('subnet', fallback="")
instances = settings.getint('instances', fallback=1)
# The application list is stored in config.ini as a JSON string.
apps = json.loads(settings['apps']) if 'apps' in settings else [{'Name': 'HIVE'}, {'Name': 'Hue'}]
keyfile = keyname + key_extension
release_label = settings.get('release_label', fallback='emr-5.13.0')
# Join with os.path.join so the separator is right on every platform; a
# hard-coded backslash only produces a valid path on Windows.
profiledir = os.path.join(tempfile.gettempdir(), "chrome_emr_socks_session")
# Name of the tunnelling client launched later in this script: plink.exe on
# Windows, ssh everywhere else.
tunneling = "plink" if platform.system() == "Windows" else "ssh"
# shutil.which() returns None when the executable is not on PATH. Running
# "where"/"which" through subprocess.call() only returns a non-zero exit code
# in that case and never raises, so a missing tool went unnoticed.
if shutil.which(tunneling) is None:
    raise RuntimeError(tunneling + " is not found")
# Keep asking until the given path points at an existing file.
# The check only runs on Windows; other platforms skip it for now.
while platform.system() == "Windows" and not os.path.isfile(chromepath):
    print("Chrome not found.")
    chromepath = input('Full path to chrome [not found]')
# Offer the saved cluster name (or the built-in one) as the default answer.
cluster_name = settings.get('clustername', fallback='Unnamed test cluster')
cluster_name_input = input(f'Name your cluster [{cluster_name}]: ')
# An empty answer keeps the default shown in the prompt.
cluster_name = cluster_name_input or cluster_name
# Write every setting back to config.ini so the next run starts from the
# values used this time. configparser only stores strings, hence the
# str()/json.dumps() conversions.
config['DEFAULT'].update({
    'chromepath': chromepath,
    'clustername': cluster_name,
    'keyname': keyname,
    'region': region,
    'subnet': subnet,
    'instances': str(instances),
    'instance_type': instance_type,
    'apps': json.dumps(apps),
    'profile': profile,
    'release_label': release_label,
})
with open('config.ini', 'w') as configfile:
    config.write(configfile)
# The private key file is looked up relative to the current directory.
keyfile_missing = not os.path.isfile(keyfile)
if keyfile_missing:
    raise Exception(f"keyfile {keyfile} not found in current directory.\n\nCheck config.ini")
import boto3
session = boto3.Session(profile_name=profile)

# If no subnet was specified, choose a random subnet from the first VPC
# listed in the target region.
import random
if subnet == "":
    # Query EC2 in the same region the EMR cluster is launched in; otherwise
    # the profile's default region is used and the chosen subnet may not
    # exist where the cluster is created.
    ec2 = session.resource('ec2', region_name=region)
    vpcs = list(ec2.vpcs.all())
    if not vpcs:
        raise RuntimeError("No VPC found in region " + region
                           + "; set 'subnet' in config.ini")
    first_vpc = vpcs[0]
    subnets = list(first_vpc.subnets.all())
    if not subnets:
        raise RuntimeError("VPC " + first_vpc.id + " has no subnets"
                           + "; set 'subnet' in config.ini")
    subnet = random.choice(subnets).id
# Create the EMR client for the configured region and launch the cluster.
client = session.client('emr', region_name=region)

# Master and slave nodes use the same instance type. The cluster is kept
# alive when it has no steps and is not termination-protected.
instance_config = dict(
    MasterInstanceType=instance_type,
    SlaveInstanceType=instance_type,
    InstanceCount=instances,
    KeepJobFlowAliveWhenNoSteps=True,
    TerminationProtected=False,
    Ec2SubnetId=subnet,
    Ec2KeyName=keyname,
)
launch_request = dict(
    Name=cluster_name,
    ReleaseLabel=release_label,
    Instances=instance_config,
    Applications=apps,
    VisibleToAllUsers=True,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
)
response = client.run_job_flow(**launch_request)
# The job flow id identifies the cluster in all later API calls.
job_flow_id = response['JobFlowId']
import time
start = time.time()
cluster_info = client.describe_cluster(ClusterId=job_flow_id)
# Poll every 20 seconds while the cluster is being provisioned. BOOTSTRAPPING
# follows STARTING and still means "not ready", so wait through both.
while cluster_info['Cluster']['Status']['State'] in ('STARTING', 'BOOTSTRAPPING'):
    elapsed = time.time() - start
    print(("waiting... it has been %2.0f " % elapsed) + "seconds. Might take about 500 seconds.")
    time.sleep(20)
    cluster_info = client.describe_cluster(ClusterId=job_flow_id)

# Leaving the loop does not mean success: the cluster may have gone straight
# to a terminating/terminated state. Fail with a clear message instead of an
# obscure KeyError on the DNS name below.
cluster_state = cluster_info['Cluster']['Status']['State']
if cluster_state not in ('RUNNING', 'WAITING'):
    state_reason = cluster_info['Cluster']['Status'].get('StateChangeReason', {}).get('Message', '')
    raise RuntimeError("Cluster " + job_flow_id + " did not start (state: "
                       + cluster_state + "). " + state_reason)
print("done!")
master_public_dns_name = cluster_info['Cluster']['MasterPublicDnsName']
# Open the SSH tunnel to the master node. "-D 8157" makes the client listen
# on local port 8157, which the browser below uses as its SOCKS proxy.
if platform.system() == 'Windows':
    # Runs through cmd.exe so that "echo y" can be piped into plink,
    # presumably to answer its host-key confirmation prompt.
    process_id = sp.Popen(['cmd.exe', '/c', 'echo', 'y', '|',
                           'plink.exe', '-i', keyfile, '-N', '-D', '8157',
                           'hadoop@' + master_public_dns_name], shell=True)
else:
    # Run ssh directly, without a shell. With shell=True and a list, POSIX
    # executes only the first element ("ssh") and hands the remaining items
    # to the shell itself, so ssh would start without any arguments. Without
    # a shell the option must not carry literal quotes either.
    process_id = sp.Popen(['ssh', '-o', 'StrictHostKeyChecking=no',
                           '-i', keyfile, '-N', '-D', '8157',
                           'hadoop@' + master_public_dns_name])
# Open the browser on the master node's port 8888 through the SOCKS proxy and
# block until the browser process exits.
command = [
    chromepath,  # e.g. 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe'
    '--proxy-server=socks5://127.0.0.1:8157',
    '--user-data-dir=' + profiledir,  # e.g. 'C:\\Users\\ILKKAP~1\\AppData\\Local\\Temp\\chrome_emr_socks_session'
    'http://' + master_public_dns_name + ':8888',
]
# Discard the browser's output with DEVNULL. Passing PIPE to sp.call() is
# unsafe: nothing reads the pipes, so the child blocks once a buffer fills.
sp.call(command, stdout=sp.DEVNULL, stderr=sp.DEVNULL)
# Once the browser has closed, tear everything down unless the user opts out
# by typing exactly 'no'.
should_stop = input("Terminating the cluster unless you type 'no'")
if should_stop == 'no':
    print("ok, but you need to take care of stopping the clusters yourself!")
else:
    print("stopping everything")
    # Stop the SSH tunnel process first.
    process_id.kill()
    # Then ask EMR to terminate the cluster.
    response = client.terminate_job_flows(JobFlowIds=[job_flow_id])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment