#!/usr/bin/env python
import logging

import boto
import boto.emr
from boto.emr.bootstrap_action import BootstrapAction
from boto.emr.connection import EmrConnection
from boto.emr.instance_group import InstanceGroup
from boto.emr.step import InstallHiveStep
from boto.emr.step import JarStep
# Launch a keep-alive EMR cluster with Hive installed and the Ganglia and Tez
# bootstrap actions applied, then queue the Tez setup script as an extra step.
conn = boto.emr.connect_to_region('us-east-1')

# Bid price handed to the TASK instance group. A falsy value (0 / None)
# switches the task nodes to the ON_DEMAND market instead of SPOT.
task_node_spotprice = 2

instance_groups = [
    InstanceGroup(1, 'MASTER', 'm1.xlarge', 'ON_DEMAND', '', ''),
    InstanceGroup(4, 'CORE', 'r3.4xlarge', 'ON_DEMAND', '', ''),
]

# Task nodes: spot instances when a bid price is set, on-demand otherwise.
instance_groups.append(
    InstanceGroup(
        2,
        'TASK',
        'r3.4xlarge',
        'SPOT' if task_node_spotprice else 'ON_DEMAND',
        '',
        task_node_spotprice,
    )
)

# Hive installation
hive_install_step = InstallHiveStep()

bootstrap_actions_list = []

# Bootstrapping Ganglia
hadoop_monitor_bootstrapper = BootstrapAction(
    'ganglia-config',
    's3://elasticmapreduce/bootstrap-actions/install-ganglia',
    '',
)
# NOTE: only emitted if the root logger is configured for INFO or lower.
logging.info("Bootstrapping Ganglia")
bootstrap_actions_list.append(hadoop_monitor_bootstrapper)

# Bootstrapping Tez
tez_bootstrapper = BootstrapAction(
    'Install Tez',
    's3://support.elasticmapreduce/bootstrap-actions/ami/3.2.x/install-tez.beta',
    '',
)
bootstrap_actions_list.append(tez_bootstrapper)

cluster_id = conn.run_jobflow(
    "Tez Cluster",
    instance_groups=instance_groups,
    action_on_failure='TERMINATE_JOB_FLOW',
    keep_alive=True,
    enable_debugging=True,
    log_uri="s3://mybucket/logs/",
    hadoop_version=None,
    ami_version="3.4.0",
    steps=[hive_install_step],
    ec2_keyname="my-ec2-key",
    visible_to_all_users=True,
    job_flow_role="EMR_EC2_DefaultRole",
    bootstrap_actions=bootstrap_actions_list,
    service_role="EMR_DefaultRole",
)
# Single pre-formatted argument so the output is the same on Python 2 and 3.
print("Starting Tez Cluster %s" % cluster_id)

# Setting Up Tez: run the setup script through script-runner as a new step on
# the cluster that was just started (the id returned by run_jobflow).
tez_args = ['s3://support.elasticmapreduce/bootstrap-actions/ami/3.2.x/setup-tez.beta']
tezJarStep = JarStep(
    'Setup Tez',
    's3://elasticmapreduce/libs/script-runner/script-runner.jar',
    step_args=tez_args,
)
conn.add_jobflow_steps(cluster_id, [tezJarStep])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment