Skip to content

Instantly share code, notes, and snippets.

@rohitgarg
Last active December 11, 2015 21:11
Show Gist options
  • Save rohitgarg/9c8071fc7c9eef6de879 to your computer and use it in GitHub Desktop.
Save rohitgarg/9c8071fc7c9eef6de879 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import boto
import boto.emr
from boto.emr.instance_group import InstanceGroup
from boto.emr.connection import EmrConnection
from boto.emr.bootstrap_action import BootstrapAction
from boto.emr.step import InstallHiveStep
from boto.emr.step import JarStep
from boto.emr.step import InstallHiveStep
# Launch a long-running EMR cluster ("Tez Cluster") in us-east-1 with Hive,
# Ganglia and Tez installed, then queue a step that finishes the Tez setup.
import logging

# Without a configured handler the INFO message below would be dropped.
logging.basicConfig(level=logging.INFO)

conn = boto.emr.connect_to_region('us-east-1')

# Bid price handed to the TASK instance group. A falsy value (0, None, '')
# makes the TASK group fall back to ON_DEMAND instances instead of SPOT.
task_node_spotprice = 2

# InstanceGroup(num_instances, role, type, market, name, bidprice)
instance_groups = [
    InstanceGroup(1, 'MASTER', 'm1.xlarge', 'ON_DEMAND', '', ''),
    InstanceGroup(4, 'CORE', 'r3.4xlarge', 'ON_DEMAND', '', ''),
]

# Task nodes: SPOT when a bid price is configured, ON_DEMAND otherwise.
instance_groups.append(
    InstanceGroup(
        2,
        'TASK',
        'r3.4xlarge',
        'SPOT' if task_node_spotprice else 'ON_DEMAND',
        '',
        task_node_spotprice,
    )
)

# Hive installation runs as the first step of the job flow.
hive_install_step = InstallHiveStep()

# Bootstrap actions run on the nodes before any step is executed.
bootstrap_actions_list = []

# BootstrapAction(name, path, bootstrap_action_args)
hadoop_monitor_bootstrapper = BootstrapAction(
    'ganglia-config',
    's3://elasticmapreduce/bootstrap-actions/install-ganglia',
    '',
)
logging.info("Bootstrapping Ganglia")
bootstrap_actions_list.append(hadoop_monitor_bootstrapper)

# Bootstrapping Tez
tez_bootstrapper = BootstrapAction(
    'Install Tez',
    's3://support.elasticmapreduce/bootstrap-actions/ami/3.2.x/install-tez.beta',
    '',
)
bootstrap_actions_list.append(tez_bootstrapper)

# run_jobflow returns the id of the newly created job flow (cluster).
cluster_id = conn.run_jobflow(
    "Tez Cluster",
    instance_groups=instance_groups,
    action_on_failure='TERMINATE_JOB_FLOW',
    keep_alive=True,  # keep the cluster up after the steps finish
    enable_debugging=True,
    log_uri="s3://mybucket/logs/",
    hadoop_version=None,
    ami_version="3.4.0",
    steps=[hive_install_step],
    ec2_keyname="my-ec2-key",
    visible_to_all_users=True,
    job_flow_role="EMR_EC2_DefaultRole",
    bootstrap_actions=bootstrap_actions_list,
    service_role="EMR_DefaultRole",
)
# %-formatting prints the same text under both Python 2 and Python 3.
print("Starting Tez Cluster %s" % cluster_id)

# Setting Up Tez: script-runner.jar executes the setup script given as its
# argument; the step is queued on the cluster that was just started.
tez_args = ['s3://support.elasticmapreduce/bootstrap-actions/ami/3.2.x/setup-tez.beta']
tezJarStep = JarStep(
    'Setup Tez',
    's3://elasticmapreduce/libs/script-runner/script-runner.jar',
    step_args=tez_args,
)
conn.add_jobflow_steps(cluster_id, [tezJarStep])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment