import boto3
import setup


def create_emr():
    """Spin up the EMR cluster for the Stack Overflow pipeline and stage its scripts."""
    client = boto3.client('emr', region_name='us-east-1')
    # run_job_flow returns the new cluster's metadata; cluster_id['JobFlowId'] is the cluster id.
    cluster_id = client.run_job_flow(
        Name='stackoverflow',
        ReleaseLabel='emr-5.18.0',
        Applications=[
            {'Name': 'Spark'},
            {'Name': 'Hue'},
            {'Name': 'Hive'},
            {'Name': 'Presto'}
        ],
        Instances={
            'InstanceGroups': [
                {
                    'Name': 'Master',
                    'Market': 'SPOT',
                    'InstanceRole': 'MASTER',
                    'InstanceType': 'm1.xlarge',
                    'InstanceCount': 1,
                },
                {
                    'Name': 'Slave',
                    'Market': 'SPOT',
                    'InstanceRole': 'CORE',
                    'InstanceType': 'm1.xlarge',
                    'InstanceCount': 1,
                }
            ],
            'Ec2KeyName': setup.key_name,
            # Keep the cluster alive after these setup steps so jobs can be added later.
            'KeepJobFlowAliveWhenNoSteps': True,
            'TerminationProtected': False,
        },
        VisibleToAllUsers=True,
        JobFlowRole='EMR_EC2_DefaultRole',
        ServiceRole='EMR_DefaultRole',
        # Copy the processing script, the driver shell script, and the Redshift JDBC
        # driver from S3 onto the master node.
        Steps=[
            {
                'Name': 'Copy Processing Script',
                'ActionOnFailure': 'TERMINATE_CLUSTER',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': ['aws', 's3', 'cp',
                             's3://stack-overflow-bucket/scripts/stack-processing.py',
                             '/home/hadoop/']
                }
            },
            {
                'Name': 'Copy Executing Script',
                'ActionOnFailure': 'TERMINATE_CLUSTER',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': ['aws', 's3', 'cp',
                             's3://stack-overflow-bucket/scripts/Execute.sh',
                             '/home/hadoop/']
                }
            },
            {
                'Name': 'Copy Jar',
                'ActionOnFailure': 'TERMINATE_CLUSTER',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': ['aws', 's3', 'cp',
                             's3://stack-overflow-bucket/scripts/RedshiftJDBC42-no-awssdk-1.2.20.1043.jar',
                             '/home/hadoop/']
                }
            }
        ]
    )
    # Confirm the request went through by listing clusters that are already running.
    response = client.list_clusters(ClusterStates=['RUNNING'])
    print(response)


create_emr()
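
# --- Not part of the original gist: a minimal sketch of bridging the two snippets. ---
# It assumes the 'JobFlowId' returned by run_job_flow above and uses boto3's built-in
# 'cluster_running' waiter, which polls DescribeCluster until the cluster reaches the
# RUNNING or WAITING state, so the step submitted below lands on an idle cluster.
import boto3


def wait_for_cluster(cluster_id):
    client = boto3.client('emr', region_name='us-east-1')
    waiter = client.get_waiter('cluster_running')
    # Poll every 30 seconds, for at most 60 attempts (roughly 30 minutes).
    waiter.wait(ClusterId=cluster_id, WaiterConfig={'Delay': 30, 'MaxAttempts': 60})


# Example with a hypothetical id: wait_for_cluster('j-XXXXXXXXXXXXX')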
import boto3
import setup


def run_jobs():
    """Submit the Execute.sh step to every idle (WAITING) cluster."""
    client = boto3.client('emr', region_name='us-east-1')
    # Only clusters in the WAITING state are idle and ready to accept new steps.
    response = client.list_clusters(ClusterStates=['WAITING'])
    for cluster in response['Clusters']:
        cluster_id = cluster['Id']
        response = client.add_job_flow_steps(
            JobFlowId=cluster_id,
            Steps=[
                {
                    'Name': 'Execute Script',
                    'ActionOnFailure': 'CANCEL_AND_WAIT',
                    'HadoopJarStep': {
                        'Jar': 'command-runner.jar',
                        # Execute.sh was staged on the master node by the copy steps above.
                        'Args': ['bash', '/home/hadoop/Execute.sh']
                    }
                }
            ]
        )
        print(response)


run_jobs()
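
# --- Not part of the original gist: a hedged sketch of confirming the step finished. ---
# add_job_flow_steps returns the new step ids ({'StepIds': [...]}), so one way to verify
# Execute.sh completed is boto3's 'step_complete' waiter plus describe_step. The
# cluster_id and step_id arguments here are illustrative placeholders.
import boto3


def wait_for_step(cluster_id, step_id):
    client = boto3.client('emr', region_name='us-east-1')
    # Raises a WaiterError if the step fails, is cancelled, or times out.
    client.get_waiter('step_complete').wait(ClusterId=cluster_id, StepId=step_id)
    state = client.describe_step(ClusterId=cluster_id, StepId=step_id)['Step']['Status']['State']
    print(state)  # expected: COMPLETED


# Example usage with the response returned by add_job_flow_steps above:
# wait_for_step(cluster_id, response['StepIds'][0])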