Skip to content

Instantly share code, notes, and snippets.

@robcowie
Created December 3, 2015 11:05
Show Gist options
  • Save robcowie/847a6d9ff4d4352561d1 to your computer and use it in GitHub Desktop.
Save robcowie/847a6d9ff4d4352561d1 to your computer and use it in GitHub Desktop.
boto3_driver
import boto3
# References:
# - EMR release guide, adding a Spark (spark-submit) step / dynamic configuration:
#   http://docs.aws.amazon.com/ElasticMapReduce/latest/ReleaseGuide/emr-spark-submit-step.html#dynamic-configuration
# - Example EMR launcher (Drone workflow controller, drone/actions/emr_launcher.py):
#   https://github.com/grafke/Drone-workflow-controller/blob/7f40968f4164aede4e67070f5a4c0894dcc6d776/drone/actions/emr_launcher.py
# - boto3 EMR.Client.run_job_flow API reference:
#   https://boto3.readthedocs.org/en/latest/reference/services/emr.html#EMR.Client.run_job_flow
# Passed as the ``Instances`` argument of ``run_job_flow``: one spot-market
# master group and one spot-market core group, one c3.2xlarge node each.
INSTANCE_CONFIG = {
    'InstanceGroups': [
        {
            'Name': 'Master',
            'Market': 'SPOT',
            'InstanceRole': 'MASTER',
            'BidPrice': '0.2',  # the API takes the bid as a string
            'InstanceType': 'c3.2xlarge',
            'InstanceCount': 1,
        },
        {
            'Name': 'Executors',
            'Market': 'SPOT',
            'InstanceRole': 'CORE',
            'BidPrice': '0.2',
            'InstanceType': 'c3.2xlarge',
            'InstanceCount': 1,
        },
    ],
    'Ec2KeyName': 'EMR',
    'Placement': {
        'AvailabilityZone': 'us-east-1e',
    },
    # Per the EMR API docs, False means the cluster is not kept alive once
    # it has no more steps to run.
    'KeepJobFlowAliveWhenNoSteps': False,
    'TerminationProtected': False,
}

# Passed as ``BootstrapActions``. The API expects a *list* of bootstrap
# action definitions; an empty dict is rejected by botocore's parameter
# validation, so "no bootstrap actions" is spelled as an empty list.
BOOTSTRAP_CONFIG = []

# Passed as ``Steps``: a single spark-submit step run through
# command-runner.jar.
STEP_CONFIG = [{
    'Name': 'My App',
    'ActionOnFailure': 'CONTINUE',
    'HadoopJarStep': {
        'Jar': 'command-runner.jar',
        'Args': [
            'spark-submit',
            'path/to/my/app/run.py',
            # NOTE(review): this is handed to the application as ONE argv
            # token ("--param 1"). If the app expects a flag plus a value,
            # it probably needs to be '--param', '1' -- confirm against the
            # application's argument parser.
            '--param 1',
        ],
    },
}]

# Passed as ``Applications``. Each entry must be a dict with a ``Name`` key;
# bare strings such as 'Spark' fail botocore's parameter validation.
APPLICATIONS = [{'Name': 'Spark'}]

# Passed as ``Configurations``. The API expects a *list* of configuration
# objects; an empty list means "no configuration overrides".
CONFIGURATIONS = []
def main():
    """Launch an EMR cluster from the module-level configuration and print the response.

    Creates a boto3 EMR client (no explicit region or credentials are
    passed, so boto3's default configuration is used), submits a single
    ``run_job_flow`` request built from ``INSTANCE_CONFIG``,
    ``STEP_CONFIG``, ``BOOTSTRAP_CONFIG``, ``APPLICATIONS`` and
    ``CONFIGURATIONS``, and prints the raw API response.
    """
    emr_client = boto3.client('emr')
    aws_response = emr_client.run_job_flow(
        Name='My Job',
        LogUri='s3n://data.api.qa.us/rob/cookie_sync_logs/',
        ReleaseLabel='emr-4.2.0',
        Instances=INSTANCE_CONFIG,
        Steps=STEP_CONFIG,
        BootstrapActions=BOOTSTRAP_CONFIG,
        Applications=APPLICATIONS,
        Configurations=CONFIGURATIONS,
        VisibleToAllUsers=True,
        JobFlowRole='EMR_EC2_DefaultRole',
        ServiceRole='EMR_DefaultRole',
    )
    print(aws_response)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment