# Spark EMR Cluster script
# elastic-mapreduce --create --name "Spark Example Project" --instance-type m1.xlarge --instance-count 3 \
# --jobflow-role SparkRole --service-role EMR_DefaultRole \
# --bootstrap-action s3://support.elasticmapreduce/spark/install-spark --bootstrap-name "Install Spark" \
# --jar s3://elasticmapreduce/libs/script-runner/script-runner.jar --step-name "Run Spark Example Project" \
# --step-action TERMINATE_JOB_FLOW \
# --arg s3://snowplow-hosted-assets/common/spark/run-spark-job-0.1.0.sh \
# --arg s3://sparkcluster/spark-example-project-0.2.0.jar \
# --arg com.snowplowanalytics.spark.WordCountJob \
# --arg s3n://sparkcluster/hello.txt \
# --arg s3n://sparkcluster/results
# Create the EMR cluster "SparkCluster3": 3 x m3.xlarge instances on AMI 3.6,
# with Hive as an application and the install-spark bootstrap action.
# Cluster logs are written under s3://rbt-sparkcluster/logs.
#
# Every continued line must end in a bare backslash; any character after it
# (even a space) breaks the continuation and splits the command.
aws emr create-cluster \
  --name SparkCluster3 \
  --ami-version 3.6 \
  --instance-type m3.xlarge \
  --instance-count 3 \
  --service-role EMR_DefaultRole \
  --ec2-attributes KeyName=sparkcluster,InstanceProfile=SparkRole \
  --applications Name=Hive \
  --bootstrap-actions Path=s3://support.elasticmapreduce/spark/install-spark \
  --log-uri s3://rbt-sparkcluster/logs
# Submit the JavaWordCount Spark example as a step on an existing EMR cluster.
#
# The target cluster defaults to the id that was originally hard-coded here;
# export CLUSTER_ID before running to submit the step to a different cluster.
cluster_id=${CLUSTER_ID:-j-1BTYATKY45YIX}

# The step's Args value is one comma-separated list; it is assembled from
# named pieces purely for readability and yields the same string as before.
spark_submit_opts='--deploy-mode,cluster,--master,yarn-cluster,--driver-memory,1G,--executor-memory,1G,--num-executors,4'
spark_app='--class,org.apache.spark.examples.JavaWordCount,s3://support.elasticmapreduce/spark/1.2.0/spark-examples-1.2.0-hadoop2.4.0.jar,s3://support.elasticmapreduce/spark/examples/wordcountdata'

# NOTE(review): Jar points at the example-project jar while Args begin with the
# spark-submit path -- confirm this is the intended runner jar for the step.
# ActionOnFailure=CONTINUE is passed through unchanged from the original step.
spark_step="Name=SparkExample1,Jar=s3://rbt-sparkcluster/spark-example-project-0.2.0.jar,Args=[/home/hadoop/spark/bin/spark-submit,${spark_submit_opts},${spark_app}],ActionOnFailure=CONTINUE"

aws emr add-steps --cluster-id "$cluster_id" --steps "$spark_step"
# (Residue from the GitHub page this script was copied from; not part of the script.)
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment