Skip to content

Instantly share code, notes, and snippets.

Created August 3, 2017 08:15
Show Gist options
  • Save dasgoll/7d27e7722ff29f3e2e8806062ef33c83 to your computer and use it in GitHub Desktop.
Save dasgoll/7d27e7722ff29f3e2e8806062ef33c83 to your computer and use it in GitHub Desktop.
EMR cluster cloudformation template
"Conditions": {
"WithSpotPrice": {
"Fn::Not": [
"Fn::Equals": [
"Ref": "SpotPrice"
"Description": "Sample CloudFormation template for creating an EMR cluster",
"Parameters": {
"GcTimeRatioValue": {
"Default": "19",
"Description": "Hadoop name node garbage collector time ratio",
"Type": "Number"
"KeyName": {
"Description": "Name of an existing EC2 KeyPair to enable SSH to the instances",
"Type": "AWS::EC2::KeyPair::KeyName"
"SpotPrice": {
"Default": "0.1",
"Description": "Spot price (or use 0 for 'on demand' instance)",
"Type": "Number"
"Subnet": {
"Description": "Subnet ID for creating the EMR cluster",
"Type": "AWS::EC2::Subnet::Id"
"Resources": {
"EMRInstanceProfile": {
"Properties": {
"Roles": [
"Ref": "EMRJobFlowRole"
"Type": "AWS::IAM::InstanceProfile"
"EMRJobFlowRole": {
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
"Action": [
"Effect": "Allow",
"Principal": {
"Service": [
"ManagedPolicyArns": [
"Type": "AWS::IAM::Role"
"EMRSampleCluster": {
"Properties": {
"Applications": [
"Name": "Hadoop"
"Name": "Hive"
"Name": "Mahout"
"Name": "Pig"
"Name": "Spark"
"AutoScalingRole": {
"Ref": "EMR_AutoScaling_DefaultRole"
"BootstrapActions": [
"Name": "Dummy bootstrap action",
"ScriptBootstrapAction": {
"Args": [
"Path": "file:/usr/share/aws/emr/scripts/install-hue"
"Configurations": [
"Classification": "core-site",
"ConfigurationProperties": {
"hadoop.security.groups.cache.secs": "250"
"Classification": "mapred-site",
"ConfigurationProperties": {
"mapred.tasktracker.map.tasks.maximum": "2",
"mapreduce.map.sort.spill.percent": "90",
"mapreduce.tasktracker.reduce.tasks.maximum": "5"
"Classification": "hadoop-env",
"Configurations": [
"Classification": "export",
"ConfigurationProperties": {
"Fn::Join": [
"Ref": "GcTimeRatioValue"
"Instances": {
"CoreInstanceGroup": {
"AutoScalingPolicy": {
"Constraints": {
"MaxCapacity": "3",
"MinCapacity": "1"
"Rules": [
"Action": {
"Market": "ON_DEMAND",
"SimpleScalingPolicyConfiguration": {
"AdjustmentType": "EXACT_CAPACITY",
"CoolDown": "300",
"ScalingAdjustment": "1"
"Description": "CoreAutoScalingPolicy rules",
"Name": "CoreAutoScalingPolicy",
"Trigger": {
"CloudWatchAlarmDefinition": {
"ComparisonOperator": "GREATER_THAN",
"Dimensions": [
"Key": "",
"Value": "my.custom.master.value"
"EvaluationPeriods": "120",
"MetricName": "TestMetric",
"Namespace": "AWS/ElasticMapReduce",
"Period": "300",
"Statistic": "AVERAGE",
"Threshold": "50",
"Unit": "PERCENT"
"BidPrice": {
"Fn::If": [
"Ref": "SpotPrice"
"Ref": "AWS::NoValue"
"EbsConfiguration": {
"EbsBlockDeviceConfigs": [
"VolumeSpecification": {
"SizeInGB": "10",
"VolumeType": "gp2"
"VolumesPerInstance": "1"
"EbsOptimized": "true"
"InstanceCount": "1",
"InstanceType": "m4.large",
"Market": {
"Fn::If": [
"Name": "Core Instance"
"Ec2KeyName": {
"Ref": "KeyName"
"Ec2SubnetId": {
"Ref": "Subnet"
"MasterInstanceGroup": {
"AutoScalingPolicy": {
"Constraints": {
"MaxCapacity": "3",
"MinCapacity": "1"
"Rules": [
"Action": {
"Market": "ON_DEMAND",
"SimpleScalingPolicyConfiguration": {
"AdjustmentType": "EXACT_CAPACITY",
"CoolDown": "300",
"ScalingAdjustment": "1"
"Description": "MasterAutoScalingPolicy rules",
"Name": "MasterAutoScalingPolicy",
"Trigger": {
"CloudWatchAlarmDefinition": {
"ComparisonOperator": "GREATER_THAN",
"Dimensions": [
"Key": "",
"Value": "my.custom.master.value"
"EvaluationPeriods": "120",
"MetricName": "TestMetric",
"Namespace": "AWS/ElasticMapReduce",
"Period": "300",
"Statistic": "AVERAGE",
"Threshold": "50",
"Unit": "PERCENT"
"InstanceCount": "1",
"InstanceType": "m4.large",
"Market": "ON_DEMAND",
"Name": "Master Instance"
"JobFlowRole": {
"Ref": "EMRInstanceProfile"
"Name": "EMR Sample Cluster",
"ReleaseLabel": "emr-4.4.0",
"SecurityConfiguration": {
"Ref": "EMRSecurityConfiguration"
"ServiceRole": {
"Ref": "EMRServiceRole"
"Tags": [
"Key": "Name",
"Value": "EMR Sample Cluster"
"VisibleToAllUsers": "true"
"Type": "AWS::EMR::Cluster"
"EMRSecurityConfiguration": {
"Properties": {
"Name": "EMRSampleClusterSecurityConfiguration",
"SecurityConfiguration": {
"EncryptionConfiguration": {
"AtRestEncryptionConfiguration": {
"LocalDiskEncryptionConfiguration": {
"AwsKmsKey": "arn:aws:kms:us-east-1:123456789012:key/1234-1234-1234-1234-1234",
"EncryptionKeyProviderType": "AwsKms"
"S3EncryptionConfiguration": {
"AwsKmsKey": "arn:aws:kms:us-east-1:123456789012:key/1234-1234-1234-1234-1234",
"EncryptionMode": "SSE-KMS"
"EnableAtRestEncryption": "true",
"EnableInTransitEncryption": "true",
"InTransitEncryptionConfiguration": {
"TLSCertificateConfiguration": {
"CertificateProviderType": "PEM",
"S3Object": "s3://MyConfigStore/artifacts/"
"Type": "AWS::EMR::SecurityConfiguration"
"EMRServiceRole": {
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
"Action": [
"Effect": "Allow",
"Principal": {
"Service": [
"ManagedPolicyArns": [
"Type": "AWS::IAM::Role"
"TestStep": {
"Properties": {
"ActionOnFailure": "CONTINUE",
"HadoopJarStep": {
"Args": [
"Jar": "s3://emr-cfn-test/hadoop-mapreduce-examples-2.6.0.jar",
"MainClass": "pi",
"StepProperties": [
"Key": "",
"Value": "my.custom.value"
"JobFlowId": {
"Ref": "EMRSampleCluster"
"Name": "TestStep"
"Type": "AWS::EMR::Step"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment