Skip to content

Instantly share code, notes, and snippets.

@rayl15
Created December 21, 2024 06:24
Show Gist options
  • Save rayl15/b1366e8165c98a562d1b84e031ab8b59 to your computer and use it in GitHub Desktop.
Save rayl15/b1366e8165c98a562d1b84e031ab8b59 to your computer and use it in GitHub Desktop.
# Summary of what this stack provisions.
Description: >-
  CloudFormation template to create AWS Lake Formation workshop resources

# Presentation hints for the stack-creation form: one parameter group plus a
# friendly label for each of the two template parameters.
Metadata:
  AWS::CloudFormation::Interface:
    ParameterGroups:
      - Label:
          default: Misc Configuration
        Parameters:
          - EEKeyPair
          - LatestAmiId
    ParameterLabels:
      EEKeyPair:
        default: EC2 Key Pair
      LatestAmiId:
        default: Latest AMI Id
# Static lookup tables, read elsewhere in the template with Fn::FindInMap.
Mappings:
  # Address ranges for the workshop VPC and its single public subnet.
  SubnetConfig:
    VPC:
      CIDR: '10.0.0.0/16'
    PublicOne:
      CIDR: '10.0.0.0/24'

  # Fixed workshop settings, each stored under a `Name` key.
  Constants:
    EC2InstanceType:
      Name: t2.small
    # NOTE(review): the unit of this timeout is not stated anywhere in the
    # template -- confirm before relying on it.
    AutoHibernateTimeout:
      Name: 30
# Inputs supplied when the stack is created.
Parameters:
  # Name of an existing EC2 key pair; passed as KeyName to the helper instance.
  EEKeyPair:
    Type: AWS::EC2::KeyPair::KeyName
    Description: Amazon EC2 Key Pair
    MinLength: 1

  # SSM parameter path that is resolved to an AMI id. AllowedValues contains
  # only the default path, so the value cannot actually be changed.
  LatestAmiId:
    Type: 'AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>'
    Description: Image ID for the EC2 helper instance. DO NOT change this.
    Default: /aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2
    AllowedValues:
      - /aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-gp2
# Workshop infrastructure: a single-subnet public VPC, two S3 buckets, two
# Glue service roles, two console users that share one generated password,
# a helper EC2 instance that seeds the buckets, and a Glue crawler.
Resources:
  # --------------------------------------------------------------- Network

  # VPC whose address range comes from the SubnetConfig mapping.
  LakeFormationVPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !FindInMap [SubnetConfig, VPC, CIDR]
      InstanceTenancy: default
      EnableDnsSupport: true
      EnableDnsHostnames: true
      Tags:
        - Key: Name
          Value: LF-Workshop-VPC

  # Public subnet placed in the first Availability Zone of the stack region.
  PublicSubnetOne:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref LakeFormationVPC
      CidrBlock: !FindInMap [SubnetConfig, PublicOne, CIDR]
      MapPublicIpOnLaunch: true
      AvailabilityZone: !Select
        - 0
        - Fn::GetAZs: !Ref AWS::Region
      Tags:
        - Key: Name
          Value: LF-Workshop-PublicSubnetOne

  InternetGateway:
    Type: AWS::EC2::InternetGateway
    Properties:
      Tags:
        - Key: Name
          Value: LF-Workshop-IG

  VPCGatewayAttachment:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref LakeFormationVPC
      InternetGatewayId: !Ref InternetGateway

  InternetGatewayRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref LakeFormationVPC

  # Default route to the internet gateway. The gateway must be attached to
  # the VPC before the route can be created, hence the explicit DependsOn.
  InternetGatewayRoute:
    Type: AWS::EC2::Route
    DependsOn: VPCGatewayAttachment
    Properties:
      RouteTableId: !Ref InternetGatewayRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  PublicSubnetOneRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref InternetGatewayRouteTable
      SubnetId: !Ref PublicSubnetOne

  # ------------------------------------------------------------- IAM roles

  # Role assumable by Glue, Lake Formation and Firehose. Grants full S3
  # access to both workshop buckets, two Lake Formation actions, and
  # iam:PassRole on itself and on LakeFormationWorkflowRole.
  # Policy Version values are quoted so YAML keeps them as strings instead
  # of parsing them as dates.
  GlueServiceRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: LF-GlueServiceRole
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
                - lakeformation.amazonaws.com
                - firehose.amazonaws.com
            Action: 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole'
      Policies:
        - PolicyName: LF-Data-Lake-Storage-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:*'
                Resource:
                  - !Sub 'arn:aws:s3:::${DataLakeBucket}/*'
                  - !Sub 'arn:aws:s3:::${DataLakeBucket}'
        - PolicyName: Glue-Demo-Access-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:*'
                Resource:
                  - !Sub 'arn:aws:s3:::${LFWorkshopBucket}/*'
                  - !Sub 'arn:aws:s3:::${LFWorkshopBucket}'
        - PolicyName: LF-DataAccess-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'lakeformation:GetDataAccess'
                  - 'lakeformation:GrantPermissions'
                Resource: '*'
        - PolicyName: LF-Workflow-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'iam:PassRole'
                Resource:
                  - !Sub 'arn:aws:iam::${AWS::AccountId}:role/LF-GlueServiceRole'
                  - !Sub 'arn:aws:iam::${AWS::AccountId}:role/LakeFormationWorkflowRole'

  # Data-engineer counterpart of GlueServiceRole: same trust policy and the
  # same bucket / Lake Formation permissions, plus AmazonKinesisFullAccess,
  # with iam:PassRole on DE-GlueServiceRole instead of LF-GlueServiceRole.
  DataEngineerGlueServiceRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: DE-GlueServiceRole
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
                - lakeformation.amazonaws.com
                - firehose.amazonaws.com
            Action: 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole'
        - 'arn:aws:iam::aws:policy/AmazonKinesisFullAccess'
      Policies:
        - PolicyName: DE-Data-Lake-Storage-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:*'
                Resource:
                  - !Sub 'arn:aws:s3:::${DataLakeBucket}/*'
                  - !Sub 'arn:aws:s3:::${DataLakeBucket}'
        - PolicyName: DE-Glue-Access-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:*'
                Resource:
                  - !Sub 'arn:aws:s3:::${LFWorkshopBucket}/*'
                  - !Sub 'arn:aws:s3:::${LFWorkshopBucket}'
        - PolicyName: DE-DataAccess-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'lakeformation:GetDataAccess'
                  - 'lakeformation:GrantPermissions'
                Resource: '*'
        - PolicyName: DA-Workflow-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'iam:PassRole'
                Resource:
                  - !Sub 'arn:aws:iam::${AWS::AccountId}:role/DE-GlueServiceRole'
                  - !Sub 'arn:aws:iam::${AWS::AccountId}:role/LakeFormationWorkflowRole'

  # --------------------------------------------------------------- Storage

  # Bucket names embed the account id: lf-data-lake-<account> and
  # lf-workshop-<account>.
  DataLakeBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub 'lf-data-lake-${AWS::AccountId}'
      OwnershipControls:
        Rules:
          - ObjectOwnership: BucketOwnerPreferred

  LFWorkshopBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub 'lf-workshop-${AWS::AccountId}'
      OwnershipControls:
        Rules:
          - ObjectOwnership: BucketOwnerPreferred

  # ------------------------------------------------------- Workshop users

  # Secret holding a fixed username and a generated 16-character password.
  # Both IAM users below read the `password` key for their console login.
  LFUsersPassword:
    Type: AWS::SecretsManager::Secret
    Properties:
      Name: !Sub '${AWS::StackName}-lf-users-credentials'
      Description: Secret password for all workshop users
      GenerateSecretString:
        SecretStringTemplate: '{"username":"all-lf-users"}'
        GenerateStringKey: password
        PasswordLength: 16
        ExcludeCharacters: '"@/\'

  # Console user `lf-data-admin`.
  DataAdminUser:
    Type: AWS::IAM::User
    Properties:
      UserName: lf-data-admin
      Path: /
      LoginProfile:
        Password: !Sub '{{resolve:secretsmanager:${LFUsersPassword}::password}}'
        PasswordResetRequired: false
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AWSLakeFormationDataAdmin'
        - 'arn:aws:iam::aws:policy/AWSGlueConsoleFullAccess'
        - 'arn:aws:iam::aws:policy/AWSLakeFormationCrossAccountManager'
      Policies:
        - PolicyName: LF-DataLake-Admin-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              # Service-linked role creation, limited to Lake Formation.
              - Effect: Allow
                Action: 'iam:CreateServiceLinkedRole'
                Resource: '*'
                Condition:
                  StringEquals:
                    'iam:AWSServiceName': lakeformation.amazonaws.com
              - Effect: Allow
                Action:
                  - 'iam:PutRolePolicy'
                Resource: !Sub >-
                  arn:aws:iam::${AWS::AccountId}:role/aws-service-role/lakeformation.amazonaws.com/AWSServiceRoleForLakeFormationDataAccess
              - Effect: Allow
                Action: 'iam:PassRole'
                Resource:
                  - 'arn:aws:iam::*:role/LF-GlueServiceRole'
        - PolicyName: LF-DataLake-Admin-RAM-Invitation-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'ram:AcceptResourceShareInvitation'
                  - 'ram:RejectResourceShareInvitation'
                  - 'ec2:DescribeAvailabilityZones'
                  - 'ram:EnableSharingWithAwsOrganization'
                Resource: '*'

  # Console user `lf-data-engineer`: Athena and Glue console access, S3
  # put/get/list under athena-results/ in the workshop bucket, and
  # iam:PassRole on DE-GlueServiceRole.
  DataEngineerUser:
    Type: AWS::IAM::User
    Properties:
      UserName: lf-data-engineer
      Path: /
      LoginProfile:
        Password: !Sub '{{resolve:secretsmanager:${LFUsersPassword}::password}}'
        PasswordResetRequired: false
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AmazonAthenaFullAccess'
        - 'arn:aws:iam::aws:policy/AWSGlueConsoleFullAccess'
      Policies:
        - PolicyName: LF-Athena-Query-Result-Policy
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 's3:Put*'
                  - 's3:Get*'
                  - 's3:List*'
                Resource:
                  - !Sub 'arn:aws:s3:::${LFWorkshopBucket}/athena-results/*'
        - PolicyName: LF-PassRole
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action: 'iam:PassRole'
                Resource:
                  - 'arn:aws:iam::*:role/DE-GlueServiceRole'

  # ------------------------------------------------------ Helper instance

  # NOTE(review): AdministratorAccess is far broader than the S3 copies the
  # UserData below performs. Left unchanged in case workshop steps run other
  # commands from this instance -- confirm and narrow if possible.
  EC2Role:
    Type: AWS::IAM::Role
    Properties:
      RoleName: EC2Role
      Path: /
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
            Action: 'sts:AssumeRole'
      ManagedPolicyArns:
        - 'arn:aws:iam::aws:policy/AdministratorAccess'

  EC2InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    Properties:
      Path: /
      Roles:
        - !Ref EC2Role

  # One-shot loader: on first boot it copies the public sample datasets into
  # the two workshop buckets.
  #
  # The script runs under `bash -ex`, so the first failed `aws s3 cp` aborts
  # it. Those copies need a working route to the internet, but nothing in
  # Properties references the gateway attachment, the default route or the
  # subnet association, so CloudFormation could launch the instance before
  # they exist. The explicit DependsOn closes that race.
  BackUpRestoreInstance:
    Type: AWS::EC2::Instance
    DependsOn:
      - VPCGatewayAttachment
      - InternetGatewayRoute
      - PublicSubnetOneRouteTableAssociation
    Properties:
      # Read from the Constants mapping instead of repeating the literal.
      InstanceType: !FindInMap [Constants, EC2InstanceType, Name]
      ImageId: !Ref LatestAmiId
      KeyName: !Ref EEKeyPair
      IamInstanceProfile: !Ref EC2InstanceProfile
      NetworkInterfaces:
        - DeviceIndex: '0'
          AssociatePublicIpAddress: true
          SubnetId: !Ref PublicSubnetOne
      # Long form Fn::Base64 is used because two short-form intrinsic tags
      # cannot be chained directly; Fn::Sub substitutes the bucket names.
      UserData:
        Fn::Base64: !Sub |
          #!/bin/bash -ex
          sudo yum -y install mysql
          aws s3 cp s3://aws-data-analytics-workshops/shared_datasets/tpcparquet/ /tmp/tpcparquet/ --recursive
          aws s3 cp /tmp/tpcparquet/ s3://${DataLakeBucket}/tpcparquet/ --recursive
          aws s3 cp s3://aws-data-analytics-workshops/lake-formation-workshop/sampledata/tripdata.csv /tmp/
          aws s3 cp /tmp/tripdata.csv s3://${LFWorkshopBucket}/glue/nyctaxi/
          aws s3 cp s3://aws-data-analytics-workshops/lake-formation-workshop/scripts/nyctaxi-csv-to-json.py /tmp/
          aws s3 cp /tmp/nyctaxi-csv-to-json.py s3://${LFWorkshopBucket}/glue/scripts/
      Tags:
        - Key: Name
          Value: EC2-DB-Loader

  # ------------------------------------------------------------------ Glue

  # Crawler over the tpcparquet/ prefix of the data lake bucket, writing to
  # the `tpc` database and running as GlueServiceRole.
  # NOTE(review): the target path carries no `s3://` scheme, exactly as in
  # the original template -- confirm Glue accepts this form.
  GlueCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: TPC Crawler
      Description: AWS Glue Crawler to crawl parquet data
      DatabaseName: tpc
      Role: !GetAtt GlueServiceRole.Arn
      Targets:
        S3Targets:
          - Path: !Sub '${DataLakeBucket}/tpcparquet/'
      SchemaChangePolicy:
        UpdateBehavior: UPDATE_IN_DATABASE
        DeleteBehavior: LOG
# Values surfaced on the stack's Outputs tab for use during the workshop.
Outputs:
  LFDataLakeBucketName:
    Description: Lake Formation Data Lake Bucket Name
    Value: !Ref DataLakeBucket

  LFWorkshopBucketName:
    Description: Lake Formation Workshop Bucket Name
    Value: !Ref LFWorkshopBucket

  # S3 URI under the workshop bucket; the trailing slash is part of the value.
  AthenaQueryResultLocation:
    Description: Athena Query Result Location
    Value: !Sub 's3://${LFWorkshopBucket}/athena-results/'

  MetadataLocation:
    Description: Metadata Location
    Value: !Sub 's3://${LFWorkshopBucket}/metadata'

  # The value is a console link built from the secret's name, not the bare
  # name, so the description says so.
  LFUsersCredentials:
    Description: >-
      AWS Secrets Manager console URL of the secret holding the user
      credentials
    Value: !Sub >-
      https://${AWS::Region}.console.aws.amazon.com/secretsmanager/secret?name=${AWS::StackName}-lf-users-credentials

  ConsoleIAMLoginUrl:
    Description: Console IAM Login URL to try out different users
    Value: !Sub 'https://${AWS::AccountId}.signin.aws.amazon.com/console'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment