Created
September 9, 2021 11:46
-
-
Save dgtm/34a7c089a0e0ced41c125917c65cc1e4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CloudFormation template that keeps an Athena/Glue catalog in sync with an
# AWS Cost and Usage Report (CUR) delivered to
# s3://dipeshgtmreport/dipeshgtm/myreport/myreport.
#
# Resources, in dependency order:
#   * a Glue database and a Glue crawler (plus the crawler's IAM role);
#   * a Lambda function that starts the crawler (plus its IAM role);
#   * a custom resource that runs that Lambda once at stack creation;
#   * a Lambda-backed custom resource that configures the report bucket to
#     invoke the crawler-starting Lambda on every new object under the
#     report prefix;
#   * a Glue table describing the report's status file.
AWSTemplateFormatVersion: 2010-09-09
Resources:
  # Glue database that holds the crawled CUR tables.
  AWSCURDatabase:
    Type: 'AWS::Glue::Database'
    Properties:
      DatabaseInput:
        Name: 'athenacurcfn_myreport'
      CatalogId: !Ref AWS::AccountId

  # IAM role assumed by Glue (glue.amazonaws.com); used as the crawler's Role.
  # Grants CloudWatch Logs writes, Glue catalog updates, and read/write on the
  # report prefix in the bucket.
  AWSCURCrawlerComponentFunction:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      ManagedPolicyArns:
        - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole'
      Policies:
        - PolicyName: AWSCURCrawlerComponentFunction
          PolicyDocument:
            Version: 2012-10-17
            Statement:
              - Effect: Allow
                Action:
                  - 'logs:CreateLogGroup'
                  - 'logs:CreateLogStream'
                  - 'logs:PutLogEvents'
                Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
              - Effect: Allow
                Action:
                  - 'glue:UpdateDatabase'
                  - 'glue:UpdatePartition'
                  - 'glue:CreateTable'
                  - 'glue:UpdateTable'
                  - 'glue:ImportCatalogToGlue'
                Resource: '*'
              - Effect: Allow
                Action:
                  - 's3:GetObject'
                  - 's3:PutObject'
                Resource: !Sub 'arn:${AWS::Partition}:s3:::dipeshgtmreport/dipeshgtm/myreport/myreport*'

  # IAM role assumed by Lambda; execution role of AWSCURInitializer.
  # Grants CloudWatch Logs writes and glue:StartCrawler.
  AWSCURCrawlerLambdaExecutor:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: AWSCURCrawlerLambdaExecutor
          PolicyDocument:
            Version: 2012-10-17
            Statement:
              - Effect: Allow
                Action:
                  - 'logs:CreateLogGroup'
                  - 'logs:CreateLogStream'
                  - 'logs:PutLogEvents'
                Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
              - Effect: Allow
                Action:
                  - 'glue:StartCrawler'
                Resource: '*'

  # Glue crawler over the report prefix. Files matching the exclusion
  # patterns (json/yml/sql/csv/gz/zip) are skipped.
  AWSCURCrawler:
    Type: 'AWS::Glue::Crawler'
    DependsOn:
      - AWSCURDatabase
      - AWSCURCrawlerComponentFunction
    Properties:
      Name: AWSCURCrawler-myreport
      Description: A recurring crawler that keeps your CUR table in Athena up-to-date.
      Role: !GetAtt AWSCURCrawlerComponentFunction.Arn
      DatabaseName: !Ref AWSCURDatabase
      Targets:
        S3Targets:
          - Path: 's3://dipeshgtmreport/dipeshgtm/myreport/myreport'
            Exclusions:
              - '**.json'
              - '**.yml'
              - '**.sql'
              - '**.csv'
              - '**.gz'
              - '**.zip'
      SchemaChangePolicy:
        UpdateBehavior: UPDATE_IN_DATABASE
        DeleteBehavior: DELETE_FROM_DATABASE

  # Lambda that starts the crawler. It is the ServiceToken of the
  # AWSStartCURCrawler custom resource and the TargetLambdaArn passed to
  # AWSPutS3CURNotification, so it receives both CloudFormation custom-resource
  # events and (once the notification is configured) S3 events.
  AWSCURInitializer:
    Type: 'AWS::Lambda::Function'
    DependsOn: AWSCURCrawler
    Properties:
      Code:
        # Literal block scalar (|) so the source is deployed verbatim; a folded
        # scalar (>) would join same-indent lines and let a `//` comment
        # swallow the code that follows it.
        ZipFile: |
          const AWS = require('aws-sdk');
          const response = require('./cfn-response');
          exports.handler = function(event, context, callback) {
            // CloudFormation events carry ResponseURL and must always be
            // answered through response.send(); other invocations are
            // answered through callback().
            if (event.RequestType === 'Delete') {
              response.send(event, context, response.SUCCESS);
              return;
            }
            const glue = new AWS.Glue();
            // Must stay a non-arrow function: the error path reads
            // this.httpResponse, which the SDK binds on `this`.
            glue.startCrawler({ Name: 'AWSCURCrawler-myreport' }, function(err, data) {
              if (!err) {
                if (event.ResponseURL) {
                  response.send(event, context, response.SUCCESS);
                } else {
                  callback(null, response.SUCCESS);
                }
                return;
              }
              // The body can be empty or non-JSON (e.g. network errors);
              // fall back to the SDK error fields instead of throwing.
              let responseData;
              try {
                responseData = JSON.parse(this.httpResponse.body);
              } catch (parseError) {
                responseData = { __type: err.code, Message: err.message };
              }
              if (responseData['__type'] === 'CrawlerRunningException') {
                // A crawl already in progress is not a failure. Reply to
                // CloudFormation too, otherwise the stack waits for a
                // response that never arrives.
                if (event.ResponseURL) {
                  response.send(event, context, response.SUCCESS, { msg: responseData.Message });
                } else {
                  callback(null, responseData.Message);
                }
                return;
              }
              const responseString = JSON.stringify(responseData);
              if (event.ResponseURL) {
                response.send(event, context, response.FAILED, { msg: responseString });
              } else {
                callback(responseString);
              }
            });
          };
      Handler: 'index.handler'
      Timeout: 30
      # NOTE(review): nodejs12.x is a deprecated Lambda runtime. Newer runtimes
      # (nodejs18.x and later) do not bundle the `aws-sdk` v2 package required
      # above, so bumping the runtime also means porting the handler to
      # AWS SDK for JavaScript v3.
      Runtime: nodejs12.x
      ReservedConcurrentExecutions: 1
      Role: !GetAtt AWSCURCrawlerLambdaExecutor.Arn

  # Custom resource that invokes AWSCURInitializer during stack operations,
  # which starts the crawler on Create/Update.
  AWSStartCURCrawler:
    Type: 'Custom::AWSStartCURCrawler'
    Properties:
      ServiceToken: !GetAtt AWSCURInitializer.Arn

  # Allows S3 (restricted to this account and the report bucket) to invoke
  # AWSCURInitializer.
  AWSS3CUREventLambdaPermission:
    Type: AWS::Lambda::Permission
    Properties:
      Action: 'lambda:InvokeFunction'
      FunctionName: !GetAtt AWSCURInitializer.Arn
      Principal: 's3.amazonaws.com'
      SourceAccount: !Ref AWS::AccountId
      SourceArn: !Sub 'arn:${AWS::Partition}:s3:::dipeshgtmreport'

  # IAM role assumed by Lambda; execution role of AWSS3CURNotification.
  # Grants CloudWatch Logs writes and s3:PutBucketNotification on the bucket.
  AWSS3CURLambdaExecutor:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: 2012-10-17
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - lambda.amazonaws.com
            Action:
              - 'sts:AssumeRole'
      Path: /
      Policies:
        - PolicyName: AWSS3CURLambdaExecutor
          PolicyDocument:
            Version: 2012-10-17
            Statement:
              - Effect: Allow
                Action:
                  - 'logs:CreateLogGroup'
                  - 'logs:CreateLogStream'
                  - 'logs:PutLogEvents'
                Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
              - Effect: Allow
                Action:
                  - 's3:PutBucketNotification'
                Resource: !Sub 'arn:${AWS::Partition}:s3:::dipeshgtmreport'

  # Lambda backing the AWSPutS3CURNotification custom resource. It writes the
  # bucket's notification configuration from the resource properties
  # (BucketName, TargetLambdaArn, ReportKey).
  AWSS3CURNotification:
    Type: 'AWS::Lambda::Function'
    DependsOn:
      - AWSCURInitializer
      - AWSS3CUREventLambdaPermission
      - AWSS3CURLambdaExecutor
    Properties:
      Code:
        ZipFile: |
          const AWS = require('aws-sdk');
          const response = require('./cfn-response');
          exports.handler = function(event, context, callback) {
            const s3 = new AWS.S3();
            // Promise wrapper around the callback-style SDK call. The inner
            // callback stays a non-arrow function because it reads
            // this.httpResponse.
            const putConfigRequest = function(notificationConfiguration) {
              return new Promise(function(resolve, reject) {
                s3.putBucketNotificationConfiguration({
                  Bucket: event.ResourceProperties.BucketName,
                  NotificationConfiguration: notificationConfiguration
                }, function(err, data) {
                  if (err) reject({ msg: this.httpResponse.body.toString(), error: err, data: data });
                  else resolve(data);
                });
              });
            };
            // On Delete the configuration stays empty; otherwise it holds a
            // single Lambda rule for objects created under the report prefix.
            // NOTE(review): this call writes the bucket's whole notification
            // configuration - confirm the bucket has no other notifications.
            const newNotificationConfig = {};
            if (event.RequestType !== 'Delete') {
              newNotificationConfig.LambdaFunctionConfigurations = [{
                Events: [ 's3:ObjectCreated:*' ],
                LambdaFunctionArn: event.ResourceProperties.TargetLambdaArn || 'missing arn',
                Filter: { Key: { FilterRules: [ { Name: 'prefix', Value: event.ResourceProperties.ReportKey } ] } }
              }];
            }
            putConfigRequest(newNotificationConfig).then(function(result) {
              response.send(event, context, response.SUCCESS, result);
              callback(null, result);
            }).catch(function(error) {
              response.send(event, context, response.FAILED, error);
              console.log(error);
              callback(error);
            });
          };
      Handler: 'index.handler'
      Timeout: 30
      # NOTE(review): nodejs12.x is a deprecated Lambda runtime. Newer runtimes
      # (nodejs18.x and later) do not bundle the `aws-sdk` v2 package required
      # above, so bumping the runtime also means porting the handler to
      # AWS SDK for JavaScript v3.
      Runtime: nodejs12.x
      ReservedConcurrentExecutions: 1
      Role: !GetAtt AWSS3CURLambdaExecutor.Arn

  # Custom resource that asks AWSS3CURNotification to point the bucket's
  # object-created notifications (under ReportKey) at AWSCURInitializer.
  AWSPutS3CURNotification:
    Type: 'Custom::AWSPutS3CURNotification'
    Properties:
      ServiceToken: !GetAtt AWSS3CURNotification.Arn
      TargetLambdaArn: !GetAtt AWSCURInitializer.Arn
      BucketName: 'dipeshgtmreport'
      ReportKey: 'dipeshgtm/myreport/myreport'

  # External Parquet table with a single `status` string column, located at
  # the report's cost_and_usage_data_status/ prefix.
  AWSCURReportStatusTable:
    Type: 'AWS::Glue::Table'
    DependsOn: AWSCURDatabase
    Properties:
      # Referenced rather than repeated so the name cannot drift from the
      # database resource (same approach as AWSCURCrawler.DatabaseName).
      DatabaseName: !Ref AWSCURDatabase
      CatalogId: !Ref AWS::AccountId
      TableInput:
        Name: 'cost_and_usage_data_status'
        TableType: 'EXTERNAL_TABLE'
        StorageDescriptor:
          Columns:
            - Name: status
              Type: 'string'
          InputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
          OutputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
          SerdeInfo:
            SerializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
          Location: 's3://dipeshgtmreport/dipeshgtm/myreport/cost_and_usage_data_status/'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment