Created
September 8, 2022 02:24
-
-
Save jfmcdowell/6f4aae2ab472b0bde3226367ea79c6d2 to your computer and use it in GitHub Desktop.
dml_pipeline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from inspect import stack | |
import queue | |
from attr import attributes | |
from constructs import Construct | |
from aws_cdk import ( | |
Stack, | |
aws_lambda, | |
aws_lambda_event_sources, | |
aws_s3, aws_sqs, | |
aws_ec2, aws_dynamodb, | |
aws_iam, aws_s3_notifications, | |
aws_apigateway | |
) | |
#Stack to deploy a Infrastructure that the application requires | |
class DMLInfraStack(Stack):
    """CDK stack that provisions the DML document-pipeline infrastructure.

    Resources created:
      * two KMS-encrypted S3 buckets (input and highlighted output),
      * an SQS queue,
      * a Lambda function notified about objects created under ``subdir/``
        in the input bucket,
      * a VPC,
      * three DynamoDB tables with write-capacity auto scaling,
      * IAM roles for Lambda, DynamoDB, S3 and Textract,
      * an API Gateway REST API backed by the Lambda function.
    """

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        """Define every resource of the stack.

        Args:
            scope: Parent construct of this stack.
            id: Construct identifier of this stack (name kept for CDK
                convention and caller compatibility).
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, id, **kwargs)

        # Creates an AWS S3 bucket for initial input
        dml_bucket = aws_s3.Bucket(
            self, "DML-s3-bucket1",
            encryption=aws_s3.BucketEncryption.KMS,
            bucket_key_enabled=True,
            enforce_ssl=True,
        )

        # Creates a bucket for highlighted content
        dml_bucket_two = aws_s3.Bucket(
            self, "DocumentOutputBucket",
            encryption=aws_s3.BucketEncryption.KMS,
            bucket_key_enabled=True,
            enforce_ssl=True,
        )

        # s3:GetObject / s3:PutObject are object-level actions: they only
        # match object ARNs ("<bucket-arn>/*"), never the bare bucket ARN.
        bucket_object_arns = [
            dml_bucket.arn_for_objects("*"),
            dml_bucket_two.arn_for_objects("*"),
        ]

        # Creates SQS queue for uploaded documents
        aws_sqs.Queue(self, "NullQueue")

        # Creates Lambda IAM Role. It is created before the function so it
        # can be attached as the execution role; the managed policy keeps the
        # CloudWatch Logs permissions that CDK's auto-generated role provides.
        lambda_role = aws_iam.Role(
            self, "Lambda Role",
            assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"),
            description="Allows Lambda to access DynamoDB",
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AWSLambdaBasicExecutionRole"
                )
            ],
        )

        # Defines an AWS Lambda resource
        # NOTE(review): Python handlers are normally "<module>.<function>";
        # "lambda_handler" has no module part - confirm against the code in
        # the "lambda" asset directory.
        textract_function_two = aws_lambda.Function(
            self, "TextractFunctionTwo",
            runtime=aws_lambda.Runtime.PYTHON_3_9,
            handler="lambda_handler",
            code=aws_lambda.Code.from_asset("lambda"),
            role=lambda_role,
        )

        # Sends a notification to the Lambda function when an object is
        # created in the input bucket. A notification filter holds a single
        # suffix, hence one notification per document type.
        suffixes = [".pdf", ".txt", ".doc", ".docx"]
        for suffix in suffixes:
            dml_bucket.add_event_notification(
                aws_s3.EventType.OBJECT_CREATED,
                aws_s3_notifications.LambdaDestination(textract_function_two),
                aws_s3.NotificationKeyFilter(prefix="subdir/", suffix=suffix),
            )

        # Creates the VPC (CDK default configuration)
        aws_ec2.Vpc(self, "DMLVPC")

        ######################################################################
        # Adds Document DynamoDB table
        document_table = aws_dynamodb.Table(
            self, "Document",
            partition_key=aws_dynamodb.Attribute(
                name="DocumentID", type=aws_dynamodb.AttributeType.STRING
            ),
            sort_key=aws_dynamodb.Attribute(
                name="UpdatedDate", type=aws_dynamodb.AttributeType.NUMBER
            ),
        )
        self._enable_write_autoscaling(document_table)

        # Adds DocumentHistory DynamoDB table
        document_history_table = aws_dynamodb.Table(
            self, "DocumentHistory",
            partition_key=aws_dynamodb.Attribute(
                name="DocumentID", type=aws_dynamodb.AttributeType.STRING
            ),
            sort_key=aws_dynamodb.Attribute(
                name="CompletedDate", type=aws_dynamodb.AttributeType.NUMBER
            ),
        )
        self._enable_write_autoscaling(document_history_table)

        # Adds DocumentMetaData DynamoDB table
        document_metadata_table = aws_dynamodb.Table(
            self, "DocumentMetaData",
            partition_key=aws_dynamodb.Attribute(
                name="DocumentID", type=aws_dynamodb.AttributeType.STRING
            ),
        )
        self._enable_write_autoscaling(document_metadata_table)

        table_arns = [
            document_history_table.table_arn,
            document_table.table_arn,
            document_metadata_table.table_arn,
        ]

        ######################################################################
        # Allows Lambda function to access DynamoDB
        lambda_role.add_to_policy(aws_iam.PolicyStatement(
            resources=table_arns,
            actions=["dynamodb:PutItem", "dynamodb:GetItem"],
        ))

        # Allows Lambda function to access S3
        # NOTE(review): both buckets use SSE-KMS, so reading/writing objects
        # presumably also needs permissions on the bucket keys - confirm.
        lambda_role.add_to_policy(aws_iam.PolicyStatement(
            resources=bucket_object_arns,
            actions=["s3:GetObject", "s3:PutObject"],
        ))

        # Allows Lambda function to access Textract. These Textract actions
        # are not scoped to a Lambda function ARN, so the statement uses "*".
        lambda_role.add_to_policy(aws_iam.PolicyStatement(
            resources=["*"],
            actions=[
                "textract:StartDocumentTextDetection",
                "textract:StartDocumentAnalysis",
                "textract:GetDocumentTextDetection",
                "textract:GetDocumentAnalysis",
            ],
        ))

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Creates DynamoDB role
        dynamodb_role = aws_iam.Role(
            self, "DynamoDB Role",
            assumed_by=aws_iam.ServicePrincipal("dynamodb.amazonaws.com"),
            description="Allows DynamoDB to access S3",
        )

        # Allows DynamoDB to access S3
        dynamodb_role.add_to_policy(aws_iam.PolicyStatement(
            resources=bucket_object_arns,
            actions=["s3:GetObject", "s3:PutObject"],
        ))

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Creates S3 role
        s3_role = aws_iam.Role(
            self, "S3 Role",
            assumed_by=aws_iam.ServicePrincipal("s3.amazonaws.com"),
            description="Allows S3 to interact with Lambda",
        )

        # Allows S3 to interact with the Lambda function
        # NOTE(review): these actions normally target S3 Object Lambda access
        # points and CloudWatch log groups rather than a Lambda function ARN -
        # confirm the intended resources.
        s3_role.add_to_policy(aws_iam.PolicyStatement(
            resources=[textract_function_two.function_arn],
            actions=[
                "s3-object-lambda:WriteGetObjectResponse",
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
            ],
        ))

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Creates Textract role
        textract_role = aws_iam.Role(
            self, "Textract Role",
            assumed_by=aws_iam.ServicePrincipal("textract.amazonaws.com"),
            description="Allows Textract to access S3 and DynamoDB",
        )

        # NOTE(review): s3-object-lambda:WriteGetObjectResponse normally
        # targets Object Lambda access points - confirm these resources.
        textract_role.add_to_policy(aws_iam.PolicyStatement(
            resources=[
                dml_bucket.bucket_arn,
                dml_bucket_two.bucket_arn,
                textract_function_two.function_arn,
            ],
            actions=["s3-object-lambda:WriteGetObjectResponse"],
        ))

        # Allows Textract to access DynamoDB. DynamoDB item actions must be
        # granted on the tables, not on buckets or the Lambda function.
        textract_role.add_to_policy(aws_iam.PolicyStatement(
            resources=table_arns,
            actions=["dynamodb:PutItem", "dynamodb:GetItem"],
        ))

        # Allows Textract to access S3
        textract_role.add_to_policy(aws_iam.PolicyStatement(
            resources=bucket_object_arns,
            actions=["s3:GetObject", "s3:PutObject"],
        ))

        ######################################################################
        # Defines API Gateway
        aws_apigateway.LambdaRestApi(
            self, "AmplifyDocMessengerAPI",
            handler=textract_function_two,
        )

    @staticmethod
    def _enable_write_autoscaling(table: "aws_dynamodb.Table") -> None:
        """Auto scale *table*'s write capacity between 1 and 10 units, targeting 75% utilization."""
        table.auto_scale_write_capacity(
            min_capacity=1,
            max_capacity=10,
        ).scale_on_utilization(target_utilization_percent=75)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment