Skip to content

Instantly share code, notes, and snippets.

@jfmcdowell
Created September 8, 2022 02:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jfmcdowell/6f4aae2ab472b0bde3226367ea79c6d2 to your computer and use it in GitHub Desktop.
Save jfmcdowell/6f4aae2ab472b0bde3226367ea79c6d2 to your computer and use it in GitHub Desktop.
dml_pipeline
from inspect import stack
import queue
from attr import attributes
from constructs import Construct
from aws_cdk import (
Stack,
aws_lambda,
aws_lambda_event_sources,
aws_s3, aws_sqs,
aws_ec2, aws_dynamodb,
aws_iam, aws_s3_notifications,
aws_apigateway
)
# Stack that deploys the infrastructure the application requires
class DMLInfraStack(Stack):
    """CDK stack that provisions the infrastructure used by the DML pipeline.

    Resources created by this stack:

    * two KMS-encrypted, SSL-only S3 buckets (document input and output),
    * an SQS queue and a VPC (created but not wired to anything else here),
    * a Python 3.9 Lambda function that is invoked on object creation in the
      input bucket and is also exposed through an API Gateway REST API,
    * three DynamoDB tables with write-capacity auto scaling,
    * IAM roles for the Lambda, DynamoDB, S3 and Textract service principals.
    """

    # File suffixes (under the "subdir/" prefix) that trigger the Lambda.
    _DOCUMENT_SUFFIXES = (".pdf", ".txt", ".doc", ".docx")

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        """Define every resource of the stack.

        Args:
            scope: Parent construct this stack is attached to.
            id: Construct identifier of the stack within ``scope``.
            **kwargs: Forwarded unchanged to ``aws_cdk.Stack``.
        """
        super().__init__(scope, id, **kwargs)

        # ------------------------------------------------------------------
        # Storage
        # ------------------------------------------------------------------
        # Bucket that receives the initial input documents.
        dml_bucket = aws_s3.Bucket(
            self, "DML-s3-bucket1",
            encryption=aws_s3.BucketEncryption.KMS,
            bucket_key_enabled=True,
            enforce_ssl=True,
        )
        # Bucket for the highlighted (processed) content.
        dml_bucket_two = aws_s3.Bucket(
            self, "DocumentOutputBucket",
            encryption=aws_s3.BucketEncryption.KMS,
            bucket_key_enabled=True,
            enforce_ssl=True,
        )
        # s3:GetObject / s3:PutObject are object-level actions: they only
        # match object ARNs ("<bucket-arn>/*"), never the bare bucket ARN.
        bucket_object_arns = [
            dml_bucket.arn_for_objects("*"),
            dml_bucket_two.arn_for_objects("*"),
        ]
        # NOTE(review): both buckets use KMS encryption; principals that read
        # or write objects presumably also need permissions on the bucket
        # keys (e.g. via bucket.grant_read_write) - confirm.

        # SQS queue; not connected to any producer or consumer in this stack.
        aws_sqs.Queue(self, "NullQueue")

        # VPC with the construct's default configuration; nothing in this
        # stack is placed inside it.
        aws_ec2.Vpc(self, "DMLVPC")

        # ------------------------------------------------------------------
        # DynamoDB tables
        # ------------------------------------------------------------------
        document_table = aws_dynamodb.Table(
            self, "Document",
            partition_key=aws_dynamodb.Attribute(
                name="DocumentID", type=aws_dynamodb.AttributeType.STRING),
            sort_key=aws_dynamodb.Attribute(
                name="UpdatedDate", type=aws_dynamodb.AttributeType.NUMBER),
        )
        document_history_table = aws_dynamodb.Table(
            self, "DocumentHistory",
            partition_key=aws_dynamodb.Attribute(
                name="DocumentID", type=aws_dynamodb.AttributeType.STRING),
            sort_key=aws_dynamodb.Attribute(
                name="CompletedDate", type=aws_dynamodb.AttributeType.NUMBER),
        )
        document_metadata_table = aws_dynamodb.Table(
            self, "DocumentMetaData",
            partition_key=aws_dynamodb.Attribute(
                name="DocumentID", type=aws_dynamodb.AttributeType.STRING),
        )
        tables = [document_table, document_history_table, document_metadata_table]
        for table in tables:
            self._enable_write_autoscaling(table)
        # Same order as the original policy statement.
        table_arns = [
            document_history_table.table_arn,
            document_table.table_arn,
            document_metadata_table.table_arn,
        ]

        # ------------------------------------------------------------------
        # Lambda role and function
        # ------------------------------------------------------------------
        # The role is created before the function so it can be passed as the
        # function's execution role; otherwise the permissions below would
        # sit on a role that nothing ever assumes.
        lambda_role = aws_iam.Role(
            self, "Lambda Role",
            assumed_by=aws_iam.ServicePrincipal("lambda.amazonaws.com"),
            description="Allows Lambda to access DynamoDB",
            # A caller-supplied role does not get the basic execution policy
            # (CloudWatch Logs access) automatically, so add it explicitly.
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "service-role/AWSLambdaBasicExecutionRole"
                )
            ],
        )
        # Allows the Lambda function to access DynamoDB.
        lambda_role.add_to_policy(aws_iam.PolicyStatement(
            resources=table_arns,
            actions=["dynamodb:PutItem", "dynamodb:GetItem"],
        ))
        # Allows the Lambda function to access objects in both buckets.
        lambda_role.add_to_policy(aws_iam.PolicyStatement(
            resources=bucket_object_arns,
            actions=["s3:GetObject", "s3:PutObject"],
        ))
        # Allows the Lambda function to call Textract. These Textract
        # actions define no resource types, so the statement must use "*";
        # scoping them to a Lambda ARN would grant nothing.
        lambda_role.add_to_policy(aws_iam.PolicyStatement(
            resources=["*"],
            actions=[
                "textract:StartDocumentTextDetection",
                "textract:StartDocumentAnalysis",
                "textract:GetDocumentTextDetection",
                "textract:GetDocumentAnalysis",
            ],
        ))

        textract_function_two = aws_lambda.Function(
            self, "TextractFunctionTwo",
            runtime=aws_lambda.Runtime.PYTHON_3_9,
            # NOTE(review): Python handlers are normally "<module>.<function>";
            # the module name is not visible here, so the value is unchanged.
            handler="lambda_handler",
            code=aws_lambda.Code.from_asset("lambda"),
            role=lambda_role,
        )

        # Invoke the Lambda when a matching object is created in the input
        # bucket. One notification is registered per suffix because a single
        # key filter carries only one suffix.
        lambda_destination = aws_s3_notifications.LambdaDestination(textract_function_two)
        for suffix in self._DOCUMENT_SUFFIXES:
            dml_bucket.add_event_notification(
                aws_s3.EventType.OBJECT_CREATED,
                lambda_destination,
                aws_s3.NotificationKeyFilter(prefix="subdir/", suffix=suffix),
            )

        # ------------------------------------------------------------------
        # DynamoDB role
        # ------------------------------------------------------------------
        dynamodb_role = aws_iam.Role(
            self, "DynamoDB Role",
            assumed_by=aws_iam.ServicePrincipal("dynamodb.amazonaws.com"),
            description="Allows DynamoDB to access S3",
        )
        # Allows DynamoDB to access objects in both buckets.
        dynamodb_role.add_to_policy(aws_iam.PolicyStatement(
            resources=bucket_object_arns,
            actions=["s3:GetObject", "s3:PutObject"],
        ))

        # ------------------------------------------------------------------
        # S3 role
        # ------------------------------------------------------------------
        s3_role = aws_iam.Role(
            self, "S3 Role",
            assumed_by=aws_iam.ServicePrincipal("s3.amazonaws.com"),
            description="Allows S3 to interact with Lambda",
        )
        # NOTE(review): the logs:* actions are scoped to a Lambda function
        # ARN, which is not a CloudWatch Logs resource - intended resources
        # are unclear, so the statement is kept as written. Confirm.
        s3_role.add_to_policy(aws_iam.PolicyStatement(
            resources=[textract_function_two.function_arn],
            actions=[
                "s3-object-lambda:WriteGetObjectResponse",
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents",
            ],
        ))

        # ------------------------------------------------------------------
        # Textract role
        # ------------------------------------------------------------------
        textract_role = aws_iam.Role(
            self, "Textract Role",
            assumed_by=aws_iam.ServicePrincipal("textract.amazonaws.com"),
            description="Allows Textract to access S3 and DynamoDB",
        )
        # NOTE(review): kept as written; confirm that these are the intended
        # resources for s3-object-lambda:WriteGetObjectResponse.
        textract_role.add_to_policy(aws_iam.PolicyStatement(
            resources=[
                dml_bucket.bucket_arn,
                dml_bucket_two.bucket_arn,
                textract_function_two.function_arn,
            ],
            actions=["s3-object-lambda:WriteGetObjectResponse"],
        ))
        # Allows Textract to access DynamoDB. DynamoDB actions must target
        # the table ARNs, not bucket or function ARNs.
        textract_role.add_to_policy(aws_iam.PolicyStatement(
            resources=table_arns,
            actions=["dynamodb:PutItem", "dynamodb:GetItem"],
        ))
        # Allows Textract to access objects in both buckets.
        textract_role.add_to_policy(aws_iam.PolicyStatement(
            resources=bucket_object_arns,
            actions=["s3:GetObject", "s3:PutObject"],
        ))

        # ------------------------------------------------------------------
        # API Gateway
        # ------------------------------------------------------------------
        # REST API whose requests are handled by the Lambda function.
        aws_apigateway.LambdaRestApi(
            self, "AmplifyDocMessengerAPI",
            handler=textract_function_two,
        )

    @staticmethod
    def _enable_write_autoscaling(table: aws_dynamodb.Table) -> None:
        """Scale ``table``'s write capacity between 1 and 10 units at 75% utilization."""
        write_scaling = table.auto_scale_write_capacity(min_capacity=1, max_capacity=10)
        write_scaling.scale_on_utilization(target_utilization_percent=75)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment