Skip to content

Instantly share code, notes, and snippets.

@SumindaD
Last active June 28, 2019 06:32
Show Gist options
  • Save SumindaD/c73608b1402b2fdaea7ccdcb55487299 to your computer and use it in GitHub Desktop.
Save SumindaD/c73608b1402b2fdaea7ccdcb55487299 to your computer and use it in GitHub Desktop.
import json
import urllib.parse
import boto3
print('Loading function')

# Textract only accepts JobTag values matching [a-zA-Z0-9_.\-:]+ with a length
# of 1-64 characters, whereas S3 object keys may contain '/', spaces, Unicode
# and can be far longer. The tag is therefore derived from the key, not copied.
_JOB_TAG_ALLOWED_CHARS = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789_.-:'
)
_JOB_TAG_MAX_LENGTH = 64
_JOB_TAG_SUFFIX = '_Job'


def _job_tag_for_key(key):
    """Return a Textract-compatible JobTag derived from the S3 object key.

    Characters outside the allowed set are replaced with '_', and the key part
    is truncated from the left (keeping the trailing file-name portion) so that
    the tag, including the '_Job' suffix, never exceeds 64 characters. Keys
    that are already valid and short enough yield exactly ``key + '_Job'``.
    """
    sanitized = ''.join(
        char if char in _JOB_TAG_ALLOWED_CHARS else '_' for char in key
    )
    max_key_chars = _JOB_TAG_MAX_LENGTH - len(_JOB_TAG_SUFFIX)
    return sanitized[-max_key_chars:] + _JOB_TAG_SUFFIX


def lambda_handler(event, context):
    """Start an asynchronous Textract text-detection job for an S3 object.

    Args:
        event: S3 event notification. Only the first entry of ``Records`` is
            read; its bucket name and object key identify the document.
        context: Lambda context object (unused).

    Returns:
        A confirmation string naming the object key the job was started for.

    Raises:
        KeyError, IndexError: If ``event`` lacks the expected S3 record fields.
        Exception: Any error raised while creating the Textract client or
            starting the job is printed and re-raised unchanged.
    """
    print("Triggered getTextFromS3PDF event: " + json.dumps(event, indent=2))

    # Get the object from the event.
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    # Object keys are URL-encoded in S3 event payloads (spaces arrive as '+'),
    # so decode them before handing the key to Textract.
    key = urllib.parse.unquote_plus(s3_info['object']['key'], encoding='utf-8')

    try:
        textract = boto3.client('textract')
        textract.start_document_text_detection(
            DocumentLocation={
                'S3Object': {
                    'Bucket': bucket,
                    'Name': key,
                },
            },
            # The raw key may violate Textract's JobTag constraints.
            JobTag=_job_tag_for_key(key),
            # Placeholders: replace with the IAM role and SNS topic that
            # Textract should use to publish the job-completion notification.
            NotificationChannel={
                'RoleArn': '<RoleArn>',
                'SNSTopicArn': '<SNSTopicArn>',
            },
        )
        return 'Triggered PDF Processing for ' + key
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        # Bare raise keeps the original traceback intact.
        raise
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment