# Source: GitHub gist by @BetterProgramming
# (BetterProgramming/abe43a5c6cea230925d43aefbb2892f9), created July 14, 2020 16:00.
#
# The lines below are residue from the gist web page, kept as comments:
#   Skip to content
#   Instantly share code, notes, and snippets.
#   Show Gist options
#   • Star 0 (you must be signed in to star a gist)
#   • Fork 0 (you must be signed in to fork a gist)
#   • Save BetterProgramming/abe43a5c6cea230925d43aefbb2892f9 to your computer and use it in GitHub Desktop.
import boto3
from trp import Document
# Document location in S3; replace both placeholders before running.
s3BucketName = "<Your bucket name>"
documentName = "<Image with text>"

# Amazon Textract client
textract = boto3.client('textract')

# Call Amazon Textract on the S3 object, requesting the FORMS feature type.
response = textract.analyze_document(
    Document={
        'S3Object': {
            'Bucket': s3BucketName,
            'Name': documentName,
        }
    },
    FeatureTypes=["FORMS"],
)
#print(response)

# Wrap the raw response in the trp Document helper so that pages and their
# form fields can be walked as objects.
doc = Document(response)

for page in doc.pages:
    # Print every form field found on this page.
    print("Fields:")
    for field in page.form.fields:
        print("Key: {}, Value: {}".format(field.key, field.value))

    # Get a single field by key.
    print("\nGet Field by Key:")
    key = "Phone Number:"
    field = page.form.getFieldByKey(key)
    # Skip printing when the lookup yields nothing (presumably None when the
    # key is absent from the page -- confirm against the trp implementation).
    if field:
        print("Key: {}, Value: {}".format(field.key, field.value))

    # Search fields by key; may yield several fields for one search term.
    # NOTE(review): matching rules (substring / case-insensitive?) are defined
    # by trp's searchFieldsByKey -- confirm before relying on them.
    print("\nSearch Fields:")
    key = "address"
    fields = page.form.searchFieldsByKey(key)
    for field in fields:
        print("Key: {}, Value: {}".format(field.key, field.value))
# (Gist page footer: sign up or sign in on GitHub to comment on this gist.)