Created
May 11, 2019 15:52
-
-
Save SrushithR/1dbb6d3521383c259b47756506cf5955 to your computer and use it in GitHub Desktop.
Sample code to stream data from a JSON file in S3 using S3 select
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
A sample function that performs an S3 Select operation on a JSON file stored in an S3 bucket.
The bucket and file names are read from environment variables.
"""
import os | |
import boto3 | |
def get_data():
    """
    Query a JSON object stored in S3 with S3 Select and return the result.

    The bucket and key are read from the ``bucket_name`` and ``file_name``
    environment variables. The SQL expression selects every record of the
    object, and the output is requested as JSON with a newline after each
    record.

    Returns:
        str: the concatenated payload of all ``Records`` events, decoded
        as UTF-8.

    Raises:
        KeyError: if ``bucket_name`` or ``file_name`` is not set in the
            environment.
    """
    s3 = boto3.client('s3')
    expression = "SELECT * FROM S3Object"
    response = s3.select_object_content(
        Bucket=os.environ['bucket_name'],
        Key=os.environ['file_name'],
        ExpressionType='SQL',
        Expression=expression,
        InputSerialization={
            'CompressionType': 'NONE',
            'JSON': {
                'Type': 'Document',
            },
        },
        OutputSerialization={
            'JSON': {
                'RecordDelimiter': '\n',
            },
        },
    )

    # The event stream interleaves 'Records' events with other event types
    # (stats, progress, end markers); only 'Records' events carry data.
    # Each payload is a bytes chunk, so the chunks are collected as bytes
    # and decoded once at the end: appending them to a str fails on
    # Python 3, and decoding chunk by chunk could break a multi-byte
    # character that is split across two events.
    chunks = [
        event['Records']['Payload']
        for event in response['Payload']
        if 'Records' in event
    ]
    return b''.join(chunks).decode('utf-8')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment