Created
February 6, 2017 05:00
-
-
Save fernando-mc/7761b3b3b6a274787423945356309b5a to your computer and use it in GitHub Desktop.
Simple CSV Checker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Validates Uploaded CSVs to S3
import csv

import boto3
import pg8000
# Header row every uploaded CSV must match exactly, in order.
EXPECTED_HEADERS = ['header_one', 'header_two', 'header_three']


def validate_csv(csv_name='/tmp/upload.csv'):
    """Return True if the CSV's header row matches EXPECTED_HEADERS.

    csv_name: path to the CSV on local (Lambda /tmp) storage. Defaults to
        the location get_csv_from_s3 is expected to download to — the
        original referenced an undefined ``csv_name`` global, so the
        parameter both fixes the NameError and stays callable with no args.

    Returns False for an empty file or any header mismatch.
    """
    # Text mode with newline='' is the documented way to feed csv.reader
    # on Python 3 (the original used 'rb' + reader.next(), both Py2-only).
    with open(csv_name, 'r', newline='') as csv_to_test:
        reader = csv.reader(csv_to_test)
        # next(reader, None) avoids StopIteration on an empty file.
        headers = next(reader, None)
    return headers == EXPECTED_HEADERS
def load_valid_data():
    """Load validated data from S3 into Redshift.

    Stub: intended to issue a Redshift COPY command (see
    http://docs.aws.amazon.com/redshift/latest/dg/tutorial-loading-data.html);
    for now it only prints a success message.
    """
    # TODO: run the COPY command (e.g. via pg8000) against Redshift.
    # Original used a Python 2 print statement, a SyntaxError on Python 3.
    print('Data loaded to Redshift. Yay!')
def trigger_alarm():
    """Warn someone important (via an AWS SES email) about invalid data.

    Stub: no alerting is implemented yet; calling this is a no-op.
    """
def handler(event, context):
    """Lambda entry point: validate an uploaded CSV, then load it or alarm.

    event: S3 put-notification payload; the uploaded object's location is
        read from Records[0].s3 (standard S3 event shape — confirm against
        the configured trigger).
    context: Lambda context object (unused).
    """
    # The original never defined bucket_name/key_name (NameError on every
    # invocation); pull them out of the S3 event record.
    s3_info = event['Records'][0]['s3']
    bucket_name = s3_info['bucket']['name']
    key_name = s3_info['object']['key']
    # Download to local /tmp storage so validate_csv can open the file.
    get_csv_from_s3(bucket_name, key_name)
    if validate_csv():
        load_valid_data()
    else:
        trigger_alarm()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment