Skip to content

Instantly share code, notes, and snippets.

@fernando-mc
Created February 6, 2017 05:00
Show Gist options
  • Save fernando-mc/7761b3b3b6a274787423945356309b5a to your computer and use it in GitHub Desktop.
Save fernando-mc/7761b3b3b6a274787423945356309b5a to your computer and use it in GitHub Desktop.
Simple CSV Checker
# Validates CSVs uploaded to S3
import boto3
import csv
import pg8000
# Header row a CSV must have to be considered valid. validate_csv compares the
# file's first row to this list with ==, so names and their order must match.
EXPECTED_HEADERS = ['header_one', 'header_two', 'header_three']
def get_csv_from_s3(bucket_name, key_name):
    """Download a CSV object from S3 into local temporary storage.

    The object is saved in the system temp directory under the base name of
    its key, so that the running function can open it as a regular file.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        key_name: Key of the CSV object inside the bucket.

    Returns:
        The local filesystem path the object was downloaded to.

    Raises:
        ValueError: If ``key_name`` has no file-name component (for example
            a "folder" key that ends with ``/``).
    """
    # Imported locally so this function does not depend on additional
    # module-level imports being present.
    import os
    import tempfile

    file_name = os.path.basename(key_name)
    if not file_name:
        raise ValueError('S3 key %r does not name a file' % (key_name,))

    local_path = os.path.join(tempfile.gettempdir(), file_name)
    # boto3 is imported at module level by this file.
    boto3.client('s3').download_file(bucket_name, key_name, local_path)
    return local_path
def validate_csv(csv_path=None, expected_headers=None):
    """Check that a CSV file's header row matches the expected headers.

    Args:
        csv_path: Path of the CSV file to check. When omitted, the
            module-level ``csv_name`` is used, which is the name the original
            zero-argument implementation read.
        expected_headers: Sequence of column names the first row must equal,
            in order. Defaults to the module-level ``EXPECTED_HEADERS``.

    Returns:
        True if the first row equals the expected headers exactly; False
        otherwise, including when the file is empty.
    """
    if csv_path is None:
        # Backward compatibility for zero-argument callers: fall back to the
        # module-level name the original body referenced.
        csv_path = csv_name
    if expected_headers is None:
        expected_headers = EXPECTED_HEADERS

    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''. 'utf-8-sig' drops a leading byte-order mark, which would
    # otherwise be glued onto the first header name and fail the comparison.
    with open(csv_path, newline='', encoding='utf-8-sig') as csv_to_test:
        # A default of None makes an empty file compare unequal instead of
        # raising StopIteration.
        header_row = next(csv.reader(csv_to_test), None)

    return header_row == list(expected_headers)
def load_valid_data():
    """Report that validated data was loaded to Redshift from S3.

    The load itself is still a placeholder: nothing is copied yet, only the
    confirmation message is printed.

    Returns:
        None.
    """
    # TODO: add code to run a COPY command from S3.
    # http://docs.aws.amazon.com/redshift/latest/dg/tutorial-loading-data.html
    # Calling print() with one argument gives the same output on Python 2 and
    # Python 3; the original print statement is a SyntaxError on Python 3.
    print('Data loaded to Redshift. Yay!')
def trigger_alarm():
    """Warn someone important, by email through AWS SES, about invalid data.

    Placeholder: no email is sent yet and the function simply returns None.
    """
    # TODO: send the warning email using AWS SES.
    return None
def handler(event, context):
    """Lambda entry point: download and validate each CSV named in an S3 event.

    For every record in the event, the referenced object is fetched with
    get_csv_from_s3 and checked with validate_csv. Valid data is passed on to
    load_valid_data; anything else triggers trigger_alarm.

    Args:
        event: S3 event notification. Each entry of ``event['Records']``
            carries the bucket name and object key under its ``'s3'`` field.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        KeyError: If the event does not have the S3 notification layout.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import unquote_plus

    for record in event['Records']:
        s3_info = record['s3']
        bucket_name = s3_info['bucket']['name']
        # Object keys arrive URL-encoded in S3 notifications (a space is
        # sent as '+'), so decode before using the key.
        key_name = unquote_plus(s3_info['object']['key'])

        get_csv_from_s3(bucket_name, key_name)
        # Make sure you're giving validate_csv the right location of the file!
        if validate_csv():
            # The data is valid.
            load_valid_data()
        else:
            trigger_alarm()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment