Skip to content

Instantly share code, notes, and snippets.

@GabrielSGoncalves
Last active May 8, 2022 20:16
Show Gist options
  • Save GabrielSGoncalves/b878aaa16a640afae92b80b1795b8a37 to your computer and use it in GitHub Desktop.
Save GabrielSGoncalves/b878aaa16a640afae92b80b1795b8a37 to your computer and use it in GitHub Desktop.
AWS Lambda function used in a Medium article
import json
from io import StringIO
import boto3
import os
import pandas as pd
def write_dataframe_to_csv_on_s3(dataframe, filename, bucket):
    """Write a pandas DataFrame to a tab-separated CSV object on S3.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to serialize.
    filename : str
        S3 object key to write the serialized data under.
    bucket : str
        Name of the destination S3 bucket.
    """
    # Serialize the dataframe into an in-memory text buffer.
    csv_buffer = StringIO()
    # NOTE: separator is a tab despite the .csv naming convention.
    dataframe.to_csv(csv_buffer, sep="\t")
    s3_resource = boto3.resource("s3")
    # BUG FIX: the object key must be the `filename` argument; the original
    # used a literal placeholder string and left `filename` unused.
    s3_resource.Object(bucket, filename).put(Body=csv_buffer.getvalue())
def lambda_handler(event, context):
    """AWS Lambda entry point: aggregate a CSV on S3 and write the result back.

    Expected keys in ``event``:
        bucket      -- S3 bucket holding the input file (also the output destination)
        file_key    -- key of the input CSV
        output_file -- key for the aggregated output CSV
        group       -- column name to group by
        column      -- column whose per-group mean is computed

    Returns
    -------
    dict
        ``{'statusCode': 200, 'message': 'Success!'}`` on success,
        ``{'statusCode': 400, 'body': 'Error, bad request!'}`` on any failure.
    """
    try:
        # Pull the required parameters from the invocation event.
        bucket = event.get('bucket')
        key = event.get('file_key')
        output = event.get('output_file')
        group = event.get('group')
        column = event.get('column')
        # Fetch the input CSV object from S3.
        s3_client = boto3.client('s3')
        response = s3_client.get_object(Bucket=bucket, Key=key)
        csv_file = response["Body"]
        # Load the CSV into a DataFrame (first column is the index).
        df = pd.read_csv(csv_file, index_col=0, low_memory=False)
        # Mean of `column` per `group`, sorted descending, rounded to 2 decimals.
        df_groupby = pd.DataFrame(
            df.groupby(group)[column].mean().sort_values(ascending=False)
        ).round(2)
        # Write the aggregate back to the same bucket.
        write_dataframe_to_csv_on_s3(df_groupby, output, bucket)
        return {
            'statusCode': 200,
            'message': 'Success!'}
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate instead of being reported as a bad request.
        return {
            'statusCode': 400,
            'body': 'Error, bad request!'}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment