Skip to content

Instantly share code, notes, and snippets.

@rygh4775
Last active July 22, 2024 14:26
Show Gist options
  • Select an option

  • Save rygh4775/fa5355386516ebc15b2bbabe2fd2533e to your computer and use it in GitHub Desktop.

Select an option

Save rygh4775/fa5355386516ebc15b2bbabe2fd2533e to your computer and use it in GitHub Desktop.
# lambda_function.py from https://github.com/alfonsof/aws-python-examples/blob/master/awslambdas3move-capi/README.md
# It handles an AWS Lambda function that moves an object when it appears
# in a S3 bucket to another S3 bucket.
# It uses Client API (low-level) of Boto3.
import datetime
import os
import re
import urllib.parse

import boto3
import botocore
# Destination bucket for moved objects, read from the Lambda environment.
# None when TARGET_BUCKET is not configured; the handler checks for this
# and aborts with an error message instead of failing mid-copy.
destination_bucket_name = os.environ.get('TARGET_BUCKET', None)
# Fixed key prefix under which every moved object is placed in the
# destination bucket.
destination_key_prefix = 'ext/'
# Low-level Boto3 S3 client (Client API), created once at module load.
s3_client = boto3.client('s3')
def lambda_handler(event, context):
    """Move a newly created S3 object into the destination bucket.

    Triggered by an S3 ObjectCreated event. Copies the object into
    ``destination_bucket_name`` under a partitioned key
    (``ext/<source>/region_par=.../accountId_par=.../eventDay_par=.../<file>``)
    and then deletes it from the source bucket.

    :param event: S3 event notification payload (``Records[0]`` is used).
    :param context: Lambda context; the account id is taken from
        ``invoked_function_arn``.
    :return: None. Known S3 client errors are reported via print; any
        other ClientError is re-raised so the invocation is marked failed.
    """
    if destination_bucket_name is None:
        # Environment variable TARGET_BUCKET does not exist
        print('Error: TARGET_BUCKET Lambda environment variable does not exist!!')
        return
    record = event['Records'][0]
    source_bucket_name = record['s3']['bucket']['name']
    # S3 event notifications URL-encode the object key (e.g. spaces arrive
    # as '+'); decode it so copy/delete operate on the real key.
    source_key = urllib.parse.unquote_plus(record['s3']['object']['key'])
    file_name = get_file_name(source_key)
    if file_name is None:
        # Not a *.parquet object: get_file_name() returned None, and the
        # original code would have crashed concatenating it. Skip instead.
        print('Error: object key does not match expected .parquet layout: ' + source_key)
        return
    custom_source = get_custom_source(file_name)
    if custom_source is None:
        # File name did not match '<prefix>-<digits>.parquet'; skip.
        print('Error: file name does not match expected pattern: ' + file_name)
        return
    region = record['awsRegion']
    account = context.invoked_function_arn.split(":")[4]
    event_day = datetime.datetime.today().strftime("%Y%m%d")
    # Best practices for ingesting custom sources:
    # https://docs.aws.amazon.com/security-lake/latest/userguide/custom-sources.html#custom-sources-best-practices
    destination_key = destination_key_prefix + custom_source + '/region_par=' + region + '/accountId_par=' + account + '/eventDay_par=' + event_day + '/' + file_name
    print('From - bucket: ' + source_bucket_name)
    print('From - object: ' + source_key)
    print('To - bucket: ' + destination_bucket_name)
    print('To - object: ' + destination_key)
    try:
        # Copy the object, then delete the original (S3 has no atomic move).
        print('Moving object ...')
        copy_source = {
            'Bucket': source_bucket_name,
            'Key': source_key
        }
        s3_client.copy(copy_source, destination_bucket_name, destination_key)
        # Delete the object from source bucket
        response = s3_client.delete_object(Bucket=source_bucket_name, Key=source_key)
        print(response)
        print('\nMoved')
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "AccessDenied":
            print("Error: Access denied!!")
        elif e.response['Error']['Code'] == "InvalidBucketName":
            print("Error: Invalid bucket name!!")
        elif e.response['Error']['Code'] == "NoSuchBucket":
            print("Error: No such bucket!!")
        else:
            raise
def get_file_name(key):
    """Return the trailing '<name>.parquet' component of *key*.

    Matches only when the key contains a '/' followed by a non-empty file
    name ending in '.parquet'; returns None for anything else.
    """
    found = re.search(r'/([^/]+\.parquet)$', key)
    return found.group(1) if found else None
def get_custom_source(file_name):
    """Extract the numeric suffix from '<prefix>-<digits>.parquet'.

    :param file_name: file name produced by ``get_file_name``; may be None
        when the object key did not look like a parquet file.
    :return: the trailing digit group (e.g. '6001'), or None when
        *file_name* is None or does not match the expected pattern.
    """
    if file_name is None:
        # get_file_name() returns None for non-matching keys; re.match on
        # None would raise TypeError, so short-circuit here.
        return None
    match = re.match(r'^(.*?)-(\d+)\.parquet$', file_name)
    if match:
        # NOTE(review): group(2) is the trailing numeric part; if the
        # leading source name was intended, group(1) holds it — confirm.
        return match.group(2)
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment