-
-
Save rygh4775/fa5355386516ebc15b2bbabe2fd2533e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# lambda_function.py from https://github.com/alfonsof/aws-python-examples/blob/master/awslambdas3move-capi/README.md
| # It handles an AWS Lambda function that moves an object when it appears | |
| # in a S3 bucket to another S3 bucket. | |
| # It uses Client API (low-level) of Boto3. | |
import datetime
import os
import re
import urllib.parse

import boto3
import botocore
# Retrieve environment variable TARGET_BUCKET; stays None when unset
# (the handler checks for this and aborts with an error message).
destination_bucket_name = os.environ.get('TARGET_BUCKET', None)
# Prefix prepended to every object key written into the target bucket.
destination_key_prefix = 'ext/'
# Create an S3 client once at module load so warm Lambda invocations reuse it.
s3_client = boto3.client('s3')
def lambda_handler(event, context):
    """Move a newly created S3 object into the target bucket.

    Triggered by an S3 ObjectCreated notification. Copies the object into
    TARGET_BUCKET under a partitioned key layout, then deletes it from the
    source bucket.

    Parameters:
        event: S3 event notification dict; only the first record is used.
        context: Lambda context; used to derive the account id from the
            invoked function ARN.

    Returns:
        None. Errors are reported via print (CloudWatch Logs); unexpected
        ClientErrors are re-raised.
    """
    if destination_bucket_name is None:
        # Environment variable TARGET_BUCKET does not exist
        print('Error: TARGET_BUCKET Lambda environment variable does not exist!!')
        return

    source_bucket_name = event['Records'][0]['s3']['bucket']['name']
    # S3 event keys are URL-encoded (e.g. spaces arrive as '+'); decode so
    # the copy/delete calls address the real object key.
    source_key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'])

    file_name = get_file_name(source_key)
    if file_name is None:
        # Bug fix: previously a key that did not end in '.parquet' made
        # get_custom_source(None) raise TypeError. Skip such objects instead.
        print('Error: object key does not end in .parquet, skipping: ' + source_key)
        return
    custom_source = get_custom_source(file_name)
    if custom_source is None:
        # File name lacks the expected '<name>-<digits>.parquet' shape.
        print('Error: cannot derive custom source from file name, skipping: ' + file_name)
        return

    region = event['Records'][0]['awsRegion']
    account = context.invoked_function_arn.split(":")[4]
    event_day = datetime.datetime.today().strftime("%Y%m%d")
    # Partitioned layout per Security Lake custom-source best practices:
    # https://docs.aws.amazon.com/security-lake/latest/userguide/custom-sources.html#custom-sources-best-practices
    destination_key = (destination_key_prefix + custom_source
                       + '/region_par=' + region
                       + '/accountId_par=' + account
                       + '/eventDay_par=' + event_day
                       + '/' + file_name)

    print('From - bucket: ' + source_bucket_name)
    print('From - object: ' + source_key)
    print('To - bucket: ' + destination_bucket_name)
    print('To - object: ' + destination_key)

    try:
        # Copy the object, then delete the source — S3 has no atomic move.
        print('Moving object ...')
        copy_source = {
            'Bucket': source_bucket_name,
            'Key': source_key
        }
        s3_client.copy(copy_source, destination_bucket_name, destination_key)
        # Delete the object from source bucket
        response = s3_client.delete_object(Bucket=source_bucket_name, Key=source_key)
        print(response)
        print('\nMoved')
    except botocore.exceptions.ClientError as e:
        # Translate the common, expected S3 failures into log messages;
        # anything else is unexpected and should surface to Lambda.
        if e.response['Error']['Code'] == "AccessDenied":
            print("Error: Access denied!!")
        elif e.response['Error']['Code'] == "InvalidBucketName":
            print("Error: Invalid bucket name!!")
        elif e.response['Error']['Code'] == "NoSuchBucket":
            print("Error: No such bucket!!")
        else:
            raise
def get_file_name(key):
    """Return the trailing ``<name>.parquet`` component of *key*, or None.

    The key must contain at least one '/' before the file name; a bare
    file name with no directory prefix yields None.
    """
    match = re.search(r'/([^/]+\.parquet)$', key)
    return match.group(1) if match else None
def get_custom_source(file_name):
    """Extract the digit run just before '.parquet' from *file_name*.

    Returns None when the name does not match '<anything>-<digits>.parquet'.

    NOTE(review): this returns the trailing numeric group (e.g. '6001'),
    not the leading name portion — confirm that is the intended
    custom-source identifier.
    """
    match = re.match(r'^(.*?)-(\d+)\.parquet$', file_name)
    if not match:
        return None
    return match.group(2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment