Created
September 13, 2021 06:59
-
-
Save PsycheShaman/8f40077aebfd3c7cbb9fd0f04dbae31c to your computer and use it in GitHub Desktop.
Python function to unzip an archive stored in S3 to a specified destination folder in S3.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import io | |
import zipfile | |
def unzip_s3_source_data(
    s3_source_bucket: str,
    s3_source_folder: str,
    s3_destination_folder: str
) -> None:
    """
    Unzip every ``.zip`` object under an S3 prefix into a destination prefix.

    Lists the non-empty objects in ``s3_source_bucket`` whose key starts with
    ``s3_source_folder``. Each key ending in ``.zip`` is downloaded fully into
    memory, and every file member of the archive is uploaded to
    ``<s3_destination_folder>/<member name>``. Uploads go to the *source*
    bucket; there is no separate destination bucket.

    Progress is reported with ``print``.

    Args:
        s3_source_bucket: Bucket that holds the archives and receives the
            extracted files.
        s3_source_folder: Key prefix used to select the objects to inspect.
        s3_destination_folder: Key prefix for the extracted files. Trailing
            slashes are ignored so the resulting keys never contain ``//``.

    Raises:
        zipfile.BadZipFile: If a ``.zip`` object is not a valid archive.
        Exception: Any error raised by ``put_object`` is reported with
            ``print`` and then re-raised unchanged.
    """
    print('*'*100)
    print(f'Listing files in s3://{s3_source_bucket}\n')
    print('='*100)

    s3 = boto3.client('s3')

    # A single list_objects_v2 call returns at most 1000 keys, so walk every
    # page. 'Contents' is absent from a page when nothing matches the prefix.
    paginator = s3.get_paginator('list_objects_v2')
    s3_files = []
    for page in paginator.paginate(
        Bucket=s3_source_bucket,
        Prefix=s3_source_folder,
    ):
        for s3_object in page.get('Contents', []):
            # Zero-byte objects are folder placeholders, not data.
            if s3_object['Size'] != 0:
                s3_files.append(s3_object['Key'])

    s3_source_files = '\n'.join(s3_files)
    print(f'Source files in s3://{s3_source_bucket} are: {s3_source_files}\n')
    print('='*100)

    # Avoid '//' in destination keys when the caller passes 'folder/'.
    destination_prefix = s3_destination_folder.rstrip('/')

    for raw_data_filename in s3_files:
        if not raw_data_filename.endswith('.zip'):
            continue

        print(f'Downloading {raw_data_filename}\n')
        print('='*100)

        # NOTE: the whole archive is held in memory; ZipFile needs a seekable
        # file object and the S3 response body is not seekable.
        archive_bytes = io.BytesIO(
            s3.get_object(
                Bucket=s3_source_bucket,
                Key=raw_data_filename
            )['Body'].read()
        )

        with zipfile.ZipFile(archive_bytes) as archive:
            members = archive.infolist()
            filelist = '\n'.join(member.filename for member in members)
            print('Zip archive contains:\n')
            print(f'{filelist}\n')
            print('='*100)

            for member in members:
                # Directory entries carry no data; uploading them would only
                # create empty 'folder/' placeholder keys.
                if member.is_dir():
                    continue

                destination_key = f'{destination_prefix}/{member.filename}'
                print(
                    f'Uploading unzipped file {member.filename} '
                    f'to {destination_key}\n'
                )
                print('='*100)
                try:
                    s3.put_object(
                        Body=archive.read(member),
                        Bucket=s3_source_bucket,
                        Key=destination_key,
                    )
                except Exception as e:
                    print(f'Uncaught exception in file upload: {e}\n')
                    print('!*'*100)
                    # Bare raise keeps the original traceback intact.
                    raise

    print('Success!')
    print('*'*100)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment