Upload files to an AWS S3 bucket in the DEEP_ARCHIVE storage class
# Securely collect user input for the file list, S3 bucket details, and AWS credentials,
# then upload the listed files to the specified S3 bucket in the DEEP_ARCHIVE storage class.
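# The file list is a plain-text file with one path per line, relative to the root
# directory entered at the prompts. Illustrative example (hypothetical paths):
#
#     reports/2023/q1.pdf
#     raw_data/experiment_01.csv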
import boto3
import os
import getpass
import logging

from botocore.exceptions import NoCredentialsError, BotoCoreError, ClientError
from tqdm import tqdm

# Set up logging
logging.basicConfig(level=logging.INFO)
def initialize_s3_client(aws_access_key_id: str, aws_secret_access_key: str, region_name: str = 'ap-southeast-1'):
    """Initialize and return the S3 client."""
    try:
        return boto3.client('s3',
                            aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=aws_secret_access_key,
                            region_name=region_name)
    except NoCredentialsError:
        logging.error("No AWS credentials provided or found.")
        return None
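
# Note: boto3 can also resolve credentials from the environment or the standard
# AWS config/credentials files; prompting for them explicitly (as this script does)
# just keeps the run self-contained. Credential errors generally surface on the
# first API call rather than at client creation.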
def generate_s3_key(root_directory: str, file_path: str, new_folder_name: str) -> str:
    """Generate an S3 key from the file's path relative to the root directory."""
    relative_path = os.path.relpath(file_path, root_directory)
    # Reject paths that resolve outside the root directory; checking against os.pardir
    # handles both "/" and "\" path separators, unlike a literal "../" test
    if relative_path == os.pardir or relative_path.startswith(os.pardir + os.sep):
        logging.warning(f"Path traversal outside of the root directory detected for file: {file_path}")
        return None
    s3_key = os.path.join(new_folder_name, relative_path).replace("\\", "/")  # S3 keys use forward slashes
    return s3_key
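
# Worked example (hypothetical values): with root_directory="/data",
# file_path="/data/projects/report.pdf", and new_folder_name="archive-2023",
# relpath yields "projects/report.pdf", so the key becomes "archive-2023/projects/report.pdf".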
def upload_file_to_s3_and_archive(s3, file_path: str, bucket_name: str, s3_key: str) -> None:
    """Upload a single file to the specified S3 bucket and folder in the DEEP_ARCHIVE storage class."""
    try:
        # Upload directly with the storage class set via ExtraArgs, rather than uploading
        # and then re-copying the object onto itself: this avoids a second PUT and
        # copy_object's 5 GB size limit.
        s3.upload_file(file_path, bucket_name, s3_key,
                       ExtraArgs={'StorageClass': 'DEEP_ARCHIVE'})
    except (BotoCoreError, ClientError) as e:
        logging.error(f"Error uploading {file_path} to {bucket_name}/{s3_key} with DEEP_ARCHIVE storage class. Error: {e}")
def upload_files_to_s3(root_directory: str, file_list_path: str, s3, bucket_name: str, new_folder_name: str) -> None:
    """Upload files listed in file_list_path to the S3 bucket in the DEEP_ARCHIVE storage class."""
    with open(file_list_path, 'r', encoding='utf-8-sig') as f:
        files = [line.strip() for line in f if line.strip()]  # skip blank lines
    for file_path in tqdm(files, desc="Uploading files", leave=True, total=len(files)):
        normalized_path = os.path.normpath(os.path.join(root_directory, file_path))
        if not os.path.isfile(normalized_path):  # also skips directories, which upload_file rejects
            logging.warning(f"File {normalized_path} not found. Skipping...")
            continue
        s3_key = generate_s3_key(root_directory, normalized_path, new_folder_name)
        if s3_key is None:
            continue  # Skip files that resolve outside of the root directory
        upload_file_to_s3_and_archive(s3, normalized_path, bucket_name, s3_key)
    logging.info(f"Finished uploading files to {bucket_name}/{new_folder_name} with DEEP_ARCHIVE storage class.")
def get_user_confirmation(file_list_txt: str, bucket_name: str, new_folder_name: str, root_directory: str, region_name: str) -> bool:
    """Display user inputs and ask for confirmation."""
    print("\nPlease confirm your inputs:")
    print(f"File List: {file_list_txt}")
    print(f"Bucket Name: {bucket_name}")
    print(f"New Folder Name: {new_folder_name}")
    print(f"Root Directory: {root_directory}")
    print(f"Region Name: {region_name}\n")
    confirmation = input("Proceed with these settings? (y/n): ").lower().strip()
    return confirmation == 'y'
def main():
    default_file_list = "files_to_archived.txt"
    # Strip stray whitespace so that pasted paths and names don't break lookups
    file_list_txt = input(f"Enter the path to your file list (default is '{default_file_list}'): ").strip()
    if not file_list_txt:
        file_list_txt = default_file_list
    if not os.path.exists(file_list_txt):
        logging.error(f"File {file_list_txt} does not exist. Exiting.")
        return
    bucket_name = input("Enter your S3 bucket name: ").strip()
    aws_access_key_id = getpass.getpass("Enter your AWS access key ID: ")
    aws_secret_access_key = getpass.getpass("Enter your AWS secret access key: ")
    new_folder_name = input("Enter your desired folder name in the bucket: ").strip()
    root_directory = input("Enter the root directory from which to calculate relative paths: ").strip()
    region_name = input("Enter your AWS region (default is 'ap-southeast-1'): ").strip()
    if not region_name:
        region_name = 'ap-southeast-1'
    if not get_user_confirmation(file_list_txt, bucket_name, new_folder_name, root_directory, region_name):
        print("User did not confirm settings. Exiting.")
        return
    s3 = initialize_s3_client(aws_access_key_id, aws_secret_access_key, region_name)
    if s3:
        upload_files_to_s3(root_directory, file_list_txt, s3, bucket_name, new_folder_name)


if __name__ == "__main__":
    main()
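
To run the script (illustrative; the gist does not name the file, so assume it is saved as upload_to_s3.py): install the dependencies with `pip install boto3 tqdm`, then run `python upload_to_s3.py`. The script prompts for each setting, echoes them back for confirmation, and only then starts uploading; the AWS keys are read with getpass, so they are not echoed to the terminal.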