Skip to content

Instantly share code, notes, and snippets.

@odinokov
Last active October 28, 2023 06:13
Show Gist options
  • Save odinokov/4d690924eb4dc44c5f1c5fcf1f2882a4 to your computer and use it in GitHub Desktop.
upload files to AWS
# To securely collect user input regarding file lists, S3 bucket details, and AWS credentials.
# Then, upload the listed files to a specified S3 bucket and transition their storage class to DEEP_ARCHIVE.
import boto3
import os
import getpass
import logging
from botocore.exceptions import NoCredentialsError, BotoCoreError, ClientError
from tqdm import tqdm
# Configure root logging at INFO so per-file warnings and the final summary are visible.
logging.basicConfig(level=logging.INFO)
def initialize_s3_client(aws_access_key_id: str, aws_secret_access_key: str, region_name='ap-southeast-1') -> boto3.client:
    """Create an S3 client from the given credentials.

    Returns the client on success, or None when no credentials are
    available (the error is logged rather than raised).
    """
    try:
        client = boto3.client(
            's3',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
        )
    except NoCredentialsError:
        logging.error("No AWS credentials provided or found.")
        return None
    return client
def generate_s3_key(root_directory: str, file_path: str, new_folder_name: str) -> str:
    """Build the S3 object key for *file_path*, rooted at *new_folder_name*.

    The key mirrors the file's path relative to *root_directory*.
    Returns None (and logs a warning) when the file resolves outside
    *root_directory* (path traversal).
    """
    relative_path = os.path.relpath(file_path, root_directory)
    # Reject any path that escapes the root directory. Comparing against
    # os.pardir / os.sep makes the check robust to OS-native separators and
    # also catches the bare ".." case, which a literal "../" prefix test misses.
    if relative_path == os.pardir or relative_path.startswith(os.pardir + os.sep):
        logging.warning(f"Path traversal outside of the root directory detected for file: {file_path}")
        return None
    # S3 keys always use forward slashes, regardless of the local OS.
    s3_key = os.path.join(new_folder_name, relative_path).replace("\\", "/")
    return s3_key
def upload_file_to_s3_and_archive(s3, file_path: str, bucket_name: str, s3_key: str) -> None:
    """Upload *file_path* to s3://<bucket_name>/<s3_key> in DEEP_ARCHIVE.

    Errors are logged rather than raised so one failed file does not
    abort a batch upload.
    """
    try:
        # Setting StorageClass at upload time writes the object directly to
        # DEEP_ARCHIVE, replacing the original upload-then-copy round trip
        # (the copy step doubled the PUT traffic and CopyObject is limited
        # to objects of at most 5 GB).
        s3.upload_file(
            file_path,
            bucket_name,
            s3_key,
            ExtraArgs={'StorageClass': 'DEEP_ARCHIVE'},
        )
    except (BotoCoreError, ClientError) as e:
        logging.error(f"Error uploading {file_path} to {bucket_name}/{s3_key} or setting its storage class. Error: {e}")
def upload_files_to_s3(root_directory: str, file_list_path: str, s3, bucket_name: str, new_folder_name: str) -> None:
    """Upload every file listed in *file_list_path* to the S3 bucket.

    Each line of the list file is a path relative to *root_directory*;
    keys are placed under *new_folder_name* with DEEP_ARCHIVE storage.
    Missing files and traversal attempts are logged and skipped.
    """
    with open(file_list_path, 'r', encoding='utf-8-sig') as f:
        # Drop blank lines: an empty entry would otherwise resolve to the
        # root directory itself and trigger a doomed directory upload.
        files = [line.strip() for line in f if line.strip()]
    for file_path in tqdm(files, desc="Uploading files", leave=True, total=len(files)):
        normalized_path = os.path.normpath(os.path.join(root_directory, file_path))
        if not os.path.exists(normalized_path):
            logging.warning(f"File {normalized_path} not found. Skipping...")
            continue
        s3_key = generate_s3_key(root_directory, normalized_path, new_folder_name)
        if s3_key is None:
            continue  # Skip file if path traversal outside of the root directory is detected
        upload_file_to_s3_and_archive(s3, normalized_path, bucket_name, s3_key)
    logging.info(f"All files uploaded to {bucket_name}/{new_folder_name} and set to DEEP_ARCHIVE storage class.")
def get_user_confirmation(file_list_txt: str, bucket_name: str, new_folder_name: str, root_directory: str, region_name: str) -> bool:
    """Echo the collected settings and return True if the user answers 'y'."""
    summary_lines = [
        "\nPlease confirm your inputs:",
        f"File List: {file_list_txt}",
        f"Bucket Name: {bucket_name}",
        f"New Folder Name: {new_folder_name}",
        f"Root Directory: {root_directory}",
        f"Region Name: {region_name}\n",
    ]
    for line in summary_lines:
        print(line)
    answer = input("Proceed with these settings? (y/n): ")
    return answer.lower().strip() == 'y'
def main():
    """Interactively gather settings, confirm them, and run the upload."""
    default_file_list = "files_to_archived.txt"
    file_list_txt = input(f"Enter the path to your file list (default is '{default_file_list}'): ") or default_file_list
    if not os.path.exists(file_list_txt):
        logging.error(f"File {file_list_txt} does not exist. Exiting.")
        return
    # Collect bucket details; credentials are read without echoing.
    bucket_name = input("Enter your S3 bucket name: ")
    aws_access_key_id = getpass.getpass("Enter your AWS access key ID: ")
    aws_secret_access_key = getpass.getpass("Enter your AWS secret access key: ")
    new_folder_name = input("Enter your desired folder name in the bucket: ")
    root_directory = input("Enter the root directory from which to calculate relative paths: ")
    region_name = input("Enter your AWS region (default is 'ap-southeast-1'): ") or 'ap-southeast-1'
    if not get_user_confirmation(file_list_txt, bucket_name, new_folder_name, root_directory, region_name):
        print("User did not confirm settings. Exiting.")
        return
    s3 = initialize_s3_client(aws_access_key_id, aws_secret_access_key, region_name)
    # initialize_s3_client returns None when credentials are unusable.
    if s3:
        upload_files_to_s3(root_directory, file_list_txt, s3, bucket_name, new_folder_name)
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment