Skip to content

Instantly share code, notes, and snippets.

@kaisbaccour
Created June 6, 2024 15:35
Show Gist options
  • Save kaisbaccour/d6b265b4aadc71b7f3f4aa39b578962c to your computer and use it in GitHub Desktop.
python3 lagrange-fix-blocks.py prod-distributed-query 20030470 20030500 ./s3-blob-test AdministratorAccess-905418166352
import boto3
import argparse
import os
import json
import shutil
from botocore.exceptions import NoCredentialsError, PartialCredentialsError, ProfileNotFound
import difflib
from termcolor import colored

def download_s3_object(bucket_name, object_key, download_path, aws_profile):
    """Fetch one object from S3 into a local file.

    Builds a session from the named AWS profile and downloads
    ``object_key`` from ``bucket_name`` to ``download_path``. All
    failures are printed rather than raised, so a single bad object
    does not abort a caller's download loop.
    """
    try:
        client = boto3.Session(profile_name=aws_profile).client('s3')
        client.download_file(bucket_name, object_key, download_path)
        print(f'Successfully downloaded {object_key} from {bucket_name} to {download_path}')
    except ProfileNotFound:
        # Help the user pick a valid profile instead of just failing.
        print(f'Error: The config profile ({aws_profile}) could not be found.')
        print('Available profiles are:')
        for name in boto3.session.Session().available_profiles:
            print(f' - {name}')
    except (NoCredentialsError, PartialCredentialsError) as err:
        print(f'Error in authentication: {err}')
    except Exception as err:
        print(f'An error occurred: {err}')

def check_and_modify_objects(temp_dir, output_dir):
    """Scan downloaded block JSON files and clear storage-trie data.

    For every file in ``temp_dir``, any top-level entry whose value has
    a truthy ``storage_trie_updated`` flag or a non-empty ``updates``
    list is reset (flag -> False, updates -> []). Each modified file is
    written to ``output_dir``; a unified diff is printed in color and
    also recorded in ``output_dir/modifications.log``.

    Returns the list of filenames that were modified.
    """
    log_file_path = os.path.join(output_dir, 'modifications.log')
    modified_files = []

    with open(log_file_path, 'w') as log_file:
        for filename in os.listdir(temp_dir):
            file_path = os.path.join(temp_dir, filename)

            with open(file_path, 'r') as file:
                original_data = file.read()

            data = json.loads(original_data)
            modified = False
            for value in data.values():
                if value.get('storage_trie_updated') or value.get('updates'):
                    value['storage_trie_updated'] = False
                    value['updates'] = []
                    modified = True

            if not modified:
                continue

            modified_data = json.dumps(data, indent=4)
            modified_file_path = os.path.join(output_dir, filename)
            with open(modified_file_path, 'w') as modified_file:
                modified_file.write(modified_data)

            diff = difflib.unified_diff(
                original_data.splitlines(keepends=True),
                modified_data.splitlines(keepends=True),
                fromfile=f'original/{filename}',
                tofile=f'modified/{filename}',
            )

            # Record the diff in the log (previously the log file was
            # opened but never written) and mirror it to the terminal
            # in color.
            log_file.write(f'Modified {filename}\n')
            for line in diff:
                log_file.write(line)
                if line.startswith('+'):
                    print(colored(line, 'green'), end='')
                elif line.startswith('-'):
                    print(colored(line, 'red'), end='')
                elif line.startswith('@@'):
                    print(colored(line, 'cyan'), end='')

            modified_files.append(filename)

    return modified_files


def upload_modified_objects(bucket_name, output_dir, modified_files, aws_profile):
    """Upload previously modified block files back to the S3 bucket.

    Each file in ``modified_files`` is read from ``output_dir``,
    minified, and written to ``Scraper/1/blocks/<filename>`` in
    ``bucket_name``. Errors are printed rather than raised.
    """
    try:
        session = boto3.Session(profile_name=aws_profile)
        s3_client = session.client('s3')

        for filename in modified_files:
            # Key must include the per-file name so each block lands at
            # its own object (mirrors the download key pattern).
            object_key = f'Scraper/1/blocks/{filename}'
            file_path = os.path.join(output_dir, filename)

            with open(file_path, 'r') as file:
                file_content = file.read()

            # Minify via the JSON serializer. The previous
            # ''.join(content.split()) stripped ALL whitespace,
            # including spaces inside JSON string values, which
            # silently corrupts the data.
            compact = json.dumps(json.loads(file_content), separators=(',', ':'))
            print(f'Uploading {filename} content:\n{compact}')
            s3_client.put_object(Bucket=bucket_name, Key=object_key, Body=compact.encode('utf-8'))

            print(f'Successfully uploaded {filename} to {bucket_name}/{object_key}')

    except Exception as e:
        print(f'An error occurred: {e}')


def download_s3_objects_in_range(bucket_name, start_block, end_block, temp_dir, output_dir, aws_profile):
    """Download, fix, and optionally re-upload a range of block files.

    Downloads ``Scraper/1/blocks/<n>.json`` for every block number in
    [start_block, end_block] into ``temp_dir``, clears storage-trie
    data via check_and_modify_objects, and — after an interactive
    yes/no confirmation — uploads the modified files back to the
    bucket. Errors are printed rather than raised.
    """
    # exist_ok avoids the check-then-create race of an explicit
    # os.path.exists() test; makedirs also creates output_dir, since
    # temp_dir lives inside it.
    os.makedirs(temp_dir, exist_ok=True)

    try:
        # Fail fast (with a single message) on a bad profile before
        # starting the download loop; each download builds its own
        # session. The previous unused `s3_client` local was dropped.
        boto3.Session(profile_name=aws_profile)

        for block in range(start_block, end_block + 1):
            object_key = f'Scraper/1/blocks/{block}.json'
            download_path = os.path.join(temp_dir, f'{block}.json')
            download_s3_object(bucket_name, object_key, download_path, aws_profile)

        modified_files = check_and_modify_objects(temp_dir, output_dir)

        if modified_files:
            print('\nDo you want to upload the modified files back to the S3 bucket? (yes/no)')
            response = input()

            if response.lower() == 'yes':
                upload_modified_objects(bucket_name, output_dir, modified_files, aws_profile)
            else:
                print('Changes were not uploaded.')

        print('All objects have been processed. Check modifications.log for details.')

    except Exception as e:
        print(f'An error occurred: {e}')

def _parse_args():
    """Build the CLI parser and parse sys.argv for this script."""
    parser = argparse.ArgumentParser(description='Download S3 objects within a range.')
    # Table-driven positional arguments, in the order the script expects.
    for arg_name, arg_type, arg_help in (
        ('bucket_name', str, 'The name of the S3 bucket.'),
        ('start_block', int, 'The starting block number.'),
        ('end_block', int, 'The ending block number.'),
        ('output_dir', str, 'The directory to save the downloaded files.'),
        ('aws_profile', str, 'The AWS profile to use.'),
    ):
        parser.add_argument(arg_name, type=arg_type, help=arg_help)
    return parser.parse_args()


if __name__ == "__main__":
    args = _parse_args()
    # Downloads are staged in a temp/ subdirectory of the output dir.
    download_s3_objects_in_range(
        args.bucket_name,
        args.start_block,
        args.end_block,
        os.path.join(args.output_dir, 'temp'),
        args.output_dir,
        args.aws_profile,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment