import boto3
import argparse
import os
import json
import shutil
from botocore.exceptions import NoCredentialsError, PartialCredentialsError, ProfileNotFound
import difflib
from termcolor import colored
def download_s3_object(bucket_name, object_key, download_path, aws_profile):
    """Fetch a single S3 object to a local path, reporting the outcome on stdout.

    Args:
        bucket_name: Name of the bucket that holds the object.
        object_key: Key of the object inside the bucket.
        download_path: Local file path the object is written to.
        aws_profile: Name of the AWS profile used to build the session.

    Returns:
        None. Failures raised while downloading are printed rather than
        propagated to the caller.
    """
    try:
        # Build the client from the named profile and pull the object down.
        client = boto3.Session(profile_name=aws_profile).client('s3')
        client.download_file(bucket_name, object_key, download_path)
        print(f'Successfully downloaded {object_key} from {bucket_name} to {download_path}')
    except ProfileNotFound:
        # Help the user pick a valid profile by listing the configured ones.
        print(f'Error: The config profile ({aws_profile}) could not be found.')
        print('Available profiles are:')
        known_profiles = boto3.session.Session().available_profiles
        for name in known_profiles:
            print(f' - {name}')
    except (NoCredentialsError, PartialCredentialsError) as auth_error:
        print(f'Error in authentication: {auth_error}')
    except Exception as error:
        print(f'An error occurred: {error}')
def check_and_modify_objects(temp_dir, output_dir):
    """Reset update markers in downloaded JSON files and record what changed.

    Every regular file in ``temp_dir`` is parsed as JSON. When the top-level
    value is an object, each dict-valued entry with a truthy
    ``storage_trie_updated`` or ``updates`` field gets
    ``storage_trie_updated`` set to ``False`` and ``updates`` set to ``[]``.
    A file that changed is re-serialised with a 4-space indent into
    ``output_dir`` under the same name. Its unified diff against the original
    text is written to ``modifications.log`` in ``output_dir``, and the
    added, removed and hunk-header lines are printed in colour.

    Args:
        temp_dir: Directory containing the downloaded JSON files.
        output_dir: Directory receiving the modified files and the log.

    Returns:
        The names of the files that were modified, in ``os.listdir`` order.

    Raises:
        json.JSONDecodeError: If a file in ``temp_dir`` is not valid JSON.
        OSError: If a file cannot be read or written.
    """
    log_file_path = os.path.join(output_dir, 'modifications.log')
    modified_files = []
    with open(log_file_path, 'w', encoding='utf-8') as log_file:
        for filename in os.listdir(temp_dir):
            file_path = os.path.join(temp_dir, filename)
            # Sub-directories cannot be opened as files; ignore them.
            if not os.path.isfile(file_path):
                continue
            with open(file_path, 'r', encoding='utf-8') as file:
                original_data = file.read()
            data = json.loads(original_data)

            # Only JSON objects have entries to inspect; any other top-level
            # value is left untouched instead of aborting the whole run.
            entries = data.values() if isinstance(data, dict) else ()
            modified = False
            for value in entries:
                # Scalars and lists carry no update markers.
                if not isinstance(value, dict):
                    continue
                if value.get('storage_trie_updated') or value.get('updates'):
                    value['storage_trie_updated'] = False
                    value['updates'] = []
                    modified = True
            if not modified:
                continue

            modified_data = json.dumps(data, indent=4)
            modified_file_path = os.path.join(output_dir, filename)
            with open(modified_file_path, 'w', encoding='utf-8') as modified_file:
                modified_file.write(modified_data)

            # Diff without line endings (lineterm='') so every diff line can be
            # terminated uniformly; the serialised JSON has no trailing
            # newline, which would otherwise glue consecutive diffs together.
            diff_lines = list(difflib.unified_diff(
                original_data.splitlines(),
                modified_data.splitlines(),
                fromfile=f'original/{filename}',
                tofile=f'modified/{filename}',
                lineterm='',
            ))

            # Persist the complete diff (context lines included) to the log.
            log_file.writelines(f'{line}\n' for line in diff_lines)

            # Print colored diff to terminal; context lines are not shown.
            for line in diff_lines:
                if line.startswith('+'):
                    print(colored(line, 'green'))
                elif line.startswith('-'):
                    print(colored(line, 'red'))
                elif line.startswith('@@'):
                    print(colored(line, 'cyan'))
            modified_files.append(filename)
    return modified_files
def upload_modified_objects(bucket_name, output_dir, modified_files, aws_profile):
    """Upload the modified JSON files to S3 in minified form.

    Each file is read from ``output_dir``, re-serialised as compact JSON and
    stored under the key ``Scraper/1/blocks/<filename>``.

    Args:
        bucket_name: Name of the destination bucket.
        output_dir: Directory containing the modified files.
        modified_files: Names of the files in ``output_dir`` to upload.
        aws_profile: Name of the AWS profile used to build the session.

    Returns:
        None. Any failure is printed and stops the remaining uploads; it is
        not propagated to the caller.
    """
    try:
        # Initialize a session using the specified profile
        session = boto3.Session(profile_name=aws_profile)
        # Get the S3 client
        s3_client = session.client('s3')
        for filename in modified_files:
            object_key = f'Scraper/1/blocks/{filename}'
            file_path = os.path.join(output_dir, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                document = json.load(file)
            # Minify by re-serialising rather than deleting every whitespace
            # character: the latter also removes spaces *inside* string
            # values and would silently corrupt the uploaded data.
            file_content_cleaned = json.dumps(document, separators=(',', ':'))
            print(f'Uploading {filename} content:\n{file_content_cleaned}')
            s3_client.put_object(Bucket=bucket_name, Key=object_key, Body=file_content_cleaned.encode('utf-8'))
            print(f'Successfully uploaded {filename} to {bucket_name}/{object_key}')
    except Exception as e:
        print(f'An error occurred: {e}')
def download_s3_objects_in_range(bucket_name, start_block, end_block, temp_dir, output_dir, aws_profile):
    """Download a range of block files, fix them up and optionally re-upload.

    For every block number from ``start_block`` to ``end_block`` inclusive,
    the object ``Scraper/1/blocks/<block>.json`` is downloaded into
    ``temp_dir``. The downloaded files are then passed to
    ``check_and_modify_objects``; if any were modified, the user is asked on
    stdin whether to upload them back to the bucket.

    Args:
        bucket_name: Name of the S3 bucket.
        start_block: First block number to download.
        end_block: Last block number to download (inclusive).
        temp_dir: Directory for the downloaded originals; created if missing.
        output_dir: Directory for the modified files and the log.
        aws_profile: Name of the AWS profile used for all S3 access.

    Returns:
        None. Errors raised during processing are printed, not propagated.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(temp_dir, exist_ok=True)
    try:
        # Fail fast on an unknown profile instead of reporting the same
        # problem once per block from the individual downloads.
        boto3.Session(profile_name=aws_profile)
        for block in range(start_block, end_block + 1):
            object_key = f'Scraper/1/blocks/{block}.json'
            download_path = os.path.join(temp_dir, f'{block}.json')
            download_s3_object(bucket_name, object_key, download_path, aws_profile)
        modified_files = check_and_modify_objects(temp_dir, output_dir)
        if modified_files:
            print('\nDo you want to upload the modified files back to the S3 bucket? (yes/no)')
            response = input()
            # Tolerate stray whitespace around the answer.
            if response.strip().lower() == 'yes':
                upload_modified_objects(bucket_name, output_dir, modified_files, aws_profile)
            else:
                print('Changes were not uploaded.')
        print('All objects have been processed. Check modifications.log for details.')
    except Exception as e:
        print(f'An error occurred: {e}')
if __name__ == "__main__":
    # Command-line entry point: parse the positional arguments and run the
    # download / modify / upload workflow. Downloads are staged in a `temp`
    # sub-directory of the output directory.
    parser = argparse.ArgumentParser(description='Download S3 objects within a range.')
    parser.add_argument('bucket_name', type=str, help='The name of the S3 bucket.')
    parser.add_argument('start_block', type=int, help='The starting block number.')
    parser.add_argument('end_block', type=int, help='The ending block number.')
    parser.add_argument('output_dir', type=str, help='The directory to save the downloaded files.')
    parser.add_argument('aws_profile', type=str, help='The AWS profile to use.')
    args = parser.parse_args()
    # An inverted range would download nothing yet still process whatever is
    # already present in the temp directory; reject it up front.
    if args.start_block > args.end_block:
        parser.error('start_block must be less than or equal to end_block.')
    temp_dir = os.path.join(args.output_dir, 'temp')
    download_s3_objects_in_range(args.bucket_name, args.start_block, args.end_block, temp_dir, args.output_dir, args.aws_profile)