Skip to content

Instantly share code, notes, and snippets.

@andrewgross
Created January 27, 2019 19:45
Show Gist options
  • Save andrewgross/6479353aef6917918ad000b8176ec062 to your computer and use it in GitHub Desktop.
Save andrewgross/6479353aef6917918ad000b8176ec062 to your computer and use it in GitHub Desktop.
This script helps set the permissions needed to read S3 Inventory data from an account that does not own the bucket. It assumes that you have a ROLE_ARN that can assume a role in the main account, and that the assumed role has Read permissions and read/write Object ACL permissions on your S3 bucket.
import datetime
import json
# Placeholder settings: replace each "<...>" value before running the script.
# Name of the S3 bucket (presumably the one passed as `bucket` to the functions below -- confirm with the caller).
BUCKET_NAME = "<s3_bucket_name>"
INVENTORY_PREFIX = "<prefix_given_to_s3_inventory>" # Should have data/, hive/, and some dated folders inside of it
# Canonical user ID that set_permissions() grants FULL_CONTROL to on each inventory object.
ACCOUNT_CUID = "<your_canonical_user_id_for_cross_account>" # Account which is not the owner of S3 bucket, but trying to access it. Controls ROLE_ARN
# Role assumed through STS at import time to build the boto3 session used for all S3 calls.
ROLE_ARN = "<role_in_cross_account_that_can_assume_to_main_account>"
def role_arn_to_session(role_arn):
    """Assume ``role_arn`` through STS and return a boto3 session for it.

    Args:
        role_arn: ARN of the IAM role to assume.

    Returns:
        A ``boto3.Session`` built from the temporary access key, secret key
        and session token returned by ``assume_role``.
    """
    # boto3 is not imported at module level in this file, so bring it into
    # scope here; without this the first call fails with NameError.
    import boto3

    client = boto3.client('sts')
    response = client.assume_role(RoleArn=role_arn, RoleSessionName="S3InventoryCleanup")
    credentials = response['Credentials']
    return boto3.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'],
    )
# Assume ROLE_ARN once, at import time; the S3 handles below are created from this session.
session = role_arn_to_session(ROLE_ARN)
# Resource handle (used for ObjectAcl) and low-level client (used for get_object), both pinned to us-east-1.
s3_resource = session.resource('s3', region_name='us-east-1')
s3_client = session.client('s3', region_name='us-east-1')
def _get_latest_inventory_date_prefix():
    """
    Return the dated folder name for the latest inventory run.

    Inventory dates are run for "yesterday" from the perspective of S3 Inventory,
    so the name is built from the current UTC time minus one day.
    """
    # Timezone-aware replacement for datetime.utcnow(), which is deprecated
    # since Python 3.12; the formatted result is the same.
    now = datetime.datetime.now(datetime.timezone.utc)
    yesterday = now - datetime.timedelta(days=1)
    # This might be different depending on when you set
    # s3 inventory to run
    return yesterday.strftime("%Y-%m-%dT04-00Z")
def get_manifest_file_location(prefix):
    """Return the key of the latest inventory manifest under ``prefix``.

    The key has the form ``<prefix>/<dated folder>/manifest.json``.
    """
    parts = (prefix, _get_latest_inventory_date_prefix(), "manifest.json")
    return "/".join(parts)
def get_inventory_file_list(bucket, prefix):
    """Return the object keys listed in the latest inventory manifest.

    Fetches the manifest from ``bucket`` with the module-level S3 client,
    decodes it as UTF-8 JSON and collects the ``key`` of every entry under
    ``files``.
    """
    manifest_key = get_manifest_file_location(prefix)
    response = s3_client.get_object(Bucket=bucket, Key=manifest_key)
    raw = response['Body'].read()
    parsed = json.loads(raw.decode("utf-8"))
    keys = []
    for entry in parsed.get('files'):
        keys.append(entry['key'])
    return keys
def _has_grant(grants, cuid):
    """Return True if any grant in ``grants`` has a grantee whose ID equals ``cuid``.

    Grants without a ``Grantee`` entry, or whose grantee has no ``ID``, never
    match a non-None ``cuid``.
    """
    return any(
        entry.get('Grantee', {}).get('ID') == cuid
        for entry in grants
    )
def set_permissions(bucket, files):
    """Grant ACCOUNT_CUID FULL_CONTROL on every listed object that lacks a grant for it.

    Args:
        bucket: Name of the bucket holding the objects.
        files: Iterable of object keys whose ACLs should be updated.

    Objects whose ACL already contains any grant for ACCOUNT_CUID are left
    untouched, so no ACL write is issued for them and re-running is cheap.
    """
    new_grant = {
        'Grantee': {
            'ID': ACCOUNT_CUID,
            'Type': 'CanonicalUser',
        },
        'Permission': 'FULL_CONTROL',
    }
    for key in files:
        object_acl = s3_resource.ObjectAcl(bucket, key)
        grants = object_acl.grants
        if _has_grant(grants, ACCOUNT_CUID):
            # Nothing to add for this object; skip the ACL write.
            continue
        # Send the existing grants plus the new one, keeping the current
        # owner; build a new list rather than mutating the resource's own.
        object_acl.put(AccessControlPolicy={
            'Grants': grants + [new_grant],
            'Owner': object_acl.owner,
        })
def get_latest_inventory_s3_files(bucket, prefix):
    """Return ``s3://`` paths for the files in the latest inventory manifest.

    Reads the key list from the manifest, runs ``set_permissions`` over those
    keys, and then formats each key as ``s3://<bucket>/<key>``.
    """
    keys = get_inventory_file_list(bucket, prefix)
    set_permissions(bucket, keys)
    paths = []
    for key in keys:
        paths.append("s3://{}/{}".format(bucket, key))
    return paths
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment