"""Created January 27, 2019 19:45

This script helps set the permissions needed to read S3 Inventory data from an
account that is not the owner of the bucket. It assumes that you have a
ROLE_ARN that can assume a role in the main account, and that this role has
read permissions plus read/write object-ACL permissions on your S3 bucket.
"""
import datetime
import json

import boto3
# Configuration placeholders: replace every "<...>" value before running.

# Name of the S3 bucket (presumably the `bucket` argument handed to the
# functions below -- confirm against the caller).
BUCKET_NAME = "<s3_bucket_name>"
# Prefix given to S3 Inventory. It should contain data/, hive/, and some
# dated folders.
INVENTORY_PREFIX = "<prefix_given_to_s3_inventory>"
# Canonical user ID of the account that is not the owner of the S3 bucket but
# is trying to access it (the account that controls ROLE_ARN).
# set_permissions() looks for this ID in each object's ACL grants.
ACCOUNT_CUID = "<your_canonical_user_id_for_cross_account>"
# Role ARN passed to role_arn_to_session() to build the session used for the
# S3 client and resource.
ROLE_ARN = "<role_in_cross_account_that_can_assume_to_main_account>"
def role_arn_to_session(role_arn):
    """Assume *role_arn* through STS and return a session that uses it.

    Args:
        role_arn: ARN of the IAM role to assume.

    Returns:
        A ``boto3.Session`` created from the temporary credentials returned
        by ``sts.assume_role``.

    Raises:
        botocore.exceptions.ClientError: If STS rejects the assume-role call.
    """
    client = boto3.client('sts')
    response = client.assume_role(RoleArn=role_arn, RoleSessionName="S3InventoryCleanup")
    # NOTE(review): the arguments of this Session(...) call were missing from
    # the file; they are rebuilt from the documented assume_role response.
    credentials = response['Credentials']
    return boto3.Session(
        aws_access_key_id=credentials['AccessKeyId'],
        aws_secret_access_key=credentials['SecretAccessKey'],
        aws_session_token=credentials['SessionToken'],
    )
# Assume the role once at import time; every S3 call below goes through the
# resulting session (resource API for ACL objects, client API for get_object).
session = role_arn_to_session(ROLE_ARN)
s3_resource, s3_client = (
    session.resource('s3', region_name='us-east-1'),
    session.client('s3', region_name='us-east-1'),
)
def _get_latest_inventory_date_prefix(now=None, time_suffix="T04-00Z"):
    """Return the dated folder name of the most recent inventory run.

    Inventory dates are run for "yesterday" from the perspective of
    S3 Inventory.

    Args:
        now: Reference ``datetime`` to count back from. Defaults to the
            current UTC time.
        time_suffix: Time portion appended to the date. This might be
            different depending on when you set S3 Inventory to run.

    Returns:
        A string such as ``"2019-01-26T04-00Z"``.
    """
    if now is None:
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        now = datetime.datetime.now(datetime.timezone.utc)
    yesterday = now - datetime.timedelta(days=1)
    # Concatenate rather than embed the suffix in the strftime format so a
    # "%" in a caller-supplied suffix is never treated as a directive.
    return yesterday.strftime("%Y-%m-%d") + time_suffix
def get_manifest_file_location(prefix):
    """Build the S3 key of the latest inventory ``manifest.json``.

    Args:
        prefix: Inventory prefix under which the dated folders live. A
            trailing "/" is tolerated.

    Returns:
        The key ``<prefix>/<dated folder>/manifest.json``.
    """
    date_folder = _get_latest_inventory_date_prefix()
    # S3 keys are literal strings: "prefix//date/..." (from a trailing slash)
    # or "/date/..." (from an empty prefix) would name a different object, so
    # drop the trailing slash and skip empty segments.
    segments = [prefix.rstrip("/"), date_folder, "manifest.json"]
    return "/".join(segment for segment in segments if segment)
def get_inventory_file_list(bucket, prefix):
    """Return the object keys listed in the latest inventory manifest.

    Args:
        bucket: Name of the bucket the manifest is read from.
        prefix: Inventory prefix, passed to get_manifest_file_location().

    Returns:
        A list holding the ``key`` of every entry in the manifest's
        ``files`` array; empty when the manifest has no ``files`` entry.
    """
    manifest_key = get_manifest_file_location(prefix)
    response = s3_client.get_object(Bucket=bucket, Key=manifest_key)
    # NOTE(review): the right-hand side of this assignment was missing from
    # the file; reading the whole response body is the evident intent given
    # the decode() call that follows.
    content = response['Body'].read()
    manifest = json.loads(content.decode("utf-8"))
    # `or []` avoids iterating None when "files" is absent or null.
    return [entry['key'] for entry in manifest.get('files') or []]
def _has_grant(grants, cuid):
    """Tell whether any grant in *grants* names *cuid* as its grantee.

    Args:
        grants: Iterable of grant dicts; each may hold a ``Grantee`` dict
            with an ``ID`` key.
        cuid: Canonical user ID to look for.

    Returns:
        True if some grant's ``Grantee.ID`` equals *cuid*, otherwise False.
    """
    # Grantees without an "ID" key compare as None and simply do not match a
    # real ID.
    return any(
        grant.get('Grantee', {}).get('ID') == cuid
        for grant in grants
    )
def set_permissions(bucket, files):
    """Grant ACCOUNT_CUID full control on every listed object that lacks it.

    Objects whose ACL already contains a grant for ACCOUNT_CUID are left
    untouched.

    Args:
        bucket: Name of the bucket holding the objects.
        files: Iterable of object keys to update.
    """
    for key in files:
        object_acl = s3_resource.ObjectAcl(bucket, key)
        owner = object_acl.owner
        # Copy so the new grant is appended to our own list, not to the
        # resource's cached attribute.
        grants = list(object_acl.grants)
        if _has_grant(grants, ACCOUNT_CUID):
            continue
        # NOTE(review): the append call, the grantee 'ID' entry, and the
        # final put() were missing from the file; they are rebuilt from the
        # surviving fragments and the S3 AccessControlPolicy structure.
        grants.append({
            'Grantee': {
                'ID': ACCOUNT_CUID,
                'Type': 'CanonicalUser',
            },
            'Permission': 'FULL_CONTROL',
        })
        acl = {
            'Grants': grants,
            'Owner': owner,
        }
        object_acl.put(AccessControlPolicy=acl)
def get_latest_inventory_s3_files(bucket, prefix):
    """Fix ACLs on the latest inventory files and return their S3 URIs.

    Args:
        bucket: Name of the bucket holding the inventory output.
        prefix: Inventory prefix under which the dated folders live.

    Returns:
        A list of ``s3://<bucket>/<key>`` strings, one per inventory file.
    """
    files = get_inventory_file_list(bucket, prefix)
    # Permissions are set before the paths are returned so the caller can
    # read the files straight away.
    set_permissions(bucket, files)
    return ["s3://{}/{}".format(bucket, key) for key in files]
