Skip to content

Instantly share code, notes, and snippets.

@kaushalvivek
Created May 27, 2021 18:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaushalvivek/ae97d94c2583721f8128e0be80b78f48 to your computer and use it in GitHub Desktop.
Save kaushalvivek/ae97d94c2583721f8128e0be80b78f48 to your computer and use it in GitHub Desktop.
Update ContentType for all objects in an S3 bucket based on file extension.
# imports
import boto3
import json
# --- configuration ---
# The access key pair must belong to a user who has write access to the
# bucket named in BUCKET_NAME.
# NOTE(review): the '<>' values are placeholders to be filled in before running.
BUCKET_NAME = '<>'
AWS_ACCESS_KEY_ID = '<>'
AWS_SECRET_ACCESS_KEY = '<>'

# The resource and the low-level client are both built from the same key pair.
_aws_credentials = {
    'aws_access_key_id': AWS_ACCESS_KEY_ID,
    'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
}
s3_resource = boto3.resource('s3', **_aws_credentials)
s3_client = boto3.client('s3', **_aws_credentials)

# Handle on the bucket whose objects are processed by this script.
bucket = s3_resource.Bucket(name=BUCKET_NAME)
class ContentType:
    """Derive the intended content type of an object from its key's extension."""

    # Add any other content types you want to this dict.
    # Keys must be lower-case and include the leading dot, because lookups
    # compare them against the lower-cased end of the object name.
    content_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.pdf': 'application/pdf',
        # .docx is Office Open XML, which has its own media type; only the
        # legacy binary .doc format is application/msword.
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.doc': 'application/msword',
        '.mp4': 'video/mp4',
    }

    def get_content_type_from_name(self, name):
        """Return the content type for ``name``, or ``None`` if unsupported.

        The extension is matched case-insensitively against the *end* of the
        name, so ``PHOTO.JPG`` is recognised while names that merely contain
        an extension somewhere in the middle (``a.jpg.zip``,
        ``dir.pdf/readme.txt``) are not misclassified.

        Args:
            name: Object key or file name to classify.

        Returns:
            The matching value from ``content_types``, or ``None`` when the
            name ends with none of the known extensions.
        """
        lowered = name.lower()
        for ext, content_type in ContentType.content_types.items():
            if lowered.endswith(ext):
                return content_type
        return None


# Shared module-level instance used for content type lookups.
instance = ContentType()
def update_metadata_for_object(obj, etag, content_type, *, acl='public-read',
                               content_disposition='inline'):
    """Rewrite an object's ContentType by copying the object onto itself.

    The copy uses ``MetadataDirective="REPLACE"``, which makes S3 drop every
    metadata field that is not supplied again in the request. To avoid losing
    them, the object's current user metadata and its Cache-Control,
    Content-Encoding, Content-Language and Expires values are read with
    ``head_object`` and passed back unchanged.

    Args:
        obj: Any object exposing a ``key`` attribute; the key is looked up in
            ``BUCKET_NAME``.
        etag: ETag the source must still have; passed as
            ``CopySourceIfMatch`` so the copy is refused if the object
            changed in the meantime.
        content_type: Value to store as the object's ContentType.
        acl: Canned ACL to apply to the copy. Defaults to ``'public-read'``,
            which makes the object publicly readable. Pass ``None`` to send
            no ACL with the request.
        content_disposition: ContentDisposition to store. Defaults to
            ``'inline'``. Pass ``None`` to omit it.

    Returns:
        None.
    """
    current = s3_client.head_object(Bucket=BUCKET_NAME, Key=obj.key)

    # Fields that REPLACE would otherwise wipe; carry over the ones present.
    preserved_fields = ('Metadata', 'CacheControl', 'ContentEncoding',
                        'ContentLanguage', 'Expires')
    params = {
        field: current[field] for field in preserved_fields if field in current
    }

    params.update({
        'Bucket': BUCKET_NAME,
        'Key': obj.key,
        'CopySource': {"Bucket": BUCKET_NAME, "Key": obj.key},
        'ContentType': content_type,
        'CopySourceIfMatch': etag,
        'MetadataDirective': "REPLACE",
    })
    if content_disposition is not None:
        params['ContentDisposition'] = content_disposition
    if acl is not None:
        params['ACL'] = acl

    s3_client.copy_object(**params)
def fetch_etag(obj):
    """Return the ETag reported by ``s3_client.head_object`` for ``obj``.

    Args:
        obj: Any object exposing a ``key`` attribute; the key is looked up in
            ``BUCKET_NAME``.

    Returns:
        The ``'ETag'`` entry of the ``head_object`` response.
    """
    head_response = s3_client.head_object(Key=obj.key, Bucket=BUCKET_NAME)
    etag = head_response['ETag']
    return etag
# Walk every object in the bucket and rewrite the ContentType of those whose
# extension is recognised.
print("Initiating metadata update ...")
for i, obj in enumerate(bucket.objects.all()):
    # Classify from the key alone first, so that unsupported objects cost no
    # extra S3 request and are not announced as "Updating".
    content_type = instance.get_content_type_from_name(obj.key)
    if not content_type:
        print("Extension not supported.")
        continue

    # The ETag is passed along so the copy is refused if the object changes
    # between this lookup and the update.
    etag = fetch_etag(obj)
    print(f"Updating file #{i+1} -- ETag : {etag}")
    update_metadata_for_object(obj, etag, content_type)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment