Skip to content

Instantly share code, notes, and snippets.

@baharev
Forked from veselosky/s3gzip.py
Created May 21, 2018 19:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save baharev/b4d434a56e0b7aa2b0cf8134573482c1 to your computer and use it in GitHub Desktop.
How to store and retrieve gzip-compressed objects in AWS S3
# vim: set fileencoding=utf-8 :
#
# How to store and retrieve gzip-compressed objects in AWS S3
###########################################################################
#
# Copyright 2015 Vince Veselosky and contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import absolute_import, print_function, unicode_literals
from io import BytesIO
from gzip import GzipFile
import boto3
# Reusable S3 client; credentials/region come from boto3's standard
# configuration chain (env vars, ~/.aws/credentials, instance profile, ...).
s3 = boto3.client('s3')
bucket = 'bluebucket.mindvessel.net'

# Read in some example text, as unicode. Open in binary mode and decode
# explicitly: on Python 3 a text-mode read() already returns str, so the
# original text-mode open + .decode("utf-8") would raise AttributeError.
# Binary read + decode behaves identically on Python 2 and Python 3.
with open("utext.txt", "rb") as fi:
    text_body = fi.read().decode("utf-8")

# A GzipFile must wrap a real file or a file-like object. We do not want to
# write to disk, so we use a BytesIO as a buffer.
gz_body = BytesIO()
# filename=None keeps the archive free of an embedded file name; level 9 is
# maximum compression. close() flushes the gzip trailer into gz_body.
gz = GzipFile(None, 'wb', 9, gz_body)
gz.write(text_body.encode('utf-8'))  # convert unicode strings to bytes!
gz.close()

# GzipFile has written the compressed bytes into our gz_body buffer.
s3.put_object(
    Bucket=bucket,
    Key='gztest.txt',  # Note: NO .gz extension!
    ContentType='text/plain',  # the original type
    ContentEncoding='gzip',  # MUST have or browsers will error
    Body=gz_body.getvalue(),
)

# Fetch the object back and decompress it in memory.
retr = s3.get_object(Bucket=bucket, Key='gztest.txt')
# Now the fun part. Reading it back requires this little dance, because
# GzipFile insists that its underlying file-like thing implement tell and
# seek, but boto3's io stream does not.
bytestream = BytesIO(retr['Body'].read())
got_text = GzipFile(None, 'rb', fileobj=bytestream).read().decode('utf-8')
# Round-trip sanity check: what came back equals what went up.
assert got_text == text_body
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment