Skip to content

Instantly share code, notes, and snippets.

@laughingman7743
Last active March 15, 2022 15:25
Show Gist options
  • Save laughingman7743/4e0d609c765553b600c6 to your computer and use it in GitHub Desktop.
Save laughingman7743/4e0d609c765553b600c6 to your computer and use it in GitHub Desktop.
redshift merge unload file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import gzip
import json
import os
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import cStringIO
def unzip_merge_file(bucket, manifest_file, out_file):
    """Download the gzip parts listed in a manifest and merge them locally.

    The manifest object is read from ``bucket`` and parsed as JSON.  Each
    item of its ``entries`` list must provide a ``url`` of the form
    ``s3://<bucket name>/<key>``.  Every referenced object is downloaded,
    gunzipped and appended to ``out_file`` in manifest order.  The key path
    of each part is printed to stdout as it is processed.

    Args:
        bucket: boto S3 bucket that holds the manifest and the parts.
        manifest_file: Key name of the manifest inside ``bucket``.
        out_file: Local path of the merged output.  An existing file is
            overwritten.
    """
    # Size of each decompressed chunk copied to the output file.
    chunk_size = 1024 * 1024
    url_prefix = 's3://' + bucket.name

    # Load the manifest before touching the output, so that a missing or
    # malformed manifest does not destroy a previously merged file.
    manifest_key = Key(bucket, manifest_file)
    manifest = json.loads(manifest_key.get_contents_as_string())

    # 'wb' truncates any earlier output in a single step, replacing the
    # separate exists()/remove() check followed by an append-mode open.
    with open(out_file, 'wb') as f:
        for entry in manifest['entries']:
            # Strip only the first occurrence of the scheme + bucket prefix,
            # so a key that happens to contain the same text stays intact.
            path = entry['url'].replace(url_prefix, '', 1)
            print(path)
            key = Key(bucket, path)
            # The compressed object is buffered in memory so that GzipFile
            # gets a seekable file object to read from.
            stream = cStringIO.StringIO()
            try:
                key.get_contents_to_file(stream)
                stream.seek(0)
                unzip = gzip.GzipFile(fileobj=stream)
                try:
                    # Copy in bounded chunks instead of materialising the
                    # whole decompressed part with a single read().
                    while True:
                        chunk = unzip.read(chunk_size)
                        if not chunk:
                            break
                        f.write(chunk)
                finally:
                    unzip.close()
            finally:
                # Release the compressed buffer before the next part.
                stream.close()
def _build_parser():
    """Create the command-line parser for the merge script.

    Options:
        -b / --bucket    required, stored as ``bucket_name``
        -m / --manifest  required, stored as ``manifest``
        -o / --output    optional, stored as ``output`` (default output.txt)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-b', '--bucket',
        dest='bucket_name', type=str, required=True,
        help='Bucket name',
    )
    parser.add_argument(
        '-m', '--manifest',
        dest='manifest', type=str, required=True,
        help='Manifest file path',
    )
    parser.add_argument(
        '-o', '--output',
        dest='output', type=str, default='output.txt',
        help='Output file path (default: output.txt)',
    )
    return parser


# Module-level parser used by the script entry point.
PARSER = _build_parser()
if __name__ == '__main__':
    # Parse the command line only when run as a script.  Doing it at import
    # time made a plain `import` of this module consume the importing
    # program's sys.argv and exit when the required options were missing.
    ARGS = PARSER.parse_args()
    # S3Connection() is called without explicit credentials, so boto
    # resolves them itself.
    conn = S3Connection()
    bucket = conn.get_bucket(ARGS.bucket_name)
    unzip_merge_file(bucket, ARGS.manifest, ARGS.output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment