Skip to content

Instantly share code, notes, and snippets.

@krosaen
Created June 8, 2018 20:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krosaen/fc59f2e499bd759aec970aeeeda79e58 to your computer and use it in GitHub Desktop.
Save krosaen/fc59f2e499bd759aec970aeeeda79e58 to your computer and use it in GitHub Desktop.
Sync an S3 object to a local cache using the AWS CLI. Deliberately simple, but it provides some useful validation.
#!/usr/bin/python3
import argparse
import os
import subprocess
import sys
def main():
    """Mirror a single S3 object into the local cache under ~/local-data/s3.

    Reads one positional command-line argument, ``s3_path``, in the form
    ``bucket-name/key/path/to/object.txt``. The object is copied with
    ``aws s3 cp`` to ``~/local-data/s3/<s3_path>`` unless a file already
    exists at that location, in which case nothing is downloaded.

    Exits through ``parser.error`` (status 2) when ``s3_path`` starts with a
    slash or when ``s3_object_exists`` reports that the object is missing.

    Raises:
        subprocess.CalledProcessError: if ``aws s3 cp`` exits non-zero.
    """
    # The text must go in as ``description=``; the first positional parameter
    # of ArgumentParser is ``prog`` and would replace the program name.
    parser = argparse.ArgumentParser(
        description='mirrors an s3 object to local root (~/local-data/s3)')
    parser.add_argument('s3_path', help='s3 object path to copy down e.g bucket-name/key/path/to/object.txt')
    args = parser.parse_args()

    if args.s3_path.startswith('/'):
        parser.error('s3_path should not start with slash. valid example: "bucket-name/key/path/to/object.txt"')
    if not s3_object_exists(args.s3_path):
        parser.error('{} is not a valid s3 object'.format(args.s3_path))

    # Neither os.path nor the aws subprocess (no shell involved) expands "~",
    # so resolve it here; otherwise a literal "~" directory is created in cwd.
    local_path = os.path.expanduser('~/local-data/s3/{}'.format(args.s3_path))
    local_dir = os.path.dirname(local_path)

    if not os.path.isdir(local_dir):
        # exist_ok guards against another process creating the directory
        # between the check above and this call.
        os.makedirs(local_dir, exist_ok=True)
        print("mkdir {}".format(local_dir))

    if os.path.exists(local_path):
        print("({} already exists)".format(local_path))
        return

    s3_cp_cmd = ['aws', 's3', 'cp', 's3://{}'.format(args.s3_path), local_path]
    print(" ".join(s3_cp_cmd))
    subprocess.check_call(s3_cp_cmd)
def s3_object_exists(s3_path):
    """Return True if ``aws s3api head-object`` succeeds for ``s3_path``.

    Args:
        s3_path: object location as ``bucket-name/key/path/to/object.txt``;
            everything before the first ``/`` is the bucket, everything
            after it is the object key (passed to the CLI unchanged).

    Returns:
        True when the ``head-object`` call exits with status 0. False when it
        exits non-zero, or when ``s3_path`` has no bucket or no key part (in
        which case the CLI is not invoked at all).

    Raises:
        OSError: if the ``aws`` executable cannot be started.
    """
    # partition() splits only on the first "/", so a key that happens to
    # contain the bucket name (e.g. "data/data/file.txt") is kept intact.
    bucket_name, _, object_key = s3_path.partition('/')
    if not bucket_name or not object_key:
        return False

    head_cmd = [
        'aws',
        's3api',
        'head-object',
        '--bucket',
        bucket_name,
        '--key',
        object_key,
    ]
    # Only the exit status matters; discard the command's output.
    result = subprocess.run(
        head_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return result.returncode == 0
# Run the sync only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment