Skip to content

Instantly share code, notes, and snippets.

@holgerd77
Created May 4, 2015 12:40
Show Gist options
  • Save holgerd77/ff83c09fb5c7a8e90638 to your computer and use it in GitHub Desktop.
Save holgerd77/ff83c09fb5c7a8e90638 to your computer and use it in GitHub Desktop.
Script for Farmsubsidy to send folders with exported flat CSV files to S3 bucket
#Taken from: https://gist.github.com/SavvyGuard/6115006
#Usage: python send_folder_to_s3 COUNTRY_DIRNAME
#Dependencies: boto
import boto
import boto.s3
import os.path
import sys
# AWS credentials - you get them when you sign up for S3.
AWS_ACCESS_KEY_ID = ''
AWS_ACCESS_KEY_SECRET = ''
# Fill in info on data to upload
# destination bucket name
bucket_name = 'data.farmsubsidy.org'

# The script takes exactly one argument: the country directory to upload.
# sys.exit(message) prints the message to stderr and exits with status 1, so
# calling shells and cron jobs can detect the failure (a bare sys.exit()
# would report success).
if len(sys.argv) != 2:
    sys.exit("Please run this script with (exactly) one COUNTRY_DIRNAME as argument!")
if not os.path.isdir(sys.argv[1]):
    sys.exit("Country directory does not exist!")
# Drop trailing slashes from the argument (shell tab-completion adds one) so
# that neither the local path nor the S3 key prefix contains a doubled '/'.
country_dir = sys.argv[1].rstrip('/')
# source directory (kept with a trailing '/')
sourceDir = country_dir + '/'
# destination directory name (on s3)
destDir = 'Flat/' + country_dir + '/'
# Files larger than this many bytes (20 MB) are uploaded in parts; smaller
# files go up in a single request.
MAX_SIZE = 20 * 1000 * 1000
# Size in bytes of each part of a multipart upload. S3 requires every part
# except the last one to be at least 5 MiB, so do not lower this below that.
PART_SIZE = 6 * 1000 * 1000
# Credentials left as empty strings are passed as None, which lets boto fall
# back to its usual lookup (environment variables / boto config file) instead
# of signing requests with an empty key.
conn = boto.connect_s3(AWS_ACCESS_KEY_ID or None,
                       AWS_ACCESS_KEY_SECRET or None)
bucket = conn.get_bucket(bucket_name)
# Collect the names of the non-directory entries that sit directly inside
# sourceDir. Only the first os.walk() result (the top directory itself) is
# consumed, so subdirectories are never descended into.
uploadFileNames = []
top_level = next(os.walk(sourceDir), None)
if top_level is not None:
    uploadFileNames.extend(top_level[2])
def percent_cb(complete, total):
    """Progress callback: emit one dot on stdout per invocation.

    Both arguments are accepted to satisfy the callback signature used by
    the upload calls in this script, but neither is inspected.
    """
    stream = sys.stdout
    stream.write('.')
    # Flush right away so the dot shows up while the upload is running.
    stream.flush()
# Upload every collected file to the bucket and make it publicly readable.
for filename in uploadFileNames:
    sourcepath = os.path.join(sourceDir, filename)
    destpath = os.path.join(destDir, filename)
    print('Uploading %s to Amazon S3 bucket %s' % (sourcepath, bucket_name))

    filesize = os.path.getsize(sourcepath)
    if filesize > MAX_SIZE:
        print("multipart upload")
        mp = bucket.initiate_multipart_upload(destpath)
        try:
            # The context manager closes the file even if a part fails.
            with open(sourcepath, 'rb') as fp:
                fp_num = 0
                # Each call consumes up to PART_SIZE bytes from fp, so the
                # file position tells us when everything has been sent.
                while fp.tell() < filesize:
                    fp_num += 1
                    print("uploading part %i" % fp_num)
                    mp.upload_part_from_file(fp, fp_num, cb=percent_cb,
                                             num_cb=10, size=PART_SIZE)
            mp.complete_upload()
        except BaseException:
            # Abort on any failure (including Ctrl-C) so the parts already
            # uploaded do not linger in the bucket, then re-raise.
            mp.cancel_upload()
            raise
    else:
        print("singlepart upload")
        k = boto.s3.key.Key(bucket)
        k.key = destpath
        k.set_contents_from_filename(sourcepath, cb=percent_cb, num_cb=10)

    # Both branches end the same way: grant public read on the new object.
    k = boto.s3.key.Key(bucket)
    k.key = destpath
    k.make_public()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment