Last active
August 29, 2015 14:23
-
-
Save ankona/8b02ea4e86cadfeb7172 to your computer and use it in GitHub Desktop.
List all the files in an S3 bucket
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AWS credentials used by boto.connect_s3() below.
AWS_ACCESS_KEY_ID = ''  # todo: insert aws key.
AWS_ACCESS_KEY_SECRET = ''  # todo: insert aws secret.

import os
import boto
import boto.s3
import os.path
import sys, getopt
import logging
from datetime import datetime

# Filled in from the command-line options parsed further down.
s3_search_prefix = ""
working_dir_path = ""

# Timestamp that names this run's log files, e.g. "20150829.0223PM".
process_start_time = datetime.now()
execute_timestamp = process_start_time.strftime("%Y%m%d.%I%M%p")

# Bail out early when no arguments were given at all.
# print(...) with a single argument behaves identically on Python 2 and 3;
# sys.exit(1) (rather than quit()) signals the error to the shell.
if len(sys.argv) == 1:
    print('You must supply arguments. Use -h for help')
    sys.exit(1)
def print_help_text():
    """Print command-line usage and a sample invocation to stdout.

    Returns None; used purely for its console output.
    """
    # The original usage line advertised an unhandled -m <mode> option and
    # omitted -b <bucket>, which the script actually reads (see the option
    # loop and the get_bucket call). Corrected to match real behavior.
    print('file_list_s3.py -p <s3prefix> -w <workdir> -b <bucket>')
    print("sample: python file_list_s3.py -w /home/ec2-user/s3_puller/ -p 05_21_14_2013_CAIA_Level_1_September -b readyforkapx-ondemand")
# Default so a missing -b option is reported as a usage error below instead
# of raising NameError the first time target_bucket is read.
target_bucket = ""

# Parse options; -h/-p/-w/-b are handled, the m: spec is kept for
# backward compatibility with existing invocations that pass -m.
try:
    opts, args = getopt.getopt(sys.argv[1:], "hp:w:m:b:",
                               ["help", "s3prefix=", "workdir=", "bucket="])
except getopt.GetoptError:
    print_help_text()
    sys.exit(2)

for opt, arg in opts:
    if opt == '-h':
        print_help_text()
        sys.exit()
    elif opt in ("-p", "--s3prefix"):
        s3_search_prefix = arg
    elif opt in ("-w", "--workdir"):
        working_dir_path = arg
    elif opt in ("-b", "--bucket"):
        target_bucket = arg

# A missing prefix is allowed but expensive: the whole bucket gets walked.
if s3_search_prefix == "":
    print(' YOU DID NOT SUPPLY A PREFIX. ALL ITEMS IN THE BUCKET WILL BE INSPECTED! THIS MAY TAKE A LONG TIME!')

if working_dir_path == "":
    print('you must supply the working directory path.')
    print_help_text()
    sys.exit(1)

if target_bucket == "":
    print('you must supply the target bucket name (-b).')
    print_help_text()
    sys.exit(1)
# Create the working directory (and its logs/ subdirectory) where log
# files for this run will be written.
if not os.path.exists(working_dir_path):
    os.makedirs(working_dir_path)

# os.path.join handles separators correctly (e.g. a trailing "/" on -w).
logs_dir = os.path.join(working_dir_path, "logs")
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)

log_file_name = execute_timestamp + ".file_list.txt"
err_log_file_name = execute_timestamp + ".file_list.ERRORS.txt"

# One logger, three handlers: stdout, a full DEBUG log file, and an
# errors-only file recording WARNING+ as bare messages.
logger = logging.getLogger("s3_importer")
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

sh = logging.StreamHandler(sys.stdout)
sh.setLevel(logging.DEBUG)
sh.setFormatter(formatter)
logger.addHandler(sh)

fh = logging.FileHandler(os.path.join(logs_dir, log_file_name))
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
logger.addHandler(fh)

simple_error_formatter = logging.Formatter('%(message)s')
fhe = logging.FileHandler(os.path.join(logs_dir, err_log_file_name))
fhe.setLevel(logging.WARNING)
fhe.setFormatter(simple_error_formatter)
logger.addHandler(fhe)

# Lazy %-style arguments: the message is only formatted if actually emitted.
logger.info("retrieving data from target s3 bucket: %s", target_bucket)
logger.info("s3 searches limited by prefix: %s", s3_search_prefix)
logger.info("working directory: %s", working_dir_path)

conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_KEY_SECRET)
bucket = conn.get_bucket(target_bucket)
def list_files_from_s3(target_write_path):
    """Log the name of every key in the target bucket, then a total count.

    Listing is limited to the module-level ``s3_search_prefix`` when one was
    supplied; otherwise the whole bucket is walked. ``target_write_path`` is
    created if it does not already exist.
    """
    # Restrict the listing to the prefix when the caller configured one.
    keys = bucket.list(prefix=s3_search_prefix) if s3_search_prefix else bucket.list()

    if not os.path.exists(target_write_path):
        os.makedirs(target_write_path)

    total = 0
    for key in keys:
        logger.info(key.name)
        total += 1
    logger.info("{0} files found.".format(total))
# Top-level boundary: run the listing and record anything that goes wrong.
try:
    list_files_from_s3(working_dir_path)
except Exception as ex:  # "except E, ex" is Python-2-only syntax; "as" works on 2.6+ and 3.
    # logger.exception also records the traceback, which logger.error(ex) dropped.
    logger.exception(ex)
    sys.exit(1)  # nonzero exit code so callers/cron can detect the failure
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment