|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
""" |
|
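This script downloads files of the spacenet dataset. It was written as a
wrapper of the "aws s3api get-object" command, which requires a configured
AWS cli; the download is currently performed by the "axel" command, which
must be installed, from the root url given with the -r option.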
|
see pages: |
|
https://aws.amazon.com/public-datasets/spacenet/ |
|
http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html |
|
""" |
|
from __future__ import print_function

import argparse
import os
import subprocess

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2
|
|
|
|
|
def download_file(s3_path, rooturl):
    """
    Download a single file of the spacenet dataset.

    s3_path is a path as listed in manifest.txt, for example
    "./AOI_1_Rio/srcData/mosaic_8band/013022223121.tif". A similar directory
    tree is created locally, relative to the current directory. The manifest
    itself can be downloaded with the option: -p manifest.txt

    rooturl is the url the cleaned s3_path is resolved against (urljoin) to
    build the address handed to axel.

    Nothing is downloaded when s3_path contains no '.' (it is then not
    considered a file) or when the local file already exists. Failures are
    printed, not raised, so that a selection keeps going after a bad entry.
    Always returns None.
    """
    # Drop surrounding whitespace, then the leading "", "." and ".." path
    # segments. str.lstrip('./') strips a *set of characters*, not a prefix,
    # and would also eat the leading dot of names such as ".hidden/file.txt".
    segments = s3_path.strip().split('/')
    first = 0
    while first < len(segments) and segments[first] in ('', '.', '..'):
        first += 1
    s3_path = '/'.join(segments[first:])

    # s3_path is a file only if it contains a '.'
    if '.' not in s3_path:
        return

    # if a file already exists do nothing (no download)
    if os.path.isfile(s3_path):
        print('skip file: "{}", because it already exists'.format(s3_path))
        return

    # create the directory (and parents) for the new file
    dirname = os.path.dirname(s3_path)
    if dirname and not os.path.isdir(dirname):
        print('creating directory: {}'.format(dirname))
        try:
            os.makedirs(dirname)
        except OSError as e:
            print('Exception: {}'.format(e))
            # Only give up when the directory is really missing (it may have
            # been created concurrently): axel would have nowhere to write.
            if not os.path.isdir(dirname):
                return

    # Download the file with axel (-n 10: up to 10 connections). The first
    # implementation used:
    #   aws s3api get-object --bucket spacenet-dataset
    #       --request-payer requester --key s3_path s3_path
    print('downloading file: {} ...'.format(s3_path))
    command = ["axel", "-n", "10", urljoin(rooturl, s3_path), "--output", s3_path]
    try:
        subprocess.check_call(command)
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError: axel could not be started; CalledProcessError: axel
        # exited with a non-zero status.
        print('Exception: {}'.format(e))
|
|
|
|
|
def download_selection(selection_file_path, rooturl):
    """
    Download every file listed in a selection file.

    The selection file holds one s3 path per line, in the same form as the
    manifest file, for example:
    ./manifest.txt
    ./AOI_1_Rio/srcData/mosaic_8band/013022223121.tif

    Each line is handed, untouched, to download_file together with rooturl.
    """
    with open(selection_file_path, 'r') as listing:
        # readline() returns '' only at end of file, which ends the iteration
        for entry in iter(listing.readline, ''):
            download_file(entry, rooturl)
|
|
|
|
|
def main(args):
    """
    Parse the arguments, and call a download function

    usage: spacenet_download.py [-h] [-p PATH | -s SELECTION] [-r ROOTURL]

    optional arguments:
      -h, --help            show this help message and exit
      -p PATH, --path PATH  a single s3 path to be downloaded
      -s SELECTION, --selection SELECTION
                            path to a selection file. All s3 paths written in
                            this file will be downloaded
      -r ROOTURL, --rooturl ROOTURL
                            root url every s3 path is resolved against
                            (mandatory)

    args is the complete argument vector in sys.argv form (program name
    first). When it is empty or None, argparse reads sys.argv itself.

    Returns 0 once the download function has been called; invalid usage
    exits through parser.error().
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-p", "--path",
                       help="a single s3 path to be downloaded")
    group.add_argument("-s", "--selection",
                       help="path to a selection file. All s3 paths written in this " +
                       "file will be downloaded")
    # -r is registered on the parser, not in the exclusive group: it has to
    # be given together with -p or -s, which the group would forbid.
    parser.add_argument("-r", "--rooturl",
                        help="root url every s3 path is resolved against " +
                        "(mandatory)")
    # Skip the program name; fall back to sys.argv when nothing was passed.
    options = parser.parse_args(args[1:] if args else None)
    if not options.rooturl:
        parser.error("-r is mandatory")
    if options.path:
        download_file(options.path, options.rooturl)
    elif options.selection:
        download_selection(options.selection, options.rooturl)
    else:
        parser.error('Use the -p or -s option, for example: -p manifest.txt')
    return 0
|
|
|
if __name__ == '__main__':
    # Script entry point: hand the raw argument vector to main() and use
    # its return value as the process exit status.
    import sys

    raise SystemExit(main(sys.argv))