@yjmade
Created December 5, 2016 02:58
Transfer a directory with axel, without compressing it first

On the origin server:

First, install SimpleTornadoServer:

pip install SimpleTornadoServer
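
SimpleTornadoServer only plays the role of a static file server here; if it is unavailable, Python's built-in server should be a drop-in alternative (an assumption; nothing Tornado-specific is used), e.g.:

python -m SimpleHTTPServer 8080   # Python 2
python -m http.server 8080        # Python 3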

Then generate a manifest of the directory to be downloaded and start the server:

find . > manifest.txt
python -m SimpleTornadoServer
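
The manifest is raw find output, one path per line. After stripping the leading "./", the script below treats any entry without a dot as a directory and skips it (a crude heuristic: a directory named, say, v1.0 would be mis-detected). A hypothetical excerpt, reusing the paths from the script's docstring:

./manifest.txt
./AOI_1_Rio/srcData/mosaic_8band
./AOI_1_Rio/srcData/mosaic_8band/013022223121.tif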

On the target machine, fetch the manifest and feed it to the download script:

ROOTURL=http://example.com:8080/
wget ${ROOTURL}manifest.txt
python download.py -r $ROOTURL -s manifest.txt
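
axel must be installed on the target machine as well; on Debian/Ubuntu it is typically available as a package:

sudo apt-get install axel

The download.py script itself: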
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Adapted from a wrapper of the "aws s3api get-object" command for
downloading the SpaceNet dataset; here each path in the manifest is
fetched over plain HTTP with axel instead.
See:
https://aws.amazon.com/public-datasets/spacenet/
http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-welcome.html
"""
from __future__ import print_function

import argparse
import os
import subprocess

try:  # Python 3
    from urllib.parse import urljoin
except ImportError:  # Python 2
    from urlparse import urljoin


def download_file(s3_path, rooturl):
    """
    Download a single file served under rooturl. s3_path must be valid;
    a similar directory tree will be created locally. The manifest.txt
    file provides the list of paths. You can download it with the
    option: -p manifest.txt
    """
    # normalize the path: drop surrounding whitespace and a leading "./"
    s3_path = s3_path.strip()
    if s3_path.startswith('./'):
        s3_path = s3_path[2:]
    # entries without a '.' are assumed to be directories and skipped
    if '.' not in s3_path:
        return
    # if the file already exists, do nothing (no download)
    if os.path.isfile(s3_path):
        print('skip file: "{}", because it already exists'.format(s3_path))
        return
    # create the directory (and parents) for the new file
    try:
        dirname = os.path.dirname(s3_path)
        if dirname and not os.path.isdir(dirname):
            print('creating directory: {}'.format(dirname))
            os.makedirs(dirname)
    except Exception as e:
        print('Exception: {}'.format(e))
    # download the file with axel, using 10 parallel connections (-n 10)
    try:
        print('downloading file: {} ...'.format(s3_path))
        # command = ['aws', 's3api', 'get-object',
        #            '--bucket', 'spacenet-dataset', '--request-payer', 'requester',
        #            '--key', s3_path, s3_path]
        command = ["axel", "-n", "10", urljoin(rooturl, s3_path),
                   "--output", s3_path]
        subprocess.check_call(command)
    except Exception as e:
        print('Exception: {}'.format(e))


def download_selection(selection_file_path, rooturl):
    """
    Download the files listed in the selection file, which must contain
    one path per line (as in the manifest file), for example:
    ./manifest.txt
    ./AOI_1_Rio/srcData/mosaic_8band/013022223121.tif
    """
    with open(selection_file_path, 'r') as selection_file:
        for s3_path in selection_file:
            download_file(s3_path, rooturl)


def main():
    """
    Parse the arguments and call a download function.
    usage: download.py [-h] -r ROOTURL [-p PATH | -s SELECTION]
    arguments:
      -h, --help            show this help message and exit
      -r ROOTURL, --rooturl ROOTURL
                            root URL of the HTTP server serving the files
      -p PATH, --path PATH  a single path to be downloaded
      -s SELECTION, --selection SELECTION
                            path to a selection file; all paths written
                            in this file will be downloaded
    """
    parser = argparse.ArgumentParser()
    # -r is required and combines with either -p or -s, so it must not
    # be part of the mutually exclusive group
    parser.add_argument("-r", "--rooturl", required=True,
                        help="root URL of the HTTP server serving the files")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-p", "--path",
                       help="a single path to be downloaded")
    group.add_argument("-s", "--selection",
                       help="path to a selection file; all paths written "
                            "in this file will be downloaded")
    args = parser.parse_args()
    if args.path:
        download_file(args.path, args.rooturl)
    elif args.selection:
        download_selection(args.selection, args.rooturl)
    else:
        parser.error('Use the -p or -s option, for example: -s manifest.txt')
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main())
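
Since existing files are skipped, re-running the same command resumes an interrupted transfer. A single file can also be fetched with -p, for example:

python download.py -r $ROOTURL -p ./AOI_1_Rio/srcData/mosaic_8band/013022223121.tif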