Skip to content

Instantly share code, notes, and snippets.

@rickerp
Created February 28, 2021 20:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rickerp/a5674c9af97ac2d62b8b8489edfbea28 to your computer and use it in GitHub Desktop.
Save rickerp/a5674c9af97ac2d62b8b8489edfbea28 to your computer and use it in GitHub Desktop.
Python script to recursively download files from an HTML file server
#!/usr/bin/python
import re
import requests
import os
import argparse
# Command-line interface: two positional arguments (the listing URL and the
# regex that extracts entry names from its HTML), plus optional flags for the
# local target directory and HTTP basic-auth credentials.
parser = argparse.ArgumentParser()
parser.add_argument('URL', help='URL to download files')
parser.add_argument('regex', help='Regex to detect the subdirectories in html')
parser.add_argument(
    '-d', '--directory',
    default=os.getcwd() + os.sep,
    help="Directory",
)
parser.add_argument(
    '-a', '--auth',
    nargs=2,
    help="Authentication credentials if needed for request",
)
# Downstream code indexes the options as a plain dict.
args = vars(parser.parse_args())
def download_file(url, path=None, request_params=None):
    """Stream the file at *url* to disk and return the local filename.

    Args:
        url: Direct URL of the file to download.
        path: Local destination path; defaults to the last URL path segment.
        request_params: Optional dict of extra keyword arguments forwarded to
            ``requests.get`` (e.g. ``{'auth': (user, password)}``).

    Returns:
        The path the content was written to.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    local_filename = path if path else url.split('/')[-1]
    # None sentinel instead of a mutable `{}` default, which would be shared
    # across calls (classic mutable-default-argument pitfall).
    extra = request_params if request_params is not None else {}
    # stream=True fetches the body in chunks instead of loading it all into
    # memory; the `with` block ensures the connection is released.
    with requests.get(url, stream=True, **extra) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename
def getfiles(url, reg, folder=os.getcwd() + '/', auth=None):
    """Recursively mirror the files listed on an HTML index page.

    Args:
        url: Directory-listing URL (expected to end with ``/``).
        reg: Regex whose matches in the page HTML are the entry names;
            matches ending in ``/`` are treated as subdirectories.
        folder: Local directory to mirror into (expected to end with a
            path separator).
        auth: Optional 2-item (user, password) sequence for HTTP basic auth.
    """
    print(f"Requesting {url}")
    # Original code called tuple(auth) unconditionally, which raised
    # TypeError whenever -a/--auth was omitted (auth is None by default).
    credentials = tuple(auth) if auth else None
    # [1:] skips the first match, assumed to be the parent-directory link.
    entries = re.findall(reg, requests.get(url, auth=credentials).text)[1:]
    for entry in entries:
        if entry[-1] == '/':
            # Subdirectory: create it locally (idempotent, so re-running the
            # script over an existing tree no longer raises FileExistsError)
            # and recurse into it.
            os.makedirs(folder + entry, exist_ok=True)
            getfiles(url + entry, reg, folder + entry, auth)
        else:
            print(f"Requesting {url + entry}")
            download_file(url + entry, folder + entry, {'auth': credentials})
# Kick off the recursive mirror using the parsed command-line arguments.
# NOTE(review): args['auth'] is None when -a/--auth is omitted — confirm
# getfiles handles that before relying on unauthenticated runs.
getfiles(args['URL'], args['regex'], args['directory'], args['auth'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment