Skip to content

Instantly share code, notes, and snippets.

@mandyedi
Created March 25, 2024 23:19
Show Gist options
  • Save mandyedi/2ce6f34144c4d7a22912ea277c7a9881 to your computer and use it in GitHub Desktop.
Save mandyedi/2ce6f34144c4d7a22912ea277c7a9881 to your computer and use it in GitHub Desktop.
import subprocess
def runcmd(cmd, verbose=False, *args, **kwargs):
    """Run *cmd* through the shell, capture its output, and return it.

    Parameters
    ----------
    cmd : str
        The shell command line to execute.
    verbose : bool
        When True, print the captured stdout (stripped) and stderr once the
        command has finished.
    *args
        Accepted for backward compatibility with existing callers; unused.
    **kwargs
        Forwarded to ``subprocess.Popen`` so callers can override options
        such as ``cwd`` or ``env``.

    Returns
    -------
    tuple[str, str]
        The command's decoded ``(stdout, stderr)``.

    NOTE(security): ``shell=True`` means *cmd* is interpreted by the shell —
    never pass untrusted input here.
    """
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        shell=True,
        **kwargs,
    )
    # communicate() reads both pipes to EOF and waits for the process,
    # avoiding the deadlock risk of reading the pipes manually.
    std_out, std_err = process.communicate()
    if verbose:
        print(std_out.strip(), std_err)
    return std_out, std_err
# --- Script configuration -------------------------------------------------
# Fill in these three placeholders before running.
domain = "PUT WEBSITE'S DOMAIN"                 # e.g. "example.org"; limits recursion via -D
header = "Cookie: PUT REQUEST HEADER COOKIE"    # session cookie so authenticated pages download
link = "PUT WEBSITE'S LINK TO DOWNLOAD"         # starting URL for the mirror

# Build the wget mirroring command line; every flag is documented in the
# string literal below.  -R "*sign_out*" skips logout links so the crawl
# does not invalidate the session cookie.
command = (
    'wget -r -nc -p -E -k -R "*sign_out*" --restrict-file-names=windows '
    f'-D {domain} -np --header="{header}" {link}'
)
runcmd(command, verbose=True)
'''
-r, --recursive: download the entire Web site.
-D, --domains website.org: don't follow links outside website.org.
-np, --no-parent: don't follow links outside the directory tutorials/html/.
-p, --page-requisites: get all the elements that compose the page (images, CSS and so on).
-E, --html-extension: save files with the .html extension.
-k, --convert-links: convert links so that they work locally, off-line.
--restrict-file-names=windows: modify filenames so that they will work in Windows as well.
-nc, --no-clobber: don't overwrite any existing files (used in case the download is interrupted and resumed).
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment