Skip to content

Instantly share code, notes, and snippets.

@mandyedi
Created March 25, 2024 23:19
Show Gist options
  • Save mandyedi/2ce6f34144c4d7a22912ea277c7a9881 to your computer and use it in GitHub Desktop.
Save mandyedi/2ce6f34144c4d7a22912ea277c7a9881 to your computer and use it in GitHub Desktop.
import subprocess
def runcmd(cmd, verbose=False, *args, **kwargs):
    """Run *cmd* through the shell, capture its output, and return it.

    Parameters
    ----------
    cmd : str
        The shell command line to execute.
    verbose : bool
        When True, print the captured stdout (stripped) and stderr once the
        command has finished.
    *args
        Accepted for backward compatibility with existing callers; unused.
    **kwargs
        Forwarded to ``subprocess.Popen`` so callers can override options
        such as ``cwd`` or ``env``.

    Returns
    -------
    tuple[str, str]
        The command's decoded ``(stdout, stderr)``.

    NOTE(security): ``shell=True`` means *cmd* is interpreted by the shell —
    never pass untrusted input here.
    """
    process = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        shell=True,
        **kwargs,
    )
    # communicate() reads both pipes to EOF and waits for the process,
    # avoiding the deadlock risk of reading the pipes manually.
    std_out, std_err = process.communicate()
    if verbose:
        print(std_out.strip(), std_err)
    return std_out, std_err
# --- Script configuration -------------------------------------------------
# Fill in these three placeholders before running.
domain = "PUT WEBSITE'S DOMAIN"                 # e.g. "example.org"; limits recursion via -D
header = "Cookie: PUT REQUEST HEADER COOKIE"    # session cookie so authenticated pages download
link = "PUT WEBSITE'S LINK TO DOWNLOAD"         # starting URL for the mirror

# Build the wget mirroring command line; every flag is documented in the
# string literal below.  -R "*sign_out*" skips logout links so the crawl
# does not invalidate the session cookie.
command = (
    'wget -r -nc -p -E -k -R "*sign_out*" --restrict-file-names=windows '
    f'-D {domain} -np --header="{header}" {link}'
)
runcmd(command, verbose=True)
'''
-r, --recursive: download the entire Web site.
-D, --domains website.org: don't follow links outside website.org.
-np, --no-parent: don't follow links outside the directory tutorials/html/.
-p, --page-requisites: get all the elements that compose the page (images, CSS and so on).
-E, --html-extension: save files with the .html extension.
-k, --convert-links: convert links so that they work locally, off-line.
--restrict-file-names=windows: modify filenames so that they will work in Windows as well.
-nc, --no-clobber: don't overwrite any existing files (used in case the download is interrupted and resumed).
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment