@pete-otaqui · Created October 21, 2016
download a website for offline browsing with wget
#!/bin/bash
wget -E -k -r -p -e robots=off https://some-site.com/docs/
#### Note the following arguments (a reusable wrapper follows below):
# -E (--adjust-extension) : append a ".html" suffix to downloaded HTML files
# -k (--convert-links) : rewrite internal links within downloaded files to point at the local copies
# -r (--recursive) : download recursively by following internal links found in pages
# -p (--page-requisites) : download everything a page needs to display, i.e. images, styles, scripts
# -e robots=off : ignore robots.txt (many sites use it to block crawlers, which would otherwise stop the download)
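#### A reusable wrapper, as a minimal sketch: the function name "mirror_site" and the
#### optional target-directory argument are illustrative, not part of the original gist.
#### -P (--directory-prefix) sets where wget saves the tree.
mirror_site() {
  local url="$1"          # page or section to mirror
  local dest="${2:-.}"    # optional target directory, defaults to the current one
  wget -E -k -r -p -e robots=off -P "$dest" "$url"
}
# Usage:
# mirror_site https://some-site.com/docs/ ./docs-mirror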
#### Other useful arguments (combined in the sketch below)
# --no-parent : don't ascend in the path hierarchy (useful for grabbing just a "/docs/" section)
# -A "/index.html,*.svg,*/docs/*" : comma-separated "accept list"; entries may be shell-style patterns
# -R "*.eot,*.woff,/archive" : comma-separated "reject list"; entries may be shell-style patterns
# -H (--span-hosts) : follow links onto other hosts; be careful you don't try to download the entire web
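#### A combined example, as a sketch only: the URL and the accept/reject patterns are
#### illustrative and should be adapted to the site being mirrored.
wget -E -k -r -p -e robots=off \
  --no-parent \
  -A "*.html,*.svg,*.css,*.js" \
  -R "*.eot,*.woff" \
  https://some-site.com/docs/
# Note: -H is deliberately omitted here so the crawl stays on some-site.com,
# and --no-parent keeps it from climbing out of /docs/.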