# mikeyp/makefile

## makefile

# Domain of the site to mirror; also used to name the output archive.
root_domain := example.com

# Timestamp (YYYYmmddHHMMSS). ":=" evaluates the shell call once, when the
# makefile is parsed, so every reference within a single run sees the same value.
ts := $(shell /bin/date "+%Y%m%d%H%M%S")

# Base name shared by the archive directory and its tarball.
archive_file := $(root_domain)-archive-$(ts)

# Default goal: remove the output of any previous run, then build a fresh
# archive.
#
# None of these targets corresponds to a file, so they are declared phony;
# without this, a file or directory named "all", "clean" or "generate" in the
# working directory would make the target look up to date and skip its recipe.
#
# NOTE(review): "clean" and "generate" are independent prerequisites, so under
# "make -j" they may run concurrently. Run this makefile serially.
.PHONY: all clean generate
all: clean generate

# Remove everything a previous run may have left behind: httrack's working
# directory and any earlier archive directories or tarballs for this domain.
#
# The former "*o" argument was a stray shell glob that also deleted every
# file or directory in the current directory whose name ends in "o"; only
# paths this makefile creates are removed now.
clean:
	rm -rf archive
	# The glob matches both the archive directories and their .tar.gz files
	rm -rf $(root_domain)-archive*

# Mirror the site with httrack, flatten the result into the archive directory,
# add the 404 page, strip "index.html" from links and package it all as a
# gzip-compressed tarball next to this makefile.
#
# Requires httrack, find, sed and tar on PATH and a 404.html file in the
# current directory. See the httrack manual for the meaning of its flags.
#
# Fixes relative to the previous version:
#   * a second, tab-indented copy of the whole makefile had been pasted at the
#     end of this recipe; the shell would have tried to run it as commands
#     (starting with "ts := ...") and failed after the tarball was built;
#   * stray "*o" / "*0" globs were passed to "rm -rf". "*0" in particular
#     matches the freshly created archive directory whenever the timestamp
#     ends in 0, deleting it mid-build;
#   * "mkdir -p" no longer fails if the mirror already contains a 404/ directory;
#   * the sed expressions are quoted and the "." is escaped so that only a
#     literal "index.html" is removed.
generate:
	# Scrape the site
	httrack "https://${root_domain}"  -w -O "./archive" -I0 -N "%h%p/%n/index%[page].%t" -%v --robots=0 -c10 -%e0

	# Move the archive out of the httrack directory structure and delete its cache
	mv archive/${root_domain}/ ${archive_file}
	rm -rf archive

	# Move the index page and delete its directory
	mv ${archive_file}/index/index.html ${archive_file}/index.html
	rm -rf ${archive_file}/index/

	# Copy the 404 page
	mkdir -p ${archive_file}/404/
	cp 404.html ${archive_file}/404/index.html

	# Update the links to remove 'index.html' from the end - this requires correct server setup.
	# Order matters: strip the longer "index/index.html" form before the bare "index.html".
	find ${archive_file} -name '*.html' -exec sed -i -e 's/index\/index\.html//g' {} \;
	find ${archive_file} -name '*.html' -exec sed -i -e 's/index\.html//g' {} \;

	# Remove all the *-e files (BSD/macOS sed reads "-i -e" as "back up with suffix -e"; GNU sed leaves none)
	find ${archive_file} -name '*.html-e' -delete

	# Create the tarball
	tar -cvzf ${archive_file}.tar.gz ${archive_file}/