palashkulsh/download_full_website_using_wget.sh

## download_full_website_using_wget.sh
wget -l 1 --recursive --page-requisites --html-extension --domains elastic.co --no-parent https://www.elastic.co/guide/en/logstash/current/


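-l 1 > limit the recursion depth to one level of links from the start page
--page-requisites > also download everything needed to display each page (CSS, images, and so on)
--html-extension > save downloaded pages with an .html extension
--domains > only follow links to hosts within the listed domain (elastic.co here)
--no-parent > never ascend above the starting directory https://www.elastic.co/guide/en/logstash/current/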

Converting the downloaded website to PDF (one PDF per HTML file, written next to it as <name>.html.pdf):
 find   ../www.elastic.co/guide/en/logstash/current/ -iname "*.html" -exec electron-pdf {} {}.pdf \;

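 After all the files have been converted to PDF, open the website and work out the order of the pages
 from its table of contents. In this case, the following one-liner, run in the browser's developer console, printed the page links in order: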

 $x('//*[@id="guide"]/div/div/div[1]/div[2]/div[2]/ul/li/ul/li/span/a/@href').forEach(function(k){console.log(k.nodeValue)})

 Next, combine the PDFs in the order retrieved by the previous command (1.pdf ... n.pdf below are placeholders for the real file names):

 pdftk 1.pdf 2.pdf 3.pdf .... n.pdf cat output finalbook.pdf

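 Finally, compress the combined PDF with Ghostscript. Note that the command below reads logstashbook.pdf, so either rename finalbook.pdf from the previous step or adjust the input file name: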

 gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=compressedlogstashbook.pdf logstashbook.pdf
	wget -l 1 --recursive --page-requisites --html-extension --domains elastic.co --no-parent https://www.elastic.co/guide/en/logstash/current/


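	-l 1 > limit the recursion depth to one level of links from the start page
	--page-requisites > also download everything needed to display each page (CSS, images, and so on)
	--html-extension > save downloaded pages with an .html extension
	--domains > only follow links to hosts within the listed domain (elastic.co here)
	--no-parent > never ascend above the starting directory https://www.elastic.co/guide/en/logstash/current/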

	Converting the downloaded website to PDF (one PDF per HTML file, written next to it as <name>.html.pdf):
	find ../www.elastic.co/guide/en/logstash/current/ -iname "*.html" -exec electron-pdf {} {}.pdf \;

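	After all the files have been converted to PDF, open the website and work out the order of the pages
	from its table of contents. In this case, the following one-liner, run in the browser's developer console, printed the page links in order: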

	$x('//*[@id="guide"]/div/div/div[1]/div[2]/div[2]/ul/li/ul/li/span/a/@href').forEach(function(k){console.log(k.nodeValue)})

	Next, combine the PDFs in the order retrieved by the previous command (1.pdf ... n.pdf below are placeholders for the real file names):

	pdftk 1.pdf 2.pdf 3.pdf .... n.pdf cat output finalbook.pdf

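	Finally, compress the combined PDF with Ghostscript. Note that the command below reads logstashbook.pdf, so either rename finalbook.pdf from the previous step or adjust the input file name: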

	gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=compressedlogstashbook.pdf logstashbook.pdf