Last active
January 24, 2024 13:14
-
-
Save hamoid/a9b0bdc1c96e6e6995cfad6f4b069279 to your computer and use it in GitHub Desktop.
Downloads a just-the-docs website and converts it into a PDF file for offline reading
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Downloads every page of https://guide.openrndr.org (a just-the-docs site)
# and joins them into a single PDF at /tmp/openrndr-guide-<date>.pdf for
# offline reading.
#
# Dependencies: curl, grep, sed, tail, wkhtmltopdf, pdfunite
set -euo pipefail

site="https://guide.openrndr.org"

# Work in a fresh temp dir instead of a fixed /tmp/manual: a fixed dir is a
# predictable /tmp path and, worse, stale NNNN.pdf files from a previous run
# would be swept into the final document by pdfunite's *.pdf glob.
workdir=$(mktemp -d) || { echo "mktemp failed" >&2; exit 1; }
trap 'rm -rf -- "$workdir"' EXIT
cd "$workdir" || exit 1

# curl     downloads the index page of the website
# grep     extracts the <nav> ... </nav> section
# sed (1)  injects a line break in front of every URL and prepends the domain
# sed (2)  deletes the " character and everything after it, leaving clean URLs
# tail     drops the first line, which contains a lonely <nav> tag
#          (POSIX spelling is 'tail -n +2'; bare 'tail +2' is non-portable)
mapfile -t urls < <(
  curl -s "$site" \
    | grep -o -E '<nav .*</nav>' \
    | sed "s|href=\"/|href=\"\n$site/|g" \
    | sed 's/".*//g' \
    | tail -n +2
)

echo "Found ${#urls[@]} URLs"

# one by one create NNNN.pdf files from each URL (zero-padded so the
# *.pdf glob below sorts them back into navigation order)
for i in "${!urls[@]}"; do
  printf -v padded '%04d' "$i"
  wkhtmltopdf "${urls[$i]}" "$padded.pdf"
done

# finally join all the PDF files into one
date=$(date +"%F")
pdfunite ./*.pdf "/tmp/openrndr-guide-$date.pdf"
Dependencies: wkhtmltopdf, curl, sed, pdfunite, grep
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Currently tied to guide.openrndr.org but can be adapted for other websites