Created
July 24, 2019 13:01
-
-
Save Miesvanderlippe/1141beee3654a89b34a3f21191b44a94 to your computer and use it in GitHub Desktop.
Script to automatically clone a webpage using HTTrack, zip it, hash it and log all of the above.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Clone every URL piped in on stdin with HTTrack, zip the mirror, hash the
# archive and log all of the above.
#
# Usage:    printf '%s\n' https://example.com | ./this-script
# Requires: httrack, 7z, shasum, and md5 (md5sum is used when md5 is absent).
#
# For each URL, the following are written to the current directory, where
# <name> is the URL with every character outside [A-Za-z0-9._-] replaced
# by '-':
#   <name>/           HTTrack mirror (its hts-cache/ directory is removed)
#   <name>.log        HTTrack output, 7z output and both hash lines
#   <name>.zip        zip archive of the mirror's contents
#   <name>.zip.md5    MD5 line for the archive (appended)
#   <name>.zip.sha1   shasum line for the archive (appended)

#######################################
# Turn an arbitrary string into a file-system-safe name.
# Arguments: $1 - string to sanitize (typically a URL)
# Outputs:   sanitized name on stdout
#######################################
safe_file_name() {
  # LC_ALL=C keeps the ranges strictly ASCII regardless of the user's locale.
  # No backslash before the final '-': inside a bracket expression it would
  # be a literal backslash and would therefore be treated as "safe".
  printf '%s\n' "$1" | LC_ALL=C sed -E 's/[^A-Za-z0-9._-]/-/g'
}

#######################################
# Print the MD5 line for a file using whichever tool is installed.
# Arguments: $1 - path of the file to hash
# Outputs:   the tool's hash line on stdout
#######################################
md5_of() {
  if command -v md5 >/dev/null 2>&1; then
    md5 -- "$1"
  else
    md5sum -- "$1"
  fi
}

#######################################
# Mirror one URL, archive the result and record its hashes.
# Arguments: $1 - URL to download
# Outputs:   progress line and both hash lines on stdout
#######################################
mirror_site() {
  local url=$1
  local name

  printf '%s\n' "Downloading site: ${url}"

  # Strip anything unsafe from the URL.
  name=$(safe_file_name "${url}")

  # Assumes https://github.com/epitron/mitm-adblock is running on
  # localhost:8118 for the sake of cookie-banner blocking etc.
  # stdin is redirected so httrack cannot swallow the remaining URLs that
  # the caller's read loop still has to process.
  httrack -r1 -c32 -P http://localhost:8118 "${url}" -O "${name}" \
    </dev/null >"${name}.log" 2>&1

  # We're not using caching; the output is for distribution only.
  # ${name:?} aborts instead of ever expanding to "/hts-cache/".
  rm -rf -- "${name:?}/hts-cache/"

  # Archive the output.
  7z a -bb2 -tzip -- "${name}.zip" "${name:?}"/* \
    </dev/null >>"${name}.log" 2>&1

  # Save hashes both in the log and in a separate file.
  md5_of "${name}.zip" | tee -a -- "${name}.zip.md5" "${name}.log"
  shasum -- "${name}.zip" | tee -a -- "${name}.zip.sha1" "${name}.log"
}

#######################################
# Entry point: read one URL per line from a pipe on stdin.
# Returns: 1 when stdin is not a pipe.
#######################################
main() {
  local line

  # Check to see if a pipe exists on stdin.
  if [[ ! -p /dev/stdin ]]; then
    echo "Pipe data into this script instead." >&2
    return 1
  fi

  # -r keeps backslashes intact; the "|| [[ -n ]]" part handles a final
  # line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    line=${line%$'\r'}               # tolerate CRLF input
    [[ -n "${line}" ]] || continue   # skip blank lines
    mirror_site "${line}"
  done
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment