Last active
October 13, 2016 15:54
-
-
Save lidio601/428997f97216d0c9204122b16f62bb81 to your computer and use it in GitHub Desktop.
This is a simple web page validator which keeps a local cached copy of a page and checks whether the page content changes over time. This is useful in case your website gets replaced for some reason while the web server keeps answering HTTP 200 even though the actual content has changed. This script is intended to be run from a monit check.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Web page change monitor.
#
# Fetches a URL, compares it with a locally cached copy and reports when
# the number of differing lines exceeds a threshold.
#
# Usage: $0 <URL> <DIFFERENCE-THRESHOLD> <FORCE-CACHE-RENEW>
#
# Example values for manual testing:
#WEBSITE="http://example.com"
#THRES=3
#FORCE=$3

# Directory in which the cached copy of each monitored page is stored.
CACHEDIR=/tmp

# Debug output is enabled only when stdout is a terminal; when stdout is
# not a terminal (cron maybe) the script stays quiet.
if [ -t 1 ]; then
  DEBUG="1"
else
  DEBUG="0"
fi
#######################################
# Print the path of the cache file that belongs to a URL.
# Globals:   CACHEDIR (read) - directory holding the cached copies
# Arguments: $1 - URL of the monitored page
# Outputs:   "$CACHEDIR/<md5 of URL>.html.cac" on stdout
# Returns:   0 on success, 1 if no hash could be computed
#######################################
function get_cachefile {
  local url="$1"
  local hash

  # The URL is hashed together with a trailing newline, exactly like the
  # former `echo "$url" | md5sum`, so existing cache files keep their names.
  # printf is used because echo would misinterpret a URL such as "-n".
  # Alternative for systems that ship `md5` instead of `md5sum`:
  #hash=$(printf '%s\n' "$url" | md5)
  hash=$(printf '%s\n' "$url" | md5sum | awk '{ print $1 }')
  if [ -z "$hash" ]; then
    echo "get_cachefile: unable to hash $url" >&2
    return 1
  fi

  # Quoted so that a CACHEDIR containing whitespace or glob characters is
  # printed unchanged.
  printf '%s\n' "$CACHEDIR/$hash.html.cac"
}
#######################################
# Print the path of the cached copy of a URL, if one exists.
# Arguments: $1 - URL of the monitored page
# Outputs:   the cache file path on stdout, or an empty line when no
#            regular file exists at that path
#######################################
function fetch_from_cache {
  local url="$1"
  local filepath

  # The URL is passed quoted: URLs frequently contain characters such as
  # "?" or "*" that the shell would otherwise treat as glob patterns.
  filepath=$(get_cachefile "$url")

  if [ ! -f "$filepath" ]; then
    # No debug message here: stdout is the return channel of this function.
    filepath=""
  fi

  printf '%s\n' "$filepath"
}
#######################################
# Download a URL into a fresh temporary file.
# The file contains the first line of the HEAD response followed by the
# body of a GET request.
# Arguments: $1 - URL to download
# Outputs:   path of the temporary file on stdout (nothing on failure)
# Returns:   0 on success, 1 if the temp file could not be created or
#            either curl request failed
#######################################
function fetch_from_remote {
  local url="$1"
  local tempfile

  tempfile=$(mktemp) || return 1

  # sed reads its whole input, so curl never sees a closed pipe; this
  # matters because curl's own exit status (PIPESTATUS[0]) is checked
  # rather than the status of the last command in the pipeline.
  curl --silent -I "$url" | sed -n '1p' >"$tempfile"
  if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    rm -f -- "$tempfile"
    return 1
  fi

  if ! curl --silent "$url" >>"$tempfile"; then
    rm -f -- "$tempfile"
    return 1
  fi

  # No debug message here: stdout is the return channel of this function.
  printf '%s\n' "$tempfile"
}
#######################################
# Count the lines that differ between two files.
# Arguments: $1 - first file
#            $2 - second file
# Outputs:   number of diff output lines starting with "<" or ">"
#######################################
function count_difference {
  local f1="$1"
  local f2="$2"
  local count

  # The pattern is quoted: left bare, the shell may expand it as a glob
  # against files in the current directory before grep ever sees it.
  count=$(diff -- "$f1" "$f2" | grep -c '^[><]')

  printf '%s\n' "$count"
}
# Print a debug message on stdout when DEBUG is "1".
function _run_debug {
  if [ "${DEBUG:-0}" = "1" ]; then
    printf '%s\n' "$*"
  fi
}

#######################################
# Fetch a URL, compare it with the cached copy and exit accordingly.
# This function always terminates the script.
# Globals:   DEBUG (read)
# Arguments: $1 - URL to check
#            $2 - maximum tolerated number of differing lines (default 0)
#            $3 - 1 to replace the cache even when the threshold is
#                 exceeded (default 0)
# Exits:     0 when no cached copy existed yet or the difference is within
#              the threshold (the cache is replaced by the new copy)
#            1 when the page cannot be fetched, the threshold or the
#              difference count is not an integer, or the threshold is
#              exceeded (a mail is sent to root unless $3 is 1)
#######################################
function run {
  local url="$1"
  local thres="${2:-0}"
  local force_renew="${3:-0}"
  local cachepath new cache count ts

  _run_debug "# fetching url $url"

  # A non-numeric threshold would make the comparison below fail and the
  # changed page would silently be accepted; reject it up front.
  if ! [[ "$thres" =~ ^-?[0-9]+$ ]]; then
    echo "invalid difference threshold: $thres" >&2
    exit 1
  fi

  cachepath=$(get_cachefile "$url")
  _run_debug "# cache file: $cachepath"

  new=$(fetch_from_remote "$url")
  if [ -z "$new" ]; then
    _run_debug "# unable to fetch a new version of $url"
    exit 1
  fi
  _run_debug "# temp file: $new"

  #ts=$(stat -f "%Sm" "$new")
  ts=$(stat --format "%y" "$new")
  _run_debug "# new version fetched - $ts"

  cache=$(fetch_from_cache "$url")
  if [ -z "$cache" ]; then
    _run_debug "# no cache copy found"
    mv -- "$new" "$cachepath"
    _run_debug "# no previous version found - considering that it's fine"
    exit 0
  fi

  #ts=$(stat -f "%Sm" "$cache")
  ts=$(stat --format "%y" "$cache")
  _run_debug "# cache copy - $ts"

  count=$(count_difference "$cache" "$new")
  # Some wc implementations pad their output with blanks.
  count=${count//[[:space:]]/}
  _run_debug "# difference count: $count [thres: $thres]"
  if ! [[ "$count" =~ ^[0-9]+$ ]]; then
    echo "unable to compare $cache with $new" >&2
    rm -f -- "$new"
    exit 1
  fi

  if [ "$count" -gt "$thres" ]; then
    if [ "$force_renew" -eq 1 ]; then
      mv -- "$new" "$cachepath"
      #ts=$(stat -f "%Sm" "$cachepath")
      ts=$(stat --format "%y" "$cachepath")
      _run_debug "# renewed cache file at $ts"
    else
      date | mail -s "difference found for $url" -a "$new" -a "$cache" root
      # The rejected download is not kept; without this, every alerting
      # run would leave another temporary file behind.
      rm -f -- "$new"
    fi
    exit 1
  fi

  mv -- "$new" "$cachepath"
  exit 0
}
# test 1 | |
#CACHEFILE=$(get_cachefile $WEBSITE) | |
#echo "Cache file: $CACHEFILE" | |
# test 2 | |
#cache=$(fetch_from_cache $WEBSITE) | |
#echo "Cache copy: $cache" | |
# test 3 | |
#new=$(fetch_from_remote $WEBSITE) | |
#echo "Newly fetched: $new" | |
# test 4 | |
#echo "Running" | |
#WEBSITE="http://example.com/" | |
#THRES=3 | |
#FORCE=$3 | |
#run $WEBSITE $THRES $FORCE | |
# Entry point: a URL is mandatory; threshold and force flag are optional
# and are defaulted inside run.
if [ -z "$1" ]; then
  # Usage errors go to stderr so they are not mistaken for regular output.
  echo "Usage $0 <URL> <DIFFERENCE-THRESHOLD> <FORCE-CACHE-RENEW>" >&2
  exit 1
fi

run "$1" "$2" "$3"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment