Skip to content

Instantly share code, notes, and snippets.

@lidio601
Last active October 13, 2016 15:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lidio601/428997f97216d0c9204122b16f62bb81 to your computer and use it in GitHub Desktop.
Save lidio601/428997f97216d0c9204122b16f62bb81 to your computer and use it in GitHub Desktop.
This is a simple web page validator which keeps a local cached copy of a page and checks whether the page content has changed over time. This is useful in case your website gets replaced for some reason while the web server keeps answering HTTP 200 even though the actual content has changed. This script is intended to be run from within a monit script.
#!/bin/bash
#WEBSITE="http://example.com"
#THRES=3
#FORCE=$3
# Directory in which the cached copies of fetched pages are stored.
CACHEDIR=/tmp

# Debug output is enabled only when stdout is attached to a terminal;
# otherwise (e.g. started by cron or a supervisor) the script stays quiet.
DEBUG="0"
if [ -t 1 ]; then
  DEBUG="1"
fi
#######################################
# Print the path of the cache file that belongs to a URL.
# The file name is the MD5 digest of the URL followed by a newline
# (the same input the digest has always been computed from, so existing
# cache files keep their names) plus the ".html.cac" suffix.
# Globals:   CACHEDIR (read)
# Arguments: $1 - URL of the page
# Outputs:   cache file path on stdout
#######################################
function get_cachefile {
  local url="$1"
  local hash
  #local hash=$(echo "$url" | md5)
  hash=$(printf '%s\n' "$url" | md5sum | awk '{ print $1 }')
  # Quoted on purpose: an unquoted expansion would be word-split and
  # glob-expanded, mangling a CACHEDIR that contains whitespace.
  printf '%s\n' "$CACHEDIR/$hash.html.cac"
}
#######################################
# Print the path of the cached copy of a URL, if one exists.
# Arguments: $1 - URL of the page
# Outputs:   the cache file path on stdout when the file exists,
#            otherwise an empty line
#######################################
function fetch_from_cache {
  local url="$1"
  local filepath
  # The URL is passed quoted so characters such as "?" or "*" are not
  # treated as glob patterns and spaces do not split it into several words.
  filepath=$(get_cachefile "$url")
  if [ ! -f "$filepath" ]; then
    #[ "$DEBUG" -eq "1" ] && echo "# no cache copy of $url"
    filepath=""
  fi
  printf '%s\n' "$filepath"
}
#######################################
# Download a page into a fresh temporary file.
# The file holds the first line of the HEAD response (the HTTP status
# line) followed by the page body.
# Arguments: $1 - URL of the page
# Outputs:   path of the temporary file on stdout; nothing on failure
# Returns:   0 on success, 1 if the temp file could not be created or
#            either curl request failed
#######################################
function fetch_from_remote {
  local url="$1"
  local tempfile header
  tempfile=$(mktemp) || return 1
  # Capture the headers before trimming them: in a "curl | head" pipeline
  # $? only reflects head, so a failed request would go unnoticed.
  if ! header=$(curl --silent -I "$url"); then
    rm -f -- "$tempfile"
    return 1
  fi
  printf '%s\n' "$header" | head -n 1 >"$tempfile"
  if ! curl --silent "$url" >>"$tempfile"; then
    # Do not leave a half-written temp file behind.
    rm -f -- "$tempfile"
    return 1
  fi
  printf '%s\n' "$tempfile"
  #[ "$DEBUG" -eq "1" ] && echo "# new version loaded of $url"
}
#######################################
# Count the lines that differ between two files.
# Every line of the default diff output that starts with "<" or ">" is
# counted, so one changed line contributes 2 (one removal, one addition).
# Arguments: $1 - first file
#            $2 - second file
# Outputs:   the number of differing lines on stdout
#######################################
function count_difference {
  local f1="$1"
  local f2="$2"
  #diff -U 0 "$cache" "$new" | grep -v ^@
  #comm -1 "$cache" "$new"
  # The pattern is quoted so the shell cannot glob-expand it against file
  # names in the working directory. grep -c exits 1 when the count is 0;
  # "|| true" keeps the function's status at 0 in that case.
  diff "$f1" "$f2" | grep -c '^[<>]' || true
}
#######################################
# Fetch a page, compare it with its cached copy and exit accordingly.
# Globals:   DEBUG (read) - "1" enables progress messages on stdout
# Arguments: $1 - URL of the page
#            $2 - difference threshold (integer, default 0)
#            $3 - "1" to replace the cache when the threshold is exceeded
#                 instead of sending a mail (default 0)
# Exits:     0 when no cached copy existed yet or the difference count is
#              within the threshold (the cache is refreshed either way)
#            1 when the page could not be fetched, the threshold is not an
#              integer, or the difference count exceeds the threshold
#######################################
function run {
  local url="$1"
  local thres="$2"
  local force_renew="$3"
  # Keep the working variables out of the global scope.
  local cachepath new cache ts count

  [ "$DEBUG" -eq "1" ] && echo "# fetching url $url"
  if [ -z "$thres" ]; then
    thres=0
  fi
  if [ -z "$force_renew" ]; then
    force_renew=0
  fi
  # A non-numeric threshold would make the comparison below fail and fall
  # through to "no difference", silently overwriting the cache.
  if ! [[ "$thres" =~ ^-?[0-9]+$ ]]; then
    echo "invalid difference threshold: $thres" >&2
    exit 1
  fi

  cachepath=$(get_cachefile "$url")
  [ "$DEBUG" -eq "1" ] && echo "# cache file: $cachepath"

  new=$(fetch_from_remote "$url")
  if [ -z "$new" ]; then
    [ "$DEBUG" -eq "1" ] && echo "# unable to fetch a new version of $url"
    exit 1
  fi
  [ "$DEBUG" -eq "1" ] && echo "# temp file: $new"
  #ts=$(stat -f "%Sm" "$new")
  ts=$(stat --format "%y" "$new")
  [ "$DEBUG" -eq "1" ] && echo "# new version fetched - $ts"

  cache=$(fetch_from_cache "$url")
  if [ -z "$cache" ]; then
    [ "$DEBUG" -eq "1" ] && echo "# no cache copy found"
    mv "$new" "$cachepath"
    [ "$DEBUG" -eq "1" ] && echo "# no previous version found - considering that it's fine"
    exit 0
  fi
  #ts=$(stat -f "%Sm" "$cache")
  ts=$(stat --format "%y" "$cache")
  [ "$DEBUG" -eq "1" ] && echo "# cache copy - $ts"

  count=$(count_difference "$cache" "$new")
  [ "$DEBUG" -eq "1" ] && echo "# difference count: $count [thres: $thres]"
  if [ "$count" -gt "$thres" ]; then
    if [ "$force_renew" -eq "1" ]; then
      mv "$new" "$cachepath"
      #ts=$(stat -f "%Sm" "$cachepath")
      ts=$(stat --format "%y" "$cachepath")
      [ "$DEBUG" -eq "1" ] && echo "# renewed cache file at $ts"
    else
      date | mail -s "difference found for $url" -a "$new" -a "$cache" root
      # The fetched copy has been handed to mail; remove it so repeated
      # alerting runs do not pile up temp files.
      rm -f -- "$new"
    fi
    exit 1
  fi

  # Within the threshold: the fresh copy becomes the new reference.
  mv "$new" "$cachepath"
  exit 0
}
# test 1
#CACHEFILE=$(get_cachefile $WEBSITE)
#echo "Cache file: $CACHEFILE"
# test 2
#cache=$(fetch_from_cache $WEBSITE)
#echo "Cache copy: $cache"
# test 3
#new=$(fetch_from_remote $WEBSITE)
#echo "Newly fetched: $new"
# test 4
#echo "Running"
#WEBSITE="http://example.com/"
#THRES=3
#FORCE=$3
#run $WEBSITE $THRES $FORCE
# Entry point: the URL is mandatory, the threshold and the force-renew flag
# are optional and passed through to run as given.
[ -n "$1" ] || {
  echo "Usage $0 <URL> <DIFFERENCE-THRESHOLD> <FORCE-CACHE-RENEW>"
  exit 1
}

run "$1" "$2" "$3"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment