Skip to content

Instantly share code, notes, and snippets.

@vp777
Last active October 20, 2020 16:53
Show Gist options
  • Save vp777/1e6d5520c49cc4ca3d84e59c8a3296e6 to your computer and use it in GitHub Desktop.
Save vp777/1e6d5520c49cc4ca3d84e59c8a3296e6 to your computer and use it in GitHub Desktop.
A small script that attempts to identify when an http resource was last modified. It achieves that by either using the Last-Modified response header or through binary search on the If-Modified-Since/If-Unmodified-Since request headers.
#!/bin/bash
#not sure how useful If-Modified-Since/If-Unmodified-Since would be if Last-Modified header is not available:)
# URL of the resource to inspect (mandatory first argument).
resource=${1?Missing the URL of the resource}

# Number of leading date components to resolve, in the order
# year, month, day, hour, minute, second. Defaults to 2 (year and month).
accuracy=${2:-2}

# A non-numeric accuracy used to reach `seq` and the array slices further
# down and produce meaningless output, so reject it up front.
if [[ ! $accuracy =~ ^[0-9]+$ ]]; then
  printf 'Invalid accuracy "%s": expected a non-negative integer\n' "$accuracy" >&2
  exit 1
fi
# Force base 10 so that values such as "08" are not parsed as octal later on.
accuracy=$((10#$accuracy))
# Only six components exist; larger values already behaved like 6.
if ((accuracy > 6)); then
  accuracy=6
fi
#######################################
# Substitutes placeholders in a template string.
# Arguments:
#   $1      - the template
#   $2, $3  - first literal pattern and its replacement
#   $4, $5  - second pattern/replacement pair, and so on
# Outputs:
#   The template with the FIRST occurrence of each pattern replaced,
#   written to stdout without a trailing newline. A pattern that has no
#   matching replacement argument is replaced by the empty string.
#######################################
replacer() {
  local text=$1
  local pattern_pos replacement_pos
  for ((pattern_pos = 2; pattern_pos <= $#; pattern_pos += 2)); do
    replacement_pos=$((pattern_pos + 1))
    # ${!n} reads positional parameter number n; quoting the pattern makes
    # it match literally instead of as a glob.
    text=${text/"${!pattern_pos}"/"${!replacement_pos}"}
  done
  printf %s "$text"
}
#######################################
# Classifies a response to an If-Modified-Since probe.
# Globals:
#   mod_resp_code_ifmod (read) - status code the server returned when the
#                                header carried the recent reference date
# Arguments:
#   $1 - HTTP status code of the probe being classified
# Outputs:
#   "1" when $1 equals mod_resp_code_ifmod, otherwise "0".
#######################################
test_ifmod_since() {
  if [[ $1 -eq $mod_resp_code_ifmod ]]; then
    echo 1
  else
    echo 0
  fi
}
#######################################
# Classifies a response to an If-Unmodified-Since probe.
# Globals:
#   mod_resp_code_ifunmod (read) - status code the server returned when the
#                                  header carried the old reference date
# Arguments:
#   $1 - HTTP status code of the probe being classified
# Outputs:
#   "0" when $1 equals mod_resp_code_ifunmod, otherwise "1".
#######################################
test_ifunmod_since() {
  if [[ $1 -eq $mod_resp_code_ifunmod ]]; then
    echo 0
  else
    echo 1
  fi
}
# Browser-like User-Agent sent with every request this script makes.
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"

#######################################
# Extracts the date from a Last-Modified response header.
# Reads raw HTTP response headers on stdin. The header name is matched
# case-insensitively because HTTP field names are case-insensitive and
# HTTP/2 responses carry them in lowercase.
# Outputs:
#   "<year> <month> <day>" taken from the first Last-Modified header.
# Returns:
#   0 when the header was found, 1 otherwise (nothing is printed).
#######################################
extract_last_modified() {
  local line day month year rest
  line=$(grep -i -m 1 '^last-modified:') || return 1
  line=${line%$'\r'} # header lines are CRLF-terminated
  line=${line#*, }   # drop "Last-Modified: <weekday>, "
  read -r day month year rest <<<"$line"
  printf '%s %s %s\n' "$year" "$month" "$day"
}

# Fast path: when the server states Last-Modified outright, report it and
# stop. -D - dumps only the response headers to stdout (the body goes to
# /dev/null), so nothing but real header lines reaches the parser.
last_modified=$(curl -s -D - -o /dev/null -H "User-Agent: $user_agent" "$resource" \
  | extract_last_modified)
if [[ -n $last_modified ]]; then
  echo "$last_modified LM"
  exit
fi
# Two reference dates for probing conditional-request support: one far in
# the past and one recent (midnight of the previous day).
# HTTP-date (IMF-fixdate) requires a two-digit day, English day/month names
# and GMT, so the day is zero-padded and `date` runs in the C locale in UTC.
old_date="Sat, 01 Jan 2000 00:00:00 GMT"
current_date=$(LC_ALL=C date -u -d '1 day ago' +'%a, %d %b %Y 00:00:00 GMT')

#######################################
# Requests $resource with one extra request header and prints the status.
# Globals:   user_agent (read), resource (read)
# Arguments: $1 - complete header line, e.g. "If-Modified-Since: <date>"
# Outputs:   the HTTP status code on stdout
#######################################
probe_status() {
  curl -H "User-Agent: $user_agent" -s -o /dev/null -w "%{http_code}" -H "$1" "$resource"
}

# A header counts as supported when the old and the recent date produce
# different status codes.
base_resp_code_ifmod=$(probe_status "If-Modified-Since: ${old_date}")
mod_resp_code_ifmod=$(probe_status "If-Modified-Since: ${current_date}")
if [[ $base_resp_code_ifmod -ne $mod_resp_code_ifmod ]]; then
  echo "If-Modified-Since: supported ($base_resp_code_ifmod,$mod_resp_code_ifmod)"
  testing_func=test_ifmod_since
  header="If-Modified-Since"
fi

# Same probe for If-Unmodified-Since. When both headers are supported this
# later check wins and If-Unmodified-Since is used.
base_resp_code_ifunmod=$(probe_status "If-Unmodified-Since: ${current_date}")
mod_resp_code_ifunmod=$(probe_status "If-Unmodified-Since: ${old_date}")
if [[ $base_resp_code_ifunmod -ne $mod_resp_code_ifunmod ]]; then
  echo "If-Unmodified-Since: supported ($base_resp_code_ifunmod,$mod_resp_code_ifunmod)"
  testing_func=test_ifunmod_since
  header="If-Unmodified-Since"
fi

# Neither header changed the response: fall back to If-Modified-Since.
if [[ -z $testing_func ]]; then
  echo "Non of the headers appear to be supported, trying with If-Modified-Since"
  testing_func=test_ifmod_since
  header="If-Modified-Since"
fi
# Request-header template; every %PLACEHOLDER% is filled in before a probe
# is sent. The weekday is always sent as "Mon", whatever the actual date.
header_template="$header: Mon, %DAY% %MONTH% %YEAR% %HOUR%:%MIN%:%SEC% GMT"

# Candidate values for each date component, in ascending order (the search
# below bisects these arrays by index).
sec=({00..59})
min=({00..59})
hour=({00..23})
days=({01..31})
months=(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)
# 2000..<current year>. Brace expansion cannot take a computed bound, so an
# arithmetic loop is used instead of eval.
years=()
this_year=$(date +"%Y")
for ((y = 2000; y <= this_year; y++)); do
  years+=("$y")
done

# Components from most to least significant, with their placeholders in the
# same order.
var_iter_order=(years months days hour min sec)
pattern_iter_order=(%YEAR% %MONTH% %DAY% %HOUR% %MIN% %SEC%)

# Value substituted for each component while it is not the one being
# searched. The day is zero-padded: HTTP-date requires a two-digit day.
declare -A replacements=(
  [years]=2020
  [months]=Jan
  [days]=01
  [hour]=00
  [min]=00
  [sec]=00
)

# Components beyond the requested accuracy are never searched: bake their
# default into the template once and drop them from the table.
for ((i = accuracy; i < ${#var_iter_order[@]}; i++)); do
  current_var_name=${var_iter_order[i]}
  header_template=$(replacer "$header_template" "${pattern_iter_order[i]}" "${replacements[$current_var_name]}")
  unset "replacements[$current_var_name]"
done

# Keep only the components that will actually be searched.
var_iter_order=("${var_iter_order[@]:0:$accuracy}")
pattern_iter_order=("${pattern_iter_order[@]:0:$accuracy}")
# Resolve the date components one at a time, most significant first. Each
# component is found by bisecting its candidate array with one request per
# step; the result is stored in `replacements` and reused when the following
# components are searched.
component_count=${#var_iter_order[@]}
for ((i = 0; i < component_count; i++)); do
  current_var_name=${var_iter_order[i]}
  # Indirect expansion: copy the array whose name is in current_var_name.
  candidates_ref="${current_var_name}[@]"
  candidates=("${!candidates_ref}")

  # Fill in every placeholder except the one being searched, using the
  # values currently held in `replacements`.
  pre_ifmod_header=$header_template
  for ((j = 1; j < component_count; j++)); do
    relative_j=$(((i + j) % component_count))
    replacement_var=${var_iter_order[relative_j]}
    pre_ifmod_header=$(replacer "$pre_ifmod_header" "${pattern_iter_order[relative_j]}" "${replacements[$replacement_var]}")
  done

  left=0
  right=$((${#candidates[@]} - 1))
  until ((left > right)); do
    middle=$(((left + right) / 2))
    ifmod_header=$(replacer "$pre_ifmod_header" "${pattern_iter_order[i]}" "${candidates[middle]}")
    resp_code=$(curl -H "User-Agent: $user_agent" -s -o /dev/null -w "%{http_code}" -H "${ifmod_header}" "$resource")
    verdict=$("$testing_func" "$resp_code")
    if [[ $verdict == 1 ]]; then
      # Testing function printed 1: continue in the lower half.
      right=$((middle - 1))
    else
      left=$((middle + 1))
    fi
  done

  # `right` ends on the last candidate for which the testing function did
  # not print 1; it is -1 when every probe printed 1, so clamp to index 0.
  if ((right < 0)); then
    right=0
  fi
  replacements[$current_var_name]=${candidates[right]}
done
# Print the resolved components in search order, each followed by a space,
# then a tag naming the header that was used: IM for If-Modified-Since,
# UM for If-Unmodified-Since. No trailing newline is written.
for var_name in "${var_iter_order[@]}"; do
  printf '%s ' "${replacements[$var_name]}"
done
if [[ $testing_func == test_ifmod_since ]]; then
  printf IM
else
  printf UM
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment