Skip to content

Instantly share code, notes, and snippets.

@robole
Last active January 4, 2023 21:48
Show Gist options
  • Save robole/1958e421923142d4bae480dc75759a18 to your computer and use it in GitHub Desktop.
Save robole/1958e421923142d4bae480dc75759a18 to your computer and use it in GitHub Desktop.
Tests the links in a sitemap XML file to verify that they are active links. It will output a CSV with the URL and its HTTP status code.
#!/bin/bash
#
# Tests the links in a sitemap XML file to verify that they are active links. It
# will output a CSV with the URL and its HTTP status code.
#
# Requires: xmllint and curl (typically installed on Unix-like systems)
# Prints the tool description, a blank separator line, and the usage line
# to stdout.
_help() {
  printf '%s\n' \
    "Description: Test the links in a sitemap XML file to see if they are active webpages. It produces a CSV file with the URL and HTTP status code of each link. By default, it will write to a file named 'output.csv'." \
    "" \
    "Usage: sitetest [sitemap file] [output file (optional)]"
}
#######################################
# Requests every URL found in the <loc> elements of a sitemap and records
# the HTTP status code of each one in a CSV file.
# Arguments:
#   $1 - sitemap XML to read (handed to xmllint as-is)
#   $2 - optional CSV path to write; defaults to "output.csv"
# Outputs:
#   Progress markers and a final summary on stdout, error messages on
#   stderr, and "URL,HTTP Status Code" rows in the CSV file.
# Returns:
#   0 once all links have been checked (even if some were not 200);
#   1 if the sitemap argument is empty, a required tool is missing, the
#   links cannot be extracted, or the CSV file cannot be written.
#######################################
_test() {
  local infile=$1
  local outfile=${2:-output.csv}
  local output link code tool
  local -a links=()
  local errors=0
  local counter=0

  if [[ -z "$infile" ]]; then
    printf 'Error: no sitemap file given\n' >&2
    return 1
  fi

  for tool in xmllint curl; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      printf 'Error: required command not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  # Extract the links before touching the output file, so that a failed run
  # does not destroy the results of a previous one.
  if ! output=$(xmllint --xpath "//*[local-name()='loc']/text()" "$infile"); then
    printf 'Error: could not extract any <loc> URLs from: %s\n' "$infile" >&2
    return 1
  fi

  # Split on whitespace into an array. Unlike an unquoted expansion this does
  # not glob, so URLs containing "?" or "*" are kept verbatim. read returns
  # non-zero at end of input even though the array is filled, hence "|| true".
  IFS=$' \t\n' read -r -d '' -a links <<< "$output" || true

  echo "Testing your website now"

  # Writing the header with ">" starts the CSV from scratch.
  if ! printf '%s\n' "URL,HTTP Status Code" > "$outfile"; then
    printf 'Error: cannot write to output file: %s\n' "$outfile" >&2
    return 1
  fi

  for link in "${links[@]}"; do
    printf '.'
    # NOTE(review): xmllint appears to print text() nodes in serialised form,
    # leaving "&" as "&amp;"; decode it so query strings reach the server
    # intact. The backslash keeps "&" literal on bash 5.2+ as well.
    link=${link//&amp;/\&}
    # Second field of the status line, e.g. "HTTP/1.1 200 OK" -> "200".
    code=$(curl -I "$link" 2> /dev/null | head -n 1 | cut -d ' ' -f 2)
    # A status line without a reason phrase would leave a trailing CR here.
    code=${code%$'\r'}
    printf '%s,%s\n' "$link" "$code" >> "$outfile"
    ((counter += 1))
    if [[ "$code" != "200" ]]; then
      ((errors += 1))
    fi
    # Progress marker: a comma after every tenth link.
    if ((counter % 10 == 0)); then
      printf ','
    fi
    # Pause between requests to go easy on the server.
    sleep 1
  done

  printf "\nLinks: %d" "$counter"
  printf "\nErrors: %d\n" "$errors"
  return 0
}
# Entry point: dispatch on the number of command-line arguments.
case "$#" in
  0)
    _help
    ;;
  1 | 2)
    # $1 is the sitemap; $2, when given, is the CSV path.
    _test "$@"
    ;;
  *)
    # Too many arguments: report the usage error instead of silently doing
    # nothing and exiting successfully.
    _help >&2
    exit 2
    ;;
esac
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment