Skip to content

Instantly share code, notes, and snippets.

@BeFiveINFO
Created February 8, 2018 23:44
Show Gist options
  • Save BeFiveINFO/33dd291be922373da414a50ac7c0a62e to your computer and use it in GitHub Desktop.
Save BeFiveINFO/33dd291be922373da414a50ac7c0a62e to your computer and use it in GitHub Desktop.
Bulk validate for Nu Html Checker
#!/usr/bin/env bash
# Bulk-validate a list of URLs against a Nu Html Checker server.
#
# This script uses arrays, [[ ]] and $'...' quoting, which are bash features;
# a strictly POSIX /bin/sh (dash, for example) rejects that syntax, so the
# interpreter has to be bash.
#
# Requirements:
# - Nu Html Checker validation server, preferably one you run yourself.
# - curl
# - perl (used to tidy each response)
# Usage:
# bulk_validation -l http://validation.server.address urllist.txt resultsoutput.json
# -l specifies the HTTP address of the validation server
#    (default: http://localhost:8080/).
# urllist.txt lists the URLs to validate, one URL per line
#    (default: ./filelist.txt).
# The results are written to the output file as a single JSON array
#    (default: ./results.json).
# Set options
# @see https://code.i-harness.com/ja/q/2eef9
# Walk the command line once: "-l <address>" selects the validation server,
# every other word is collected in order as a positional argument.
POSITIONAL=()
until [[ $# -eq 0 ]]; do
  if [[ "$1" == "-l" ]]; then
    VNURL="$2"
    # Two single shifts on purpose: if -l is the final word, `shift 2` would
    # shift nothing and this loop would never terminate.
    shift # drop the flag
    shift # drop its value
  else
    # Not an option we know: remember it for later.
    POSITIONAL+=("$1")
    shift
  fi
done
# Put the collected words back as $1, $2, ...
set -- "${POSITIONAL[@]}"
# Option parsing is finished; what remains in $1 and $2 are the file names.
# Each setting falls back to its default when it is missing or empty.

# $1: file holding the URLs to validate.
FILENAME="${1:-./filelist.txt}"

# $2: file that receives the results.
OUTFILENAME="${2:-./results.json}"

# Validation server address; a local checker is assumed when none was given.
VNURL="${VNURL:-http://localhost:8080/}"
# Main program begins

#######################################
# Print the settings this run will use.
# Globals:   VNURL, FILENAME, OUTFILENAME (read)
# Arguments: none
# Outputs:   four lines on stdout; each label is separated from its value
#            by a tab character
#######################################
print_run_summary() {
  # printf is used instead of echo: bash's builtin echo prints "\t" literally
  # unless -e or the xpg_echo option is in effect, so the tab never appeared.
  printf '%s\n' "Starting validation"
  printf '%s\t%s\n' "Validation server:" "$VNURL"
  printf '%s\t%s\n' "File list filename:" "$FILENAME"
  printf '%s\t%s\n' "Output Filename:" "$OUTFILENAME"
}

print_run_summary
# Set IFS to new line otherwise the array will be splited with each space
IFS=$'\n'
# Variables used in the loop below
COUNTER=1
VALIDATION_RESULTS_JSON=()
# Loop through each URL listed in the file
while read page_url; do
if [[ -n $page_url ]]; then
# echo "http://localhost:8080/?out=json&doc=$page_url"
echo "Processing #$COUNTER : $page_url"
VALIDATION_RESULT=`curl -X GET "http://localhost:8080/?out=json&doc=$page_url"`
# remove all the new lines
VALIDATION_RESULT=`echo ${VALIDATION_RESULT} | sed -e "s/[\r\n]\+//g"`
# Perl used to remove all the tabs
VALIDATION_RESULT=`echo ${VALIDATION_RESULT} | perl -i -pe 's/[ \t]+//g'`
# Remove all the escaped new lines
VALIDATION_RESULT=`echo ${VALIDATION_RESULT} | sed -e "s/\\n\+//g"`
# Add the processed result to an array
VALIDATION_RESULTS_JSON+=("$VALIDATION_RESULT")
# Count up.
COUNTER=`expr $COUNTER + 1`
fi
done < $FILENAME
#######################################
# Write JSON values to a file as one JSON array on a single line.
# Arguments: $1  - path of the file to (over)write
#            $2… - JSON texts to use as the array elements (may be none,
#                  which yields "[]")
# Returns:   non-zero if the file cannot be written
#######################################
write_json_array() {
  local out_file=$1
  shift
  # With IFS set to a comma, "$*" joins the remaining arguments with ",".
  local IFS=,
  # printf with a quoted argument writes the text verbatim. An unquoted
  # `echo $var` would let the shell treat the leading "[...]" as a filename
  # pattern, and some echo implementations rewrite backslash escapes.
  printf '[%s]\n' "$*" > "$out_file"
}

# Join the JSON objects with "," into an array and write it to the output file
if write_json_array "$OUTFILENAME" "${VALIDATION_RESULTS_JSON[@]}"; then
  echo "Validation complete"
else
  printf 'Could not write results to %s\n' "$OUTFILENAME" >&2
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment