Skip to content

Instantly share code, notes, and snippets.

@sulhome
Last active August 28, 2016 23:06
Show Gist options
  • Save sulhome/26472e386b254f9d5dd897806f558530 to your computer and use it in GitHub Desktop.
Save sulhome/26472e386b254f9d5dd897806f558530 to your computer and use it in GitHub Desktop.
BBC top news using Bash, v2
#!/usr/bin/env bash
# Report whether a command name can be resolved by the shell.
# Arguments: $1 - name to look up
# Outputs:   nothing (the lookup's stdout and stderr are discarded)
# Returns:   the exit status of `type "$1"`: 0 when found, non-zero otherwise
command_exists() {
  type "$1" >/dev/null 2>&1
}
# Print a message prefixed with a red " Error " label.
# Arguments: $1 - message text; passed as a %s argument, so any '%' or
#                 backslash in it is printed literally
# Outputs:   one line on stderr, so the diagnostic never mixes with the
#            program's regular stdout output when that is piped or captured
print_error() {
  printf "\e[31m Error \e[0m: %s \n" "${1-}" >&2
}
# Check that required tools are installed
if ! command_exists hxnormalize || ! command_exists hxselect; then
  print_error "you need html-xml-utils to run this program. please visit http://packages.ubuntu.com/trusty/text/html-xml-utils"
  exit 1
fi
if ! command_exists wget; then
  print_error "you need wget to run this program."
  exit 1
fi

# Get the initial list 'ul'
rawList=$(wget -O - -q "http://www.bbc.co.uk/news/popular/read" |
  hxnormalize -x 2>/dev/null |
  hxselect -i "ul.most-popular-page__list")
# An empty list means the download failed or the expected 'ul' was not found;
# report it instead of printing a header with no items.
if [[ -z "$rawList" ]]; then
  print_error "could not retrieve the most popular list from bbc news"
  exit 1
fi

# Replace relative addresses with full address. The pattern is quoted so it
# is matched as a literal string rather than as a glob pattern.
stringToMatch='href="/'
stringToReplace='href="http://bbc.co.uk/'
replaceRelLinks=${rawList//"$stringToMatch"/$stringToReplace}

fullOutput=""
counter=1
while true; do
  # Extract one 'li' at a time; an empty selection means we ran past the end.
  result=$(printf '%s\n' "$replaceRelLinks" |
    hxselect -i -c "li.most-popular-page-list-item:nth-child($counter)")
  [[ -n "$result" ]] || break

  # Collapse newlines and runs of whitespace to single spaces so that the
  # href and headline are each on one line. This is done explicitly (rather
  # than via an unquoted expansion) so the fragment is never glob-expanded.
  result=$(printf '%s' "$result" | tr -s '[:space:]' ' ')

  # Then extract href and news item text
  href=$(printf '%s\n' "$result" |
    hxselect -i "a.most-popular-page-list-item__link" |
    grep -oP '(?<=href=")[^"]*(?=")')
  topic=$(printf '%s\n' "$result" |
    hxselect -i -c "span.most-popular-page-list-item__headline")

  # Format the entry with the scraped text passed as %s arguments, never as
  # part of the format string: a '%' or '\' in a headline stays literal.
  printf -v entry '\e[1m\e[92m %s) \e[0m \e[1m%s\e[0m \n %s\n\n' \
    "$counter" "$topic" "$href"
  fullOutput+=$entry
  counter=$((counter + 1))
done

# Print result (entries are already fully formatted, so emit them via %s)
dateTime=$(date +"%m-%d-%Y at %H:%M:%S")
printf '\n\e[1m\e[34m Top bbc news\e[0m %s\n\n%s' "$dateTime" "$fullOutput"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment