Created
August 9, 2023 09:57
-
-
Save pabsan-0/7bc5147850753bae952e9d0b047cd4e4 to your computer and use it in GitHub Desktop.
Alcampo.es nutritional value web scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Alcampo.es nutritional value web scraper.
# Usage:
#   $ alcampo.sh <URL>
#
# Output: the product title, a blank line, then a tab-separated header row
# and a tab-separated row with the value scraped for each macronutrient.

# Read product-page HTML on stdin and print the product title.
# The title is whatever sits between `"name":"` and the following
# `","description` on any chunk of the page that contains `"name":"`.
# Prints nothing when no such chunk exists.
extract_title() {
  # Break the page apart at every literal "h1" so the chunk holding the
  # product name becomes its own line. This must be a string split:
  # `tr "h1" "\n"` maps the *characters* 'h' and '1' to newlines, which
  # cuts off any title containing an 'h' or a '1'.
  awk '{ gsub(/h1/, "\n"); print }' |
    grep '"name":"' |
    sed -e 's/^.*name":"//' -e 's/"."description.*$//'
}

# Scrape product title
curl -s "$1" | extract_title
echo
# Scrape product nutritional value
# cat index.html | \
#   tr " " "\n" | \                          # spread the HTML out by converting spaces to newlines
#   grep -A 1 'gras' | \                     # look for the keyword in the HTML, also yielding 1 line below
#   grep "</span>.*\u00a0" | \               # keep the line holding the value: it has the \u00a0 space between value and unit
#   sed 's:(Kcal)::g' | \                    # remove (Kcal), which only appears for kcals
#   sed 's:</span></td><td><span>::g' | \    # remove preceding junk
#   sed 's:\\u.*$::g'                        # remove trailing junk
# Print the header row: each macronutrient label followed by a space and a
# tab, then a terminating newline.
print_macronutrient_header() {
  # printf reuses the format for every remaining argument, so no loop is
  # needed, and the labels are passed as data rather than as the format
  # string.
  printf '%s \t' Kcal carb Prot Gras
  echo
}

print_macronutrient_header
# Read product-page HTML on stdin and print the value found for the
# macronutrient keyword given as $1, without a trailing newline.
# Prints nothing when the keyword or its value cannot be found.
extract_macronutrient() {
  # 1. Put every space-separated token on its own line.
  # 2. Keep each line containing the keyword plus the line right after it.
  # 3. Of those, keep the line holding the value: it contains "</span>"
  #    followed by the escaped text "\u00A0" that separates value and unit.
  #    The backslash is matched literally; an unescaped "\u" is an
  #    undefined regex escape that newer GNU grep warns about.
  # 4. Drop "(Kcal)", the markup preceding the value, and everything from
  #    the first "\u" onwards.
  # 5. Remove newlines so the caller controls the row layout.
  tr " " "\n" |
    grep -A 1 -- "$1" |
    grep '</span>.*\\u00A0' |
    sed -e 's:(Kcal)::g' \
        -e 's:</span></td><td><span>::g' \
        -e 's:\\u.*$::g' |
    tr -d '\n'
}

# Download the page once and reuse it for every macronutrient instead of
# issuing one HTTP request per column.
if ! page=$(curl -s "$1"); then
  # Best effort: warn, but still emit the (empty) row so the output shape
  # stays the same.
  printf 'alcampo.sh: failed to fetch "%s"\n' "$1" >&2
fi

for macronutrient in Kcal carb Prot Gras; do
  printf '%s\n' "$page" | extract_macronutrient "$macronutrient"
  printf '\t'
done
echo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment