Created
October 20, 2023 17:31
-
-
Save greg-randall/84a61e50f723cd0a86a0a140031360f1 to your computer and use it in GitHub Desktop.
Diffs sites looking for changes to see if someone has posted a new job.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Diffs sites looking for changes to see if someone has posted a new job.
#
# Setup:
#   Create a file 'config.sh' in the directory you run this script from. It
#   must set two variables: your phone number, and an API key from
#   https://textbelt.com/
#     phone=##########
#     apikey=#######################################
#
#   The tool reads through a file called "sites-to-diff.txt", which is a list
#   of sites with one URL per line.

# Load the phone number & API key.
# The explicit "./" matters: for a name without a slash, `source` searches
# $PATH before the current directory, so a stray config.sh elsewhere could be
# loaded instead of ours.
if [[ ! -r ./config.sh ]]; then
  printf 'error: config.sh not found (or not readable) in %s\n' "$PWD" >&2
  exit 1
fi
source ./config.sh

# Fail early instead of posting text requests with empty credentials later on.
: "${phone:?config.sh must set phone}"
: "${apikey:?config.sh must set apikey}"
function sanitize_file_name { | |
echo -n $1 | perl -pe 's/[\?\[\]\/\\=<>:;,''"&\$#*()|~`!{}%+]//g;' -pe 's/[\r\n\t -]+/-/g;' | |
} | |
# ---------------------------------------------------------------------------
# Main loop: fetch every URL listed in sites-to-diff.txt as text, diff it
# against the previously saved copy under pages/, append one row to
# changes.log.csv (timestamp followed by one column per line of the site
# list), and send a text message when a page changed a lot.
# ---------------------------------------------------------------------------

min_bytes=200          # dumps smaller than this are treated as failed fetches
alert_threshold=100    # number of diff output lines that triggers a text
snapshot="pages/temporary.txt"

mkdir -p pages

# Start this run's CSV row with the timestamp. Data is always passed as a
# printf argument, never as the format string, so a '%' in it cannot be
# misread as a conversion specifier.
printf '%s,' "$(date)" >> changes.log.csv

# -r keeps backslashes literal; the default IFS is kept on purpose so that
# whitespace around a URL is trimmed. The `|| [[ -n ... ]]` part processes a
# final line that has no trailing newline.
while read -r url || [[ -n "$url" ]]; do
  if [[ -z "$url" ]]; then
    # Blank line in the site list: nothing to fetch, but still emit an empty
    # column so CSV columns keep lining up with the lines of the list.
    printf ',' >> changes.log.csv
    continue
  fi

  cleaned="$(sanitize_file_name "$url")"
  baseline="pages/${cleaned}.txt"

  lynx -useragent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1" \
    -dump "$url" > "$snapshot"

  # `wc -c` instead of the GNU-only `stat -c`; the arithmetic expansion also
  # strips the padding some wc implementations print.
  size=$(( $(wc -c < "$snapshot") ))

  if (( size >= min_bytes )); then # make sure the file isn't blank (or tiny)
    if [[ -f "$baseline" ]]; then
      # Number of lines diff prints (change markers included), not characters.
      count=$(( $(diff "$snapshot" "$baseline" | wc -l) ))
    else
      # First time this site is seen: there is nothing to compare against, so
      # store the dump as the baseline. Without this the diff would fail,
      # count would be 0, and a baseline would never be created.
      mv "$snapshot" "$baseline"
      count=0
    fi

    printf '%s\t%s\t%s\n' "$count" "$url" "$cleaned"
    printf '%s,' "$count" >> changes.log.csv

    if (( count >= alert_threshold )); then # if there are enough changes we'll text
      echo "Check $url for job"

      # Send the text message; the API response is appended to phone.api.log.
      # NOTE(review): the key is passed on the command line and is therefore
      # visible in the process list while curl runs.
      curl -X POST https://textbelt.com/text \
        --data-urlencode "phone=$phone" \
        --data-urlencode "message=$count character changes found at $url check for jobs" \
        -d "key=$apikey" >> phone.api.log
      printf '\n' >> phone.api.log

      stamp=$(date +%s)
      mv "$baseline" "pages/${cleaned}.${stamp}.before.txt" # save the old version for comparison
      cp "$snapshot" "pages/${cleaned}.${stamp}.after.txt"  # save the new version for comparison
      mv "$snapshot" "$baseline"                            # save the new version for diffing
    fi
    # Below the threshold the baseline is left untouched, so small changes
    # keep accumulating against the same old copy.
  else
    echo "Skipping since lynx got nothing"
    printf ',' >> changes.log.csv
  fi
done < sites-to-diff.txt

# Terminate this run's CSV row.
printf '\n' >> changes.log.csv
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment