Created
February 10, 2017 18:16
-
-
Save pcurylo/c6a32403ec2d4f5d2873f0a86135a9c0 to your computer and use it in GitHub Desktop.
Get Travel Warnings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Acquires and cleans travel warnings from the US State Department.
#
# Downloads the alerts/warnings page, extracts its HTML table and prints it
# to stdout as one line per table row with tab-separated columns.
#
# Usage:    run with no arguments.
# Requires: curl, grep with -o support, and GNU sed (the expressions rely on
#           the GNU-only `I` flag and the `\s`, `\?`, `\+`, `\|`, `\n`
#           extensions).

# Propagate a failure of any pipeline stage (download error, no table found)
# to the exit status, and treat unset variables as errors. `set -e` is not
# needed: the script is a single pipeline whose status becomes the exit code.
set -uo pipefail

readonly TRAVEL_WARNINGS_URL='https://travel.state.gov/content/passports/en/alertswarnings.html'

#######################################
# Converts the alerts/warnings HTML page into tab-separated rows.
# Arguments: none
# Inputs:    HTML document on stdin
# Outputs:   one line per table row on stdout, cells separated by tabs;
#            rows containing "Alert" (any case) are dropped and a trailing
#            " Travel Warning" is removed from the location text
# Returns:   non-zero when a stage fails, e.g. no <table> is present or
#            no row survives the Alert filter
#######################################
clean_travel_warnings() {
  # remove newlines so the whole page is a single line grep can match across
  # NOTE: '|' is the stand-in for newline, so any literal '|' in the table is
  # deleted by the round-trip below.
  tr "\n" "|" |
    # extract table (greedy: first "<table" through the last "</table>")
    grep -o '<table.*</table>' |
    # put newlines back
    tr "|" "\n" |
    # remove leading spaces in front of tags
    sed 's/^[\ \t]*//g' |
    # now remove all newlines and carriage returns
    tr -d '\n\r' |
    # trim tag contents (whitespace between a tag and its text)
    sed 's/\(<[^>]*>\)\s*\([^>]*\)\s*\(<\/[^>]*>\)/\1\2\3/Ig' |
    # linebreak the rows: every </tr> becomes a newline
    sed 's/<\/TR[^>]*>/\n/Ig' |
    # remove table/thead/tbody/tr tags; th/td are kept for the next steps
    sed 's/<\/\?\(TABLE\|THEAD\|TBODY\|TR\)[^>]*>//Ig' |
    # now remove th/td start tags and end tags at end of line
    sed 's/<T[DH][^>]*>\|<\/\?T[DH][^>]*>$//Ig' |
    # remove the URLs (anchor tags; the link text stays)
    sed 's/<\/\?A[^>]*>//Ig' |
    # break columns by tab char: remaining th/td end tags become tabs
    sed 's/<\/T[DH][^>]*>/\t/Ig' |
    # remove Alerts (they are temporary)
    grep -vi Alert |
    # remove " Travel Warning" from location
    sed 's/\s\+Travel Warning\s*//Ig'
}

#######################################
# Downloads the page and prints the cleaned table.
# Globals:   TRAVEL_WARNINGS_URL (read)
# Arguments: none
# Outputs:   tab-separated rows on stdout; curl errors on stderr
# Returns:   non-zero if the download or the cleaning pipeline fails
#######################################
main() {
  # get data: --fail turns HTTP errors into a non-zero exit instead of feeding
  # an error page to the parser; --silent/--show-error hide the progress meter
  # but keep real errors; --location follows redirects.
  curl --fail --silent --show-error --location "${TRAVEL_WARNINGS_URL}" |
    clean_travel_warnings
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment