Last active
June 17, 2019 22:45
-
-
Save marcusrussi/dea34a228e7d0aebe99a1ec5e10df421 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Download the photos of a cascadeclimbers.com trip report (TR) into the
# current working directory.
#
# Usage: pass the TR url as the first argument; when it is omitted,
#        DEFAULT_TR_URL is used instead.
# Requires: curl, grep, sed and parallel on the PATH.

# Trip report that is processed when no url is given on the command line.
DEFAULT_TR_URL="https://cascadeclimbers.com/forum/topic/100929-tr-baker-river-to-ross-lake-in-technicolor-mystery-ndespair-pioneer-swiss-spectre-challenger-7252017/"
# url for a TR -> broken urls of photos in OP
#
# Arguments: $1 - url of the trip report page to download
# Outputs:   the broken photo urls on stdout, one per line, with the
#            surrounding double quotes removed
# Returns:   2 when no url is given; otherwise the status of the pipeline
original_photo_urls() {
  if [[ -z "${1:-}" ]]; then
    printf 'usage: original_photo_urls TR_URL\n' >&2
    return 2
  fi

  curl -s "$1" |
    # All of the broken photo urls appear to contain 'plab/data/5'
    # as a substring
    grep -F 'plab/data/5' |
    # For a line containing double-quoted strings, extract just the
    # quoted strings (a quoted string containing a space is not matched)
    grep -Eo '"[^ ]*"' |
    # Filter out the quoted strings that don't include 'http'. In the
    # case of an <img/> element, this gets rid of the quoted string for
    # the 'alt' attribute
    grep -F 'http' |
    # Remove the double quotes
    sed 's/"//g'
}
# url of broken photo -> working url
#
# The address of the image itself is carried in the 'img=' argument of
# the broken url; that argument ends at the ampersand of the next
# argument to the imageproxy (or at the end of the string).
#
# Arguments: $1 - broken photo url containing an 'img=' argument
# Outputs:   the repaired url on stdout; nothing when $1 has no 'img='
# Returns:   0
fix_photo_url() {
  local url=${1:-}

  # Without an 'img=' argument there is nothing to extract.
  [[ "$url" == *img=* ]] || return 0

  # Keep only what lies between the first 'img=' and the next '&', so
  # that later arguments never leak into the result.
  url=${url#*img=}
  url=${url%%&*}

  # Replace a leading 'http://' with 'https://'; an url that already
  # uses https is left alone.
  case "$url" in
    http://*) url="https://${url#http://}" ;;
  esac

  # Get rid of a 'www.' at the start of the host name only, so the same
  # letters elsewhere in the url are untouched.
  case "$url" in
    https://www.*) url="https://${url#https://www.}" ;;
  esac

  printf '%s\n' "$url"
}
# Download a TR and get its photo urls. Then fix
# the urls and output them one per line. Then download
# the images from the fixed URLs and save them in the
# working directory under their original name.
original_photo_urls "${1:-$DEFAULT_TR_URL}" |
  # Fix each url inside this shell. The scraped text is only ever passed
  # as a function argument, never pasted into a command string.
  while IFS= read -r broken_url; do
    fix_photo_url "$broken_url"
  done |
  # Up to 30 downloads at a time; -v prints each command. '{}' is the
  # fixed url and '{/}' its basename, used as the output file name.
  parallel -j30 -v curl -s '{}' '>' '{/}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment