Last active
August 29, 2015 14:06
-
-
Save kurobeniq/c96adcc4f3eb45b16822 to your computer and use it in GitHub Desktop.
Download the image from twitpic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download the images of a Twitter account from twitpic.
# curl, perl and wget are required!
#######################################
# Log an error message to stderr.
# The line is formatted as "[err][<timestamp>]: <message>", where the
# timestamp is the local time printed by date as %Y-%m-%dT%H:%M:%S%z.
# Arguments:
#   Message words; they are joined with the first character of IFS
#   (a space by default).
# Outputs:
#   Writes one line to stderr; nothing to stdout.
#######################################
err() {
  # "$*" (not "$@") because the arguments are merged into a single string.
  printf '[err][%s]: %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$*" >&2
}
#######################################
# Log an informational message to stdout.
# The line is formatted as "[info][<timestamp>]: <message>", where the
# timestamp is the local time printed by date as %Y-%m-%dT%H:%M:%S%z.
# Arguments:
#   Message words; they are joined with the first character of IFS
#   (a space by default).
# Outputs:
#   Writes one line to stdout.
#######################################
info() {
  # "$*" (not "$@") because the arguments are merged into a single string.
  printf '[info][%s]: %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$*"
}
#######################################
# Print the command-line synopsis and terminate the script.
# The account name is shown as <...> because the script refuses to run
# without it.
# Outputs:
#   Writes the usage line to stderr, since it is only shown on misuse.
# Returns:
#   Never returns; exits the shell with status 1.
#######################################
usage() {
  printf 'Usage: %s <TWITTER_ACCOUNT_NAME>\n' "$0" >&2
  exit 1
}
#######################################
# Abort the script after the retry budget of a step has been used up.
# Outputs:
#   Logs "Retry over." through err.
# Returns:
#   Never returns; exits the shell with status 1.
#######################################
retry_over() {
  err "Retry over."
  exit 1
}
# ---------------------------------------------------------------------------
# Main flow
#   1. Determine how many listing pages the account has.
#   2. For every page, collect the full-size image URLs.
#   3. Download the images of a page in parallel into ./<account name>/ and
#      wait for them before moving on to the next page.
# ---------------------------------------------------------------------------

# The account name is mandatory.
if [[ -z "${1:-}" ]]; then
  usage
fi

readonly RETRY_NUM=10
readonly TWITTER_ACCOUNT_NAME="$1"
readonly URL="http://twitpic.com/photos/${TWITTER_ACCOUNT_NAME}"

#######################################
# Extract the number of listing pages from a twitpic photo listing.
# Precedence of the pagination links that are inspected:
#   1. <a href="?page=30">Last</a>             -> 30
#   2. <a href="?page=2">Next</a>              -> 2
#   3. <a href="?page=1" class="active">1</a>  -> largest page number seen
# Inputs:
#   HTML document on stdin.
# Outputs:
#   Exactly one decimal number on stdout, or nothing if no page link exists.
#######################################
extract_max_page_num() {
  perl -ne '
    $last = $1 if !defined $last && m{<a href="\?page=(\d+)">Last</a>};
    $next = $1 if !defined $next && m{<a href="\?page=(\d+)">Next</a>};
    while (m{<a href="\?page=(\d+)"}g) {
      $any = $1 if !defined $any || $1 > $any;
    }
    END {
      my $n = defined $last ? $last : defined $next ? $next : $any;
      # "+ 0" drops leading zeros so the shell never reads the value as octal.
      print $n + 0, "\n" if defined $n;
    }
  '
}

info "START"

# Get max_page_num with retry. The page is fetched once per attempt and all
# pagination patterns are evaluated on that single response.
max_page_num=""
for ((attempt = 1; attempt <= RETRY_NUM; attempt++)); do
  max_page_num=$(curl -s "${URL}" | extract_max_page_num)
  if [[ -n "${max_page_num}" ]]; then
    break
  fi
  err "Can not get max_page_num. Retry ${attempt} time."
  sleep 1
done
if [[ -z "${max_page_num}" ]]; then
  retry_over
fi
info "max_page_num = ${max_page_num}"

# Loop page_num
for ((page_num = 1; page_num <= max_page_num; page_num++)); do
  page_url="${URL}?page=${page_num}"
  info "page_num = $page_num"
  info "page_url = $page_url"

  # Get jpg_urls with retry. Thumbnail URLs are rewritten to their "large"
  # variant; the result is one URL per line.
  jpg_urls=""
  for ((attempt = 1; attempt <= RETRY_NUM; attempt++)); do
    jpg_urls=$(curl -s "${page_url}" | perl -lne 'print $1, "large", $2 if ( m{<img src="(https://.+\.cloudfront\.net/photos/)thumb(/.+\.jpg)} )')
    if [[ -n "${jpg_urls}" ]]; then
      break
    fi
    err "Can not get jpg_urls. Retry ${attempt} time."
    sleep 1
  done
  if [[ -z "${jpg_urls}" ]]; then
    retry_over
  fi

  # Download. Each URL is read as a whole line, so it is never word-split or
  # glob-expanded. The downloads of one page run in parallel.
  download_pids=()
  download_urls=()
  while IFS= read -r jpg_url; do
    [[ -n "${jpg_url}" ]] || continue
    info "Download ${jpg_url}"
    wget -q -P "./${TWITTER_ACCOUNT_NAME}" "${jpg_url}" &
    download_pids+=("$!")
    download_urls+=("${jpg_url}")
  done <<< "${jpg_urls}"

  # Barrier: do not continue (or report DONE) while downloads are still
  # running, and report the ones that failed instead of dropping them silently.
  for idx in "${!download_pids[@]}"; do
    if ! wait "${download_pids[idx]}"; then
      err "Download failed: ${download_urls[idx]}"
    fi
  done
done

info "DONE"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment