Skip to content

Instantly share code, notes, and snippets.

@kurobeniq
Last active August 29, 2015 14:06
Show Gist options
  • Save kurobeniq/c96adcc4f3eb45b16822 to your computer and use it in GitHub Desktop.
Save kurobeniq/c96adcc4f3eb45b16822 to your computer and use it in GitHub Desktop.
Download the image from twitpic
#!/bin/bash
# Download the images of a Twitter account from twitpic.
# curl, perl and wget are required!
#######################################
# Write a timestamped error message to stderr.
# Arguments: the message words; they are joined into one line using the
#            first character of IFS (a space by default).
# Outputs:   "[err][<timestamp>]: <message>" on stderr, nothing on stdout.
#######################################
err() {
  # "$*" rather than "$@": the arguments are embedded in one string, so they
  # must expand to a single word (ShellCheck SC2145).
  printf '[err][%s]: %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$*" >&2
}
#######################################
# Write a timestamped informational message to stdout.
# Arguments: the message words; they are joined into one line using the
#            first character of IFS (a space by default).
# Outputs:   "[info][<timestamp>]: <message>" on stdout.
#######################################
info() {
  # "$*" rather than "$@": the arguments are embedded in one string, so they
  # must expand to a single word (ShellCheck SC2145).
  printf '[info][%s]: %s\n' "$(date +'%Y-%m-%dT%H:%M:%S%z')" "$*"
}
#######################################
# Print the command-line synopsis and terminate the script.
# Outputs: the usage line on stderr (this is an error path, so the
#          diagnostic must not end up in captured or piped stdout).
# Returns: never returns; exits with status 1.
#######################################
usage() {
  printf 'Usage: %s [TWITTER_ACCOUNT_NAME]\n' "$0" >&2
  exit 1
}
#######################################
# Give up after a retry loop has run out of attempts.
# Outputs: reports "Retry over." through err.
# Returns: never returns; exits with status 1.
#######################################
retry_over() {
  err "Retry over."
  exit 1
}
# The account name is mandatory: show the usage text (which exits) when the
# first positional argument is missing or empty.
[ -n "$1" ] || usage

# Script-wide constants.
#   TWITTER_ACCOUNT_NAME  account whose photos are fetched (first argument)
#   URL                   photo listing page of that account
#   RETRY_NUM             maximum number of attempts for each page fetch
readonly TWITTER_ACCOUNT_NAME="$1"
readonly URL="http://twitpic.com/photos/${TWITTER_ACCOUNT_NAME}"
readonly RETRY_NUM=10

info "START"
# Get max_page_num with retry.
#
# The listing page is fetched once per attempt (instead of once per pattern)
# and its pagination links are checked from most to least specific:
#   <a href="?page=30">Last</a>             -> 30
#   <a href="?page=2">Next</a>              -> 2
#   <a href="?page=1" class="active">1</a>  -> highest page number linked
# Every pattern yields exactly one number. A multi-line value would be word
# split into extra arguments when used as a loop bound (e.g. "seq 1 30 30"
# prints only 1), silently skipping pages.
max_page_num=""
for ((i = 1; i <= RETRY_NUM; i++)); do
  page_html=$(curl -s "${URL}")

  # Check example <a href="?page=30">Last</a>; stop at the first match.
  max_page_num=$(perl -lne 'if ( m{<a href="\?page=(\d+)">Last</a>} ) { print $1; exit }' <<< "${page_html}")

  # Check example <a href="?page=2">Next</a>; stop at the first match.
  if [ -z "${max_page_num}" ]; then
    max_page_num=$(perl -lne 'if ( m{<a href="\?page=(\d+)">Next</a>} ) { print $1; exit }' <<< "${page_html}")
  fi

  # Check example <a href="?page=1" class="active">1</a>
  # NOTE(review): presumably the single-page case; the largest linked page
  # number is used so that several matches still give one value.
  if [ -z "${max_page_num}" ]; then
    max_page_num=$(perl -lne 'while ( m{<a href="\?page=(\d+)"}g ) { $max = $1 if !defined $max || $1 > $max } END { print $max if defined $max }' <<< "${page_html}")
  fi

  if [ -n "${max_page_num}" ]; then
    break
  fi
  err "Can not get max_page_num. Retry ${i} time."
  sleep 1
done

# All attempts failed: abort.
if [ -z "${max_page_num}" ]; then
  retry_over
fi
info "max_page_num = ${max_page_num}"
# Loop page_num
# For every listing page: collect the image URLs (with retry), start the
# downloads in parallel and wait for them before moving to the next page.
for page_num in $(seq 1 ${max_page_num}); do
  page_url="${URL}?page=${page_num}"
  info "page_num = $page_num"
  info "page_url = $page_url"

  # Get jpg_urls with retry.
  # The perl filter rewrites each thumbnail URL to its "large" variant and
  # prints one URL per line.
  jpg_urls=""
  for ((i = 1; i <= RETRY_NUM; i++)); do
    jpg_urls=$(curl -s "${page_url}" | perl -lne 'print $1, "large", $2 if ( m{<img src="(https://.+\.cloudfront\.net/photos/)thumb(/.+\.jpg)} )')
    if [ -n "${jpg_urls}" ]; then
      break
    fi
    err "Can not get jpg_urls. Retry ${i} time."
    sleep 1
  done
  if [ -z "${jpg_urls}" ]; then
    retry_over
  fi

  # Download
  # The wget jobs run in the background; their PIDs are recorded so each one
  # can be waited for. Without the wait, "DONE" would be logged (and the
  # script would exit) while downloads are still in flight, and failed
  # downloads would go unnoticed. Waiting per page also caps the number of
  # simultaneous wget processes at one page worth of images.
  download_pids=()
  download_urls=()
  # Read line by line instead of relying on unquoted word splitting, which
  # would also subject the URLs to pathname expansion.
  while IFS= read -r jpg_url; do
    [ -n "${jpg_url}" ] || continue
    info "Download ${jpg_url}"
    wget -q "${jpg_url}" -P ./"${TWITTER_ACCOUNT_NAME}" &
    download_pids+=("$!")
    download_urls+=("${jpg_url}")
  done <<< "${jpg_urls}"

  for idx in "${!download_pids[@]}"; do
    if ! wait "${download_pids[idx]}"; then
      # Best effort: report the failure and carry on with the other images.
      err "Download failed: ${download_urls[idx]}"
    fi
  done
done
info "DONE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment