Skip to content

Instantly share code, notes, and snippets.

@joseluis
Last active April 15, 2019 11:09
Show Gist options
  • Save joseluis/18975af8645e0822e313d8d2c451cd6a to your computer and use it in GitHub Desktop.
Save joseluis/18975af8645e0822e313d8d2c451cd6a to your computer and use it in GitHub Desktop.
download all the commitstrip strips
#!/usr/bin/env bash
#
# name : commitstrip-dl
# description : download all the commitstrip strips
# version : 1.0.0
# author : José Luis Cruz © 2017
# dependencies: bash>=4 curl [sxiv]
# license : MIT
#
#-------------------------------------------------------------------------------
# ::main
#
# Entry point. Depending on the operation it either walks the chain of posts
# downloading each strip image into ./imgs, or opens one random, already
# downloaded image in an image viewer.
#
# > $1 the operation: all | update | random
# > $2 (random only, optional) image viewer program to use instead of sxiv
#
# < progress messages on stdout, diagnostics on stderr
# < status 0 on success; non-zero on usage, viewer, directory or fetch errors
#
# NOTE: uses `return` rather than `exit` so that sourcing this file and calling
# the function does not terminate the calling shell.
#
commitstrip-dl::main() {
  local image_dir="./imgs"   # directory where to save the images
  local image_viewer="sxiv"  # (default) image viewer binary
  local web_src              # the source code of the current webpage
  local image_url            # the url of the current image (strip)
  local image_ext            # the file extension taken from the image url
  local image_filename       # the target filename for the image
                             # with the format: $post_date-$post_title.$ext
  local post_date            # the date of the post, as YYYYMMDD
  local -l post_title        # the title of the post (-l lowercases it)
  local next_post_link       # the url of the page to process next
  local next_post_selector   # the text identifying the link to follow
  local random_image         # (random) the image picked for display
  local curl_err             # exit status of a failed image download
  local operation="${1:-}"   # [all|update|random]

  case "$operation" in
    all) # start from the first strip, forwards
      next_post_link="http://www.commitstrip.com/en/2012/02/22/interview/"
      next_post_selector='rel="next'
      ;;
    update) # start from the last strip, backwards
      next_post_link=$(commitstrip-dl::get-last-post)
      # keep a single URL even if the helper printed several lines
      next_post_link="${next_post_link%%$'\n'*}"
      if [[ -z $next_post_link ]]; then
        printf 'ERROR: could not find the last post.\n' >&2
        return 1
      fi
      next_post_selector='rel="prev'
      ;;
    random) # display a random image with sxiv by default
      if [[ -n ${2:-} ]]; then
        if command -v -- "$2" > /dev/null 2>&1; then
          image_viewer="$2"
        else
          printf 'ERROR: you need to provide a valid program.\n' >&2
          return 1
        fi
      fi
      if ! command -v -- "$image_viewer" > /dev/null 2>&1; then
        printf "ERROR: %s doesn't exist. %s\n" "$image_viewer" \
          "Please install it or use another program." >&2
        return 1
      fi
      # -type f: never pick the directory itself (find lists it first)
      random_image=$(find "$image_dir" -type f 2> /dev/null \
        | sort -R \
        | head -n 1)
      if [[ -z $random_image ]]; then
        printf 'ERROR: no images found in %s.\n' "$image_dir" >&2
        return 1
      fi
      "$image_viewer" "$random_image" &
      return 0
      ;;
    *)
      commitstrip-dl::usage >&2
      return 1
      ;;
  esac

  # prepare where to save the images
  mkdir -p -- "$image_dir" || return
  cd -- "$image_dir" || return

  while true; do
    # -f: fail on HTTP errors, -L: follow redirects
    if ! web_src=$(curl -sfL "$next_post_link"); then
      printf 'ERROR: could not fetch %s\n' "$next_post_link" >&2
      return 1
    fi

    # get commit image
    image_url=$(grep '<img .*src=.*uploads' <<< "$web_src" \
      | head -n 1 \
      | sed -n 's/.*<img .*src="\([^"]*\)".*/\1/p')

    if [[ -z $image_url ]]; then
      # nothing to download on this page; keep following the chain
      printf 'ERROR: no strip image found in %s\n' "$next_post_link" >&2
    else
      # generate the img filename
      #
      # 1. extract the date of the post (YYYY/MM/DD in the url -> YYYYMMDD)
      post_date=""
      if [[ $next_post_link =~ ([0-9]{4})/([0-9]{2})/([0-9]{2}) ]]; then
        post_date="${BASH_REMATCH[1]}${BASH_REMATCH[2]}${BASH_REMATCH[3]}"
      fi

      # 2. filter the post title
      #    - extract the text (first matching heading only)
      #    - convert html entities to spaces
      #    - convert special characters to spaces
      #    - convert multiple spaces into one
      #    - remove leading/trailing spaces
      #    - replace spaces with dashes
      post_title=$(grep '<h1 .*entry-title' <<< "$web_src" \
        | head -n 1 \
        | sed -n 's/.*<h1 .*>\([^<]*\)<.*/\1/p' \
        | sed -e 's/&[^;[:space:]]*;/ /g' \
              -e 's/[^A-Za-z0-9._-]/ /g' \
              -e 's/  */ /g' \
              -e 's/^ //' \
              -e 's/ $//' \
              -e 's/ /-/g')

      # 3. generate the local image file name, with the appropriate extension
      #    (any ?query or #fragment is dropped before taking the extension)
      image_ext="${image_url%%[?#]*}"
      image_ext="${image_ext##*.}"
      image_filename="$post_date-$post_title.$image_ext"
      printf '"%s" ... ' "$image_filename"

      if [[ -f $image_filename ]]; then
        # detect an already existing image
        printf 'already exists!\n'
        if [[ $operation == update ]]; then
          printf '\nDone!\n'
          return 0
        fi
      elif curl -sfL -o "$image_filename" "$image_url"; then
        printf 'Ok.\n'
      else
        curl_err=$?
        # a partial file would later be mistaken for a finished download
        rm -f -- "$image_filename"
        printf 'ERROR: %s\n' "$curl_err"
      fi
    fi

    # get next post link
    next_post_link=$(grep -F -- "$next_post_selector" <<< "$web_src" \
      | head -n 1 \
      | sed -n 's/.*<a href="\([^"]*\)".*/\1/p')

    # Detect the end (no more posts)
    if [[ -z $next_post_link ]]; then
      printf '\nDone!\n'
      return 0
    fi

    # artificial delay
    sleep 0.5
  done
} # ::main
#-------------------------------------------------------------------------------
# ::get-last-post
#
# Fetches the post list page and extracts the first post link found within
# the 20 lines following the '<div class="excerpts">' marker.
#
# < the last post URL (no trailing newline) on stdout
# < status 0 when a URL was found; curl's status when the download fails;
#   1 when no link could be extracted
#
commitstrip-dl::get-last-post() {
  local web="http://www.commitstrip.com/en/" # the list of posts
  local web_src
  local last_post

  # -f: fail on HTTP errors, -L: follow redirects
  web_src=$(curl -sfL "$web") || return

  # only the first link is wanted: the excerpt window may contain several
  last_post=$(grep -A 20 '<div class="excerpts">' <<< "$web_src" \
    | grep -e '[^-]<a href=' \
    | sed -n 's/.*<a href="\([^"]*\)".*/\1/p' \
    | head -n 1)

  [[ -n $last_post ]] || return 1
  printf '%s' "$last_post"
} # ::get-last-post
#-------------------------------------------------------------------------------
# ::usage
#
# Prints the command-line help, listing every supported operation.
#
# < the usage text on stdout
#
commitstrip-dl::usage() {
  # ${0##*/} instead of basename: basename rejects names starting with '-'
  cat <<ENDUSAGE
Usage: ${0##*/} [all|update|random [viewer]]
all will fetch all commit strips since the first one.
update will start fetching the last commit strip, until the first one
it finds that's already downloaded
random will display a random downloaded strip with the given image
viewer (sxiv by default)
See the comic strips online at <http://www.commitstrip.com/en/>
ENDUSAGE
} # ::usage
# Run main only when this file is executed directly; do nothing when sourced.
[[ $0 != "${BASH_SOURCE[0]}" ]] || commitstrip-dl::main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment