Last active
April 15, 2019 11:09
-
-
Save joseluis/18975af8645e0822e313d8d2c451cd6a to your computer and use it in GitHub Desktop.
download all the commitstrip strips
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# name        : commitstrip-dl
# description : download all the commitstrip strips
# version     : 1.0.0
# author      : José Luis Cruz © 2017
# dependencies: bash>=4 curl [sxiv]
# license     : MIT
#
#------------------------------------------------------------------------------- | |
# ::main | |
# | |
# Entry point: download commitstrip strips, or show a random downloaded one.
#
# Arguments:
#   $1 - operation: all | update | random
#   $2 - (random only) optional image viewer binary; defaults to sxiv
# Outputs:
#   progress on stdout, error messages on stderr
# Exits (always terminates the shell via `exit`):
#   0 - the walk finished, or the viewer was launched
#   1 - unknown operation, missing viewer, no downloaded images,
#       unknown last post, or a page that could not be fetched
commitstrip-dl::main() {
  local image_dir="./imgs"     # directory where to save the images
  local image_viewer="sxiv"    # (default) image viewer binary
  local web_src                # the source code of the current webpage
  local image_url              # the url of the current image (strip)
  local image_filename         # the target filename for the image
                               # with the format: $post_date-$post_title
  local post_date              # date of the post, as YYYYMMDD
  local -l post_title          # the title of the post (forced to lowercase)
  local next_post_link
  local next_post_selector
  local curl_err
  local -a images=()
  local operation="${1:-}"     # [all|update|random]

  case "$operation" in
    all) # start from the first strip, forwards
      next_post_link="http://www.commitstrip.com/en/2012/02/22/interview/"
      next_post_selector='rel="next'
      ;;
    update) # start from the last strip, backwards
      next_post_link=$(commitstrip-dl::get-last-post)
      next_post_selector='rel="prev'
      if [[ -z $next_post_link ]]; then
        printf 'ERROR: could not find the last post.\n' >&2
        exit 1
      fi
      ;;
    random) # display a random image with sxiv by default
      if [[ -n ${2:-} ]]; then
        if command -v "$2" > /dev/null 2>&1; then
          image_viewer="$2"
        else
          printf '%s\n' "ERROR: you need to provide a valid program." >&2
          exit 1
        fi
      fi
      if ! command -v "$image_viewer" > /dev/null 2>&1; then
        printf '%s\n' \
          "ERROR: $image_viewer doesn't exist.  Please install it or use another program." >&2
        exit 1
      fi
      # Only regular files are candidates; the directory itself is not.
      mapfile -t images < <(find "$image_dir" -type f 2> /dev/null)
      if (( ${#images[@]} == 0 )); then
        printf 'ERROR: no images found in %s\n' "$image_dir" >&2
        exit 1
      fi
      "$image_viewer" "${images[RANDOM % ${#images[@]}]}" &
      exit 0
      ;;
    *)
      commitstrip-dl::usage
      exit 1
      ;;
  esac

  # prepare where to save the images
  mkdir -p -- "$image_dir"
  cd -- "$image_dir" || exit

  while true; do
    # -f: do not treat an HTTP error page as content; -L: follow redirects
    web_src=$(curl -fsL "$next_post_link")
    if [[ -z $web_src ]]; then
      printf 'ERROR: could not fetch %s\n' "$next_post_link" >&2
      exit 1
    fi

    # get commit image
    image_url=$(grep '<img .*src=.*uploads' <<< "$web_src" \
      | head -n 1 \
      | sed -n 's/.*<img .*src="\([^"]*\)".*/\1/p')

    if [[ -z $image_url ]]; then
      printf 'ERROR: no strip image found at %s\n' "$next_post_link" >&2
    else
      # generate the img filename
      #
      # 1. extract the date of the post
      post_date=$(grep -Eo '[0-9]{4}/[0-9]{2}/[0-9]{2}' <<< "$next_post_link" \
        | head -n 1 \
        | tr -d '/')

      # 2. filter the post title
      #    - extract the text
      #    - convert html entities to spaces (one entity at a time)
      #    - convert special characters to spaces
      #    - convert multiple spaces into one
      #    - remove leading/trailing spaces
      #    - replace spaces with dashes
      post_title=$(grep '<h1 .*entry-title' <<< "$web_src" \
        | head -n 1 \
        | sed -n 's/.*<h1 .*>\([^<]*\)<.*/\1/p' \
        | sed -e 's/&[^;[:space:]]*;/ /g' \
              -e 's/[^A-Za-z0-9._-]/ /g' \
              -e 's/  */ /g' \
              -e 's/^ //' \
              -e 's/ $//' \
              -e 's/ /-/g')

      # 3. generate the local image file name, with the appropriate extension
      image_filename="$post_date-$post_title.${image_url##*.}"
      printf '"%s" ... ' "$image_filename"

      # detect an already existing image
      if [[ -f $image_filename ]]; then
        printf 'alredy exists!\n'
        if [[ $operation == update ]]; then
          printf '\nDone!\n'
          exit 0
        fi
      else
        # download the image
        curl -fsL -o "$image_filename" "$image_url"
        curl_err="$?"
        if [[ $curl_err -eq 0 ]]; then
          printf 'Ok.\n'
        else
          printf 'ERROR: %s\n' "$curl_err"
          # A partial file would be mistaken for a finished download later.
          rm -f -- "$image_filename"
        fi
      fi
    fi

    # get next post link
    next_post_link=$(grep -- "$next_post_selector" <<< "$web_src" \
      | head -n 1 \
      | sed -n 's/.*<a href="\([^"]*\)".*/\1/p')

    # Detect the end (no more posts)
    if [[ -z $next_post_link ]]; then
      printf '\nDone!\n'
      exit 0
    fi

    # artificial delay
    sleep 0.5
  done
} # ::main
#------------------------------------------------------------------------------- | |
# ::get-last-post | |
# | |
# | |
# < the last post URL | |
# | |
# Print the URL of the most recent post listed on the site's front page.
#
# Outputs:
#   the URL on stdout, without a trailing newline; nothing on failure
# Returns:
#   0 when a URL was found, non-zero when the page could not be fetched
#   or no post link was found in it
commitstrip-dl::get-last-post() {
  local web="http://www.commitstrip.com/en/" # the list of posts
  local web_src
  local last_post

  # -f: fail on HTTP errors instead of parsing an error page; -L: follow redirects
  web_src=$(curl -fsL "$web") || return

  # Look at the 20 lines after the excerpts container and keep only the
  # first anchor found there, so the result is always a single URL.
  last_post=$(grep -A 20 '<div class="excerpts">' <<< "$web_src" \
    | grep -e '[^-]<a href=' \
    | sed -n 's/.*<a href="\([^"]*\)".*/\1/p' \
    | head -n 1)

  [[ -n $last_post ]] || return 1
  printf '%s' "$last_post"
} # ::get-last-post
#------------------------------------------------------------------------------- | |
# ::usage | |
# | |
# Print the command-line help to stdout.
#
# Lists the three operations handled by commitstrip-dl::main
# (all, update, random). Uses printf rather than a tab-stripping
# heredoc so the text does not depend on the file's indentation.
commitstrip-dl::usage() {
  printf '%s\n' \
    "Usage: ${0##*/} [all|update|random [viewer]]" \
    "" \
    "  all     will fetch all commit strips since the first one." \
    "" \
    "  update  will start fetching the last commit strip, until the first one" \
    "          it finds that's already downloaded" \
    "" \
    "  random  will display a random downloaded strip with the given image" \
    "          viewer (sxiv by default)" \
    "" \
    "See the comic strips online at <http://www.commitstrip.com/en/>"
} # ::usage
# Run main only when the file is executed directly, not when it is sourced.
if [[ $0 == "${BASH_SOURCE[0]}" ]]; then
  commitstrip-dl::main "$@"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment