@kou1okada
Last active September 4, 2023 00:37
backup script for livedoor wiki.
#!/usr/bin/env bash
# Copyright (c) 2013 Koichi OKADA. All rights reserved.
# This script is distributed under the MIT license.
# http://www.opensource.org/licenses/mit-license.php
SCRIPTNAME=${0##*/}
function usage ()
{
  cat <<EOD
Usage: ${SCRIPTNAME} [options] url
Backup script for livedoor wiki.
options:
  -h, --help       show this help and exit
  -n, --noupdate   do not refresh the cached page list
  -l, --list       print the page names in the page list
  -b, --backup     download every page in the page list
  -d, --directory  keep "/" in pagename (mirror the wiki's directory structure)
note:
  url is a livedoor wiki base URL like http://wiki.livedoor.jp/wikiname
EOD
}
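# percent_encoding STRING: percent-encode STRING per RFC 3986, passing the
# unreserved characters [-._~0-9a-zA-Z] through unchanged.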
function percent_encoding ()
{
  LANG=C awk 'BEGIN{s=ARGV[1];for(i=0;i<256;i++)ord[sprintf("%c",i)]=i;for(i=1;i<=length(s);i++){c=substr(s,i,1);if(match(c,/[-._~0-9a-zA-Z]/))printf("%s",c);else printf("%%%02X",ord[c])}exit}' "$1"
}
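# update_pagelist: mirror the wiki's page list ($LISTURL) and write the
# matching page URLs to $UPDATELIST.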
function update_pagelist ()
{
  wget -mkl0 -np "$LISTURL"
  # The 3-argument match() below requires GNU awk (gawk).
  LANG=ja_JP.EUC-JP awk '
  {
    while (0 < length($0)) {
      if (match($0, /href *= *"([^"]*)"/, m)) {
        print m[1];
        $0 = substr($0, RSTART + RLENGTH);
      } else {
        $0 = "";
      }
    }
  }' "${LISTDIR}"* | sort | uniq | grep "${PAGEURL}" > "$UPDATELIST"
}
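# pagename2filename PAGENAME: replace "/" with %2F so each page maps to a single flat file.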
function pagename2filename ()
{
  echo "$1" | sed -e 's/\//%2F/g'
}
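# Option defaults: refresh the page list; listing, backup, and directory mode are off.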
OPT_u=true
OPT_l=false
OPT_b=false
OPT_d=false
while [ $# -gt 0 ]; do
  case "$1" in
  --help|-h)
    usage
    exit
    ;;
  --noupdate|-n)
    OPT_u=false
    shift
    ;;
  --list|-l)
    OPT_l=true
    shift
    ;;
  --backup|-b)
    OPT_b=true
    shift
    ;;
  --directory|-d)
    OPT_d=true
    shift
    ;;
  *)
    ARGV+=("$1")
    shift
    ;;
  esac
done
if [ "${#ARGV[@]}" -ne 1 ]; then
  usage
  exit 1
fi
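# Normalize the base URL to exactly one trailing slash; l/ serves the page list
# and d/ serves the page bodies.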
BASEURL="${ARGV[0]}/////"
BASEURL="${BASEURL%%/////*}"/
LISTURL="${BASEURL}l/"
PAGEURL="${BASEURL}d/"
LISTDIR="${LISTURL#http*://}"
PAGEDIR="${PAGEURL#http*://}"
KEY="$(percent_encoding "${BASEURL}")"
UPDATELIST="pagelist_${KEY}.txt"
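# Retry policy for wget: start at WAIT0 seconds and double the wait after each
# failure, aborting once the retry count exceeds MAXRETRY.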
WAIT0=8
MAXRETRY=10
"$OPT_u" && update_pagelist
"$OPT_l" && cut -b$[ ${#PAGEURL} + 1 ]- "${UPDATELIST}"
if "$OPT_b"; then
echo "$0"
echo "${ARGV[0]}"
mkdir -p "${PAGEDIR}"
"$0" -l -n "${ARGV[0]}" | while read i; do
if "$OPT_d"; then
PAGEFILE="${PAGEDIR}$i"
else
PAGEFILE="${PAGEDIR}$(pagename2filename "$i")"
fi
if [ -s "$PAGEFILE" ]; then
echo "$i" is already downloaded. nothing to do.
else
WAIT=$WAIT0
RETRY_COUNT=0
while true; do
if "$OPT_d"; then
wget -N -k -x --restrict-file-name=ascii ${PAGEURL}"$i"
else
wget -k -x -O "$PAGEFILE" ${PAGEURL}"$i"
fi
(( $? == 0 )) && break
(( MAXRETRY < RETRY_COUNT )) && {
echo "Abort: RETRY_COUNT exceeded MAXRETRY."
exit 1
}
echo "Warning: wget done with error. retry after $WAIT sec."
sleep $WAIT
let WAIT=WAIT*2
let RETRY_COUNT++
done
fi
done
fi
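
Example usage (a minimal sketch: the script file name backup_livedoorwiki.sh and the wiki name examplewiki are placeholder assumptions, not taken from the gist):

  # Refresh the page list and print the page names
  ./backup_livedoorwiki.sh -l http://wiki.livedoor.jp/examplewiki
  # Download every page into ./wiki.livedoor.jp/examplewiki/d/
  ./backup_livedoorwiki.sh -b http://wiki.livedoor.jp/examplewiki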