Last active: September 4, 2023 00:37
Save kou1okada/5230595 to your computer and use it in GitHub Desktop.
backup script for livedoor wiki.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Copyright (c) 2013 Koichi OKADA. All rights reserved.
# This script is distributed under the MIT license.
# http://www.opensource.org/licenses/mit-license.php
#
# Backup script for livedoor wiki: mirrors the wiki's page list and
# downloads every page under the given base url (see usage below).

# Basename of this script (no directory part); used in the usage message.
SCRIPTNAME=${0##*/}
# Print the command-line help text to stdout.
# Reads global: SCRIPTNAME.
usage() {
  cat <<EOHELP
Usage: ${SCRIPTNAME} [options] url
backup script for livedoor wiki.
options:
-h, --help
-n, --noupdate
-l, --list
-b, --backup
-d, --directory keep "/" in pagename
note:
url is livedoor wiki base url like http://wiki.livedoor.jp/wikiname
EOHELP
}
# Percent-encode $1 for use in a url/filename: RFC 3986 unreserved
# characters ([-._~0-9a-zA-Z]) pass through, every other byte becomes %XX.
# Prints the encoded string (no trailing newline) to stdout.
function percent_encoding ()
{
  LANG=C awk '
    BEGIN {
      str = ARGV[1]
      for (i = 0; i < 256; i++) byte[sprintf("%c", i)] = i
      for (i = 1; i <= length(str); i++) {
        ch = substr(str, i, 1)
        if (ch ~ /[-._~0-9a-zA-Z]/) printf "%s", ch
        else printf "%%%02X", byte[ch]
      }
      exit
    }' "$1"
}
# Mirror the wiki's page-list pages with wget and rebuild $UPDATELIST:
# a sorted, de-duplicated list of page urls extracted from their href="..."
# attributes, filtered down to those under $PAGEURL.
# Globals read: LISTURL, LISTDIR, PAGEURL, UPDATELIST.
# Side effects: mirrors pages into $LISTDIR, overwrites $UPDATELIST.
# NOTE: the three-argument match() requires GNU awk.
function update_pagelist ()
{
  wget -mkl0 -np "$LISTURL"
  # Pull every href="..." target out of the mirrored list pages.
  LANG=ja_JP.EUC-JP awk '
  {
    while (0 < length($0)) {
      if (match($0, /href *= *"([^"]*)"/, m)) {
        print m[1]
        $0 = substr($0, RSTART + RLENGTH)
      } else {
        $0 = ""
      }
    }
  }' "${LISTDIR}"* \
    | sort -u \
    | grep -F -- "${PAGEURL}" > "$UPDATELIST"
  # sort -u replaces sort|uniq; grep -F keeps the url's "." and "/" literal
  # instead of being interpreted as a regex.
}
# Map a wiki page name to a flat file name by percent-encoding every "/".
# $1: page name. Prints the result followed by a newline.
function pagename2filename ()
{
  # Parameter expansion instead of echo|sed: no extra processes, and safe
  # for page names that look like echo options (e.g. "-n").
  printf '%s\n' "${1//\//%2F}"
}
# Option defaults.
OPT_u=true   # update the cached page list first
OPT_l=false  # print the page list
OPT_b=false  # back up all pages
OPT_d=false  # keep "/" in page names (mirror directory layout)
ARGV=()      # positional arguments; exactly one (the wiki base url) expected
while [ $# -gt 0 ]; do
  case "$1" in
    --help|-h)
      usage
      exit
      ;;
    --noupdate|-n)
      OPT_u=false
      shift
      ;;
    --list|-l)
      OPT_l=true
      shift
      ;;
    --backup|-b)
      OPT_b=true
      shift
      ;;
    --directory|-d)
      OPT_d=true
      shift
      ;;
    *)
      ARGV+=("$1")
      shift
      ;;
  esac
done
# Exactly one url is required; exit non-zero so callers can detect misuse.
if [ ${#ARGV[@]} -ne 1 ]; then
  usage
  exit 1
fi
# Normalize the user-supplied base url: appending "/////" and then stripping
# from the first run of five slashes removes any trailing slashes from
# ARGV[0]; exactly one "/" is then re-added.
BASEURL="${ARGV[0]}/////"
BASEURL="${BASEURL%%/////*}"/
LISTURL="${BASEURL}l/"  # page-list pages live under <base>/l/
PAGEURL="${BASEURL}d/"  # page contents live under <base>/d/
# wget -m mirrors into a directory named after host+path (scheme dropped).
LISTDIR="${LISTURL#http*://}"
PAGEDIR="${PAGEURL#http*://}"
# Cache file for the page list, keyed by the percent-encoded base url.
KEY="$(percent_encoding "${BASEURL}")"
UPDATELIST="pagelist_${KEY}.txt"
WAIT0=8      # initial retry delay in seconds; doubled after each failure
MAXRETRY=10  # give up on a page after this many failed wget attempts
# Refresh the cached page list unless --noupdate was given.
"$OPT_u" && update_pagelist
# --list: print page names by stripping the common $PAGEURL prefix
# from every url in the cached list ($(( )) replaces deprecated $[ ]).
"$OPT_l" && cut -b$(( ${#PAGEURL} + 1 ))- "${UPDATELIST}"
if "$OPT_b"; then
  echo "$0"
  echo "${ARGV[0]}"
  mkdir -p "${PAGEDIR}"
  # Re-invoke ourselves to list the pages, then fetch each one.
  # IFS= read -r preserves whitespace and backslashes in page names.
  "$0" -l -n "${ARGV[0]}" | while IFS= read -r i; do
    if "$OPT_d"; then
      PAGEFILE="${PAGEDIR}$i"
    else
      PAGEFILE="${PAGEDIR}$(pagename2filename "$i")"
    fi
    if [ -s "$PAGEFILE" ]; then
      echo "$i" is already downloaded. nothing to do.
    else
      WAIT=$WAIT0
      RETRY_COUNT=0
      # Fetch with exponential-backoff retries.
      while true; do
        if "$OPT_d"; then
          wget -N -k -x --restrict-file-name=ascii "${PAGEURL}$i"
        else
          wget -k -x -O "$PAGEFILE" "${PAGEURL}$i"
        fi
        (( $? == 0 )) && break
        (( MAXRETRY < RETRY_COUNT )) && {
          echo "Abort: RETRY_COUNT exceeded MAXRETRY." >&2
          # NOTE(review): this loop runs in a pipeline subshell, so exit 1
          # leaves only the subshell; the parent script continues after fi.
          exit 1
        }
        echo "Warning: wget done with error. retry after $WAIT sec." >&2
        sleep "$WAIT"
        WAIT=$(( WAIT * 2 ))
        RETRY_COUNT=$(( RETRY_COUNT + 1 ))
      done
    fi
  done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment