Skip to content

Instantly share code, notes, and snippets.

@jfut
Last active August 7, 2023 07:31
Show Gist options
  • Save jfut/eab53d92a91ec8f3867df7f76c4e302a to your computer and use it in GitHub Desktop.
Save jfut/eab53d92a91ec8f3867df7f76c4e302a to your computer and use it in GitHub Desktop.
wget-mirror-adjust
#!/bin/bash
#
# Mirror a site with wget, keeping pages that have no file extension at their original paths.
#
# Copyright (c) 2023 Jun Futagawa (jfut)
#
# This software is released under the MIT License.
# http://opensource.org/licenses/mit-license.php
# Treat unset variables as errors and make a pipeline fail if any stage
# fails. `-e` is not enabled, so a failing command does not abort the script.
set -uo pipefail

# The single positional argument is the URL to mirror.
TARGET_URL="${1:-}"
if [[ -z "${TARGET_URL}" ]]; then
    # Usage errors are diagnostics, so they go to stderr.
    echo "Usage $0 TARGET_URL" >&2
    exit 1
fi

# Derive the bare host name from the URL. wget stores a recursive download
# below <directory-prefix>/<host>/, so the paths built from FQDN further down
# must contain the host only, never host plus path.
FQDN="${TARGET_URL}"
# https://example.org/dir/ -> example.org/dir/
FQDN="${FQDN#*//}"
# example.org/dir/ -> example.org
# `%%/*` removes everything from the first slash onwards. (`%*/` would strip
# only a single trailing slash and leave the path component in place.)
FQDN="${FQDN%%/*}"

# Absolute path of the final, merged mirror.
BASE_DIR="$(pwd)/${FQDN}"

echo "# FQDN: ${FQDN}"
echo "# BASE_DIR: ${BASE_DIR}"
# Pass 1: mirror without -E, so every file keeps the name taken from its URL
# (pages without an extension are stored without one).
echo "# Mirror without --adjust-extension option"
MIRROR_NO_ADJUST_DIR="./${FQDN}.noadjust"
wget -m -p -np --directory-prefix="${MIRROR_NO_ADJUST_DIR}" -q --show-progress "${TARGET_URL}"

# Seed the result tree from pass 1.
# When the source is a directory, copy its *contents* ("SRC/.") into the
# destination. A plain `cp -a SRC DST` would nest the tree as DST/<basename>
# whenever DST already exists, e.g. when the script is run a second time.
NO_ADJUST_TREE="${MIRROR_NO_ADJUST_DIR}/${FQDN}"
if [[ -d "${NO_ADJUST_TREE}" ]]; then
    mkdir -p -- "${FQDN}"
    cp -a -- "${NO_ADJUST_TREE}/." "${FQDN}/"
else
    # Not a directory (single file, or nothing was downloaded): plain copy,
    # which also surfaces cp's own error message if the source is missing.
    cp -a -- "${NO_ADJUST_TREE}" "${FQDN}"
fi

# Pass 2: mirror with -E (--adjust-extension) into a separate tree. It is only
# used as the source of "<dir>.html" files in the adjustment step.
echo "# Mirror with --adjust-extension option"
MIRROR_ADJUST_DIR="./${FQDN}.adjust"
wget -m -p -E -np --directory-prefix="${MIRROR_ADJUST_DIR}" -q --show-progress "${TARGET_URL}"
echo
echo -n "# Target: "
# pushd prints the directory stack, which completes the "# Target: " line.
# Only walk the tree if the directory change succeeded; otherwise the loop
# below would run find/rm/mkdir relative to the wrong directory.
if pushd "${MIRROR_ADJUST_DIR}/${FQDN}"; then
    # Adjust no extension pages:
    # for every directory D in the adjusted mirror that has a sibling file
    # "D.html", install that file as "${BASE_DIR}/D/index.html".
    # Directory names are read NUL-delimited so that names containing
    # whitespace or glob characters are handled intact.
    while IFS= read -r -d '' DIR_NAME; do
        echo "${DIR_NAME}"

        PARENT_DIR="${BASE_DIR}/${DIR_NAME}"
        # In the result tree the same path may exist as a regular file;
        # remove it so that a directory can take its place.
        if [[ -f "${PARENT_DIR}" ]]; then
            rm -f -- "${PARENT_DIR}"
        fi
        if [[ ! -d "${PARENT_DIR}" ]]; then
            mkdir -p -- "${PARENT_DIR}"
        fi

        NO_EXTENSION_FILE="${DIR_NAME}.html"
        if [[ -f "${NO_EXTENSION_FILE}" ]]; then
            # Normalise the path for display; fall back to the raw path if
            # realpath is unavailable or fails.
            INDEX_FILE=$(realpath "${PARENT_DIR}/index.html") \
                || INDEX_FILE="${PARENT_DIR}/index.html"
            # ./doc/user.html -> /path/to/example.org/example.org/doc/user/index.html
            echo "${NO_EXTENSION_FILE} -> ${INDEX_FILE}"
            # -f replaces an existing index.html without prompting.
            cp -af -- "${NO_EXTENSION_FILE}" "${INDEX_FILE}"
        fi
    done < <(find . -type d -print0)
    popd
else
    # Terminate the dangling "# Target: " line before reporting the problem.
    echo
    echo "Cannot enter ${MIRROR_ADJUST_DIR}/${FQDN}; skipping adjustment" >&2
fi
# Cleanup: remove both intermediate mirror trees; only BASE_DIR is kept.
# Options come before the operands and `--` ends option parsing, so this does
# not depend on rm reordering its arguments.
rm -rf -- "${MIRROR_NO_ADJUST_DIR}" "${MIRROR_ADJUST_DIR}"

echo
echo "# Result: ${BASE_DIR}"
@jfut
Copy link
Author

jfut commented Aug 7, 2023

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment