Skip to content

Instantly share code, notes, and snippets.

@daTokenizer
Created February 29, 2020 13:57
Show Gist options
  • Save daTokenizer/74306cc052ee9d2a2658aaf3819ec581 to your computer and use it in GitHub Desktop.
Save daTokenizer/74306cc052ee9d2a2658aaf3819ec581 to your computer and use it in GitHub Desktop.
an up-to-date criu-docker helper script
#!/bin/bash
#
# A convenience shell script to call criu for checkpointing and restoring
# a Docker container.
#
# This script saves the user from having to remember all the command
# line options, some of which are very long. Note that once Docker
# has native support for checkpoint and restore, there will no longer
# be a need for this particular shell script.
#
# Strict mode: stop on an unhandled command failure, on expansion of an
# unset variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
#
# These can be set in the environment to override their defaults.
# Note that while the default value of CRIU_IMG_DIR in this script
# is a directory in DOCKER_HOME, it doesn't have to be tied to
# DOCKER_HOME. For example, it can be /var/spool/criu_img.
#
# Each default is assigned only when the variable is unset, so a value that
# is set but empty is respected (an empty DOCKERINIT_BINARY makes
# find_dockerinit() derive the path from the Docker version).  The
# expansions are quoted so the results are never word-split or globbed
# when handed to the no-op ':' command.
: "${DOCKER_HOME=/var/lib/docker}"
: "${DOCKER_BINARY=docker}"
: "${CRIU_IMG_DIR=${DOCKER_HOME}/criu_img}"
: "${CRIU_BINARY=criu}"
: "${DOCKERINIT_BINARY=/usr/bin/docker-init}"
#
# Patterns for different filesystem types in dump.log.
#
# AUFS: branch entries of the form ".../sys/fs/aufs/si_<id>/br<N> : <dir>".
readonly AUFS_PATTERN='/sys/fs/aufs/si_'
# OverlayFS: a mount entry whose options carry lowerdir=, upperdir= and
# workdir= (in that order).
readonly OVERLAYFS_PATTERN='type.*source.*options.*lowerdir=.*upperdir=.*workdir='
# UnionFS: a mount entry whose options carry the dirs= branch list.
readonly UNIONFS_PATTERN='type.*source.*options.*dirs='
#
# Container-related globals.  CID comes from the command line or from the
# prompt in parse_args(); init_container_vars() then replaces it with the
# ID reported by "docker inspect" and fills in CONTAINER_IMG_DIR and
# CONTAINER_DUMP_LOG.
#
declare CID
declare CONTAINER_IMG_DIR
declare CONTAINER_DUMP_LOG
# Files that are bind mounted into the container, keyed by their path
# inside the container.  Each value below is the "docker inspect" field
# holding the host-side path; find_dockerinit() adds /.dockerinit, whose
# value is a literal host path instead.
declare -A BIND_MOUNT
BIND_MOUNT[/etc/resolv.conf]=.ResolvConfPath
BIND_MOUNT[/etc/hosts]=.HostsPath
BIND_MOUNT[/etc/hostname]=.HostnamePath
# criu --ext-mount-map arguments, appended by setup_mount_map().
MOUNT_MAP_ARGS=()
#
# The default mode is non-verbose, printing only a short message
# saying if the command succeeded or failed. For the verbose mode,
# we could have used set -o xtrace but this option would have
# generated excessive output suitable for debugging, not normal
# usage. So we set ${ECHO} to echo in the verbose mode to print
# selected messages.
#
# "-v" once -v/--verbose has been seen by parse_args()
VERBOSE=""
# command that prints verbose-only messages: ":" (no-op) or "echo"
ECHO=":"
# criu action selected on the command line: "dump" or "restore"
CMD=""
# program name used in the usage text and in error messages
PGNAME=$(basename "$0")
#
# Print the usage summary on stdout and exit.
#
# Arguments: $1 - optional error message.  When given, it is written to
#                 stderr ahead of the summary and the exit status is 1;
#                 without it the exit status is 0.
# Globals:   PGNAME, DOCKER_HOME, CRIU_IMG_DIR, DOCKER_BINARY,
#            DOCKERINIT_BINARY, CRIU_BINARY (read)
#
usage() {
  local rv=0
  local init_default="${DOCKERINIT_BINARY-}"

  # An empty DOCKERINIT_BINARY means "derive it from the Docker version"
  # (see find_dockerinit), so describe that instead of printing nothing.
  if [[ -z "${init_default}" ]]; then
    init_default='${DOCKER_HOME}/init/dockerinit-<version>'
  fi

  if [[ -n "${1-}" ]]; then
    rv=1
    # printf keeps backslashes in the message literal (echo -e would
    # interpret them)
    printf '%s: %s\n\n' "${PGNAME}" "$1" >&2
  fi

  cat <<EOF
Usage:
${PGNAME} -c|-r [-hv] [<container_id>]
-c, --checkpoint checkpoint container
-h, --help print help message
-r, --restore restore container
-v, --verbose enable verbose mode
Environment:
DOCKER_HOME (default ${DOCKER_HOME})
CRIU_IMG_DIR (default ${CRIU_IMG_DIR})
DOCKER_BINARY (default ${DOCKER_BINARY})
DOCKERINIT_BINARY (default ${init_default})
CRIU_BINARY (default ${CRIU_BINARY})
EOF
  exit "${rv}"
}
#
# Determine the host file that is bind mounted as the container's
# /.dockerinit and record it in BIND_MOUNT[/.dockerinit].
#
# If the user has not specified one (DOCKERINIT_BINARY is empty), the
# name is derived from the Docker version; a relative DOCKERINIT_BINARY
# is looked up in ${DOCKER_HOME}/init.
#
# Globals: DOCKERINIT_BINARY (read and rewritten to the resolved path),
#          DOCKER_BINARY, DOCKER_HOME (read), BIND_MOUNT (written)
# Exits with status 1 if the resolved path fails the -x test.
#
find_dockerinit() {
  local version

  if [[ -z "${DOCKERINIT_BINARY}" ]]; then
    # keep only what sits between "version " and the following comma
    version=$("${DOCKER_BINARY}" --version | sed -e 's/.*version \(.*\),.*/\1/')
    DOCKERINIT_BINARY="${DOCKER_HOME}/init/dockerinit-${version}"
  elif [[ "${DOCKERINIT_BINARY}" != /* ]]; then
    DOCKERINIT_BINARY="${DOCKER_HOME}/init/${DOCKERINIT_BINARY}"
  fi

  if [[ ! -x "${DOCKERINIT_BINARY}" ]]; then
    echo "${DOCKERINIT_BINARY} does not exist or is not executable" >&2
    exit 1
  fi

  BIND_MOUNT[/.dockerinit]="${DOCKERINIT_BINARY}"
}
#
# Parse the command line.
#
# Arguments: "$@" - the script's command-line arguments
# Globals:   CMD, VERBOSE, ECHO, CID (written); PGNAME, DOCKER_BINARY (read)
#
# When no container ID is given, the containers are listed and the user is
# prompted for one.  Any command-line error ends in usage(), which exits.
#
parse_args() {
  local args
  local arg
  local -a ps_flags=()

  # Check getopt's status in the same statement: under errexit a bare
  # failing assignment would abort the script before usage() could run.
  args=$(getopt --name "${PGNAME}" --options 'chrv' \
    --longoptions 'checkpoint,help,restore,verbose' -- "$@") \
    || usage "invalid command line"

  # getopt prints a shell-quoted argument list; eval re-splits it safely
  eval set -- "${args}"
  while :; do
    arg="${1}"
    shift
    case "${arg}" in
      -c|--checkpoint) CMD="dump" ;;
      -h|--help) usage ;;
      -r|--restore) CMD="restore" ;;
      -v|--verbose) VERBOSE="-v"; ECHO="echo" ;;
      --) break ;;
      *) usage "internal error parsing arguments!" ;;
    esac
  done

  [[ -n "${CMD}" ]] || usage "need either -c or -r"
  [[ $# -le 1 ]] || usage "$# too many arguments"

  if [[ $# -eq 1 ]]; then
    CID="$1"
  else
    # no container id in args: list the candidates and prompt the user;
    # we need -a only for restore
    if [[ "${CMD}" != "dump" ]]; then
      ps_flags=(-a)
    fi
    # the ${arr[@]+...} form keeps an empty array safe under nounset
    "${DOCKER_BINARY}" ps ${ps_flags[@]+"${ps_flags[@]}"}
    read -rp $'\nContainer ID: ' CID
  fi

  [[ -n "${CID}" ]] || usage "no container ID given"
}
#
# Run a command, showing it first when in verbose mode.
#
# Arguments: "$@" - the command and its arguments
# Globals:   ECHO (read)
# Returns:   the exit status of the command
#
execute() {
  # Commands are long and tend to wrap over several lines, so a leading
  # blank line sets them apart visually.
  local -r shown="\n$*"

  ${ECHO} -e "${shown}"
  "$@"
}
#
# Fill in the container-specific globals.
#
# Globals: CID (replaced by the .Id field from "docker inspect"),
#          CONTAINER_ROOT_DIR, CONTAINER_IMG_DIR, CONTAINER_DUMP_LOG (written);
#          DOCKER_BINARY, DOCKER_HOME, CRIU_IMG_DIR (read)
# Returns: 1, after printing a message, when the storage driver reported
#          by "docker info" is not one of the known ones
#
init_container_vars() {
  local driver

  CID=$(get_container_conf .Id)
  driver=$("${DOCKER_BINARY}" info 2> /dev/null | awk '/Storage Driver:/ { print $3 }')

  # where the container's root filesystem lives depends on the driver
  case "${driver}" in
    vfs)
      CONTAINER_ROOT_DIR="${DOCKER_HOME}/${driver}/dir/${CID}"
      ;;
    aufs|unionfs)
      CONTAINER_ROOT_DIR="${DOCKER_HOME}/${driver}/mnt/${CID}"
      ;;
    overlay)
      CONTAINER_ROOT_DIR="${DOCKER_HOME}/${driver}/${CID}/merged"
      ;;
    overlay2)
      # NOTE(review): unlike overlay, no "/merged" suffix and the directory
      # is assumed to be named after the container ID - confirm against
      # the Docker version in use.
      CONTAINER_ROOT_DIR="${DOCKER_HOME}/${driver}/${CID}"
      ;;
    *)
      echo "${driver}: unknown filesystem type"
      return 1
      ;;
  esac

  CONTAINER_IMG_DIR="${CRIU_IMG_DIR}/${CID}"
  CONTAINER_DUMP_LOG="${CONTAINER_IMG_DIR}/dump.log"
}
#
# Print one field of the container's "docker inspect" output.
#
# Arguments: $1 - field reference placed inside the {{...}} template,
#                 e.g. .Id or .State.Pid
# Globals:   DOCKER_BINARY, CID (read)
# Outputs:   the field's value, with the first "<no value>" removed
# Exits with status 1 when docker printed nothing at all.
#
get_container_conf() {
  local field_value

  field_value=$("${DOCKER_BINARY}" inspect --format "{{$1}}" "${CID}")
  if [[ -z "${field_value}" ]]; then
    exit 1
  fi
  echo "${field_value/<no value>/}"
}
#
# Append one "--ext-mount-map <key>:<value>" pair to MOUNT_MAP_ARGS for
# every entry of BIND_MOUNT.
#
# Arguments: $1 - "dump" or anything else (treated as restore)
#
# For dump, every mount point maps to itself.  For restore, it maps to the
# host file: the literal BIND_MOUNT value for /.dockerinit, and the value
# looked up through get_container_conf() for all other entries.
#
setup_mount_map() {
  local -r mode="$1"
  local mount_point

  for mount_point in "${!BIND_MOUNT[@]}"; do
    case "${mode}" in
      dump)
        MOUNT_MAP_ARGS+=(--ext-mount-map "${mount_point}:${mount_point}")
        ;;
      *)
        if [[ "${mount_point}" != "/.dockerinit" ]]; then
          MOUNT_MAP_ARGS+=("--ext-mount-map" "${mount_point}:$(get_container_conf "${BIND_MOUNT[$mount_point]}")")
        else
          MOUNT_MAP_ARGS+=("--ext-mount-map" "${mount_point}:${BIND_MOUNT[$mount_point]}")
        fi
        ;;
    esac
  done
}
#
# Tell whether a directory is currently a mount point.
#
# Arguments: $1 - absolute path of the directory
# Globals:   ECHO (read)
# Outputs:   a one-line status message in verbose mode
# Returns:   0 if $1 is a mount point, 1 otherwise
#
fs_mounted() {
  # Compare the path literally with the mount point column (field 5) of
  # mountinfo.  A regex search over whole lines would also hit longer
  # paths, mount sources and mount options that merely contain the path,
  # and would treat characters such as '.' in the path as wildcards.
  if MOUNT_POINT="$1" awk '
      $5 == ENVIRON["MOUNT_POINT"] { found = 1; exit }
      END { exit !found }
    ' /proc/self/mountinfo; then
    ${ECHO} "container root directory already mounted"
    return 0
  fi
  ${ECHO} "container root directory not mounted"
  return 1
}
#
# Pretty print the mount command in verbose mode.  The option string is
# broken at every ':' and ',' so that each branch pathname ends up on a
# line of its own for easier visual inspection.
#
# Arguments: $1 - filesystem type, $2 - mount options,
#            $3 - device, $4 - mount point
#
pp_mount() {
  local -r fstype="${1}"
  local -r options="${2}"
  local operand

  ${ECHO} -e "\nmount -t ${fstype} -o"
  ${ECHO} "${options}" | tr ':,' '\n'
  for operand in "${3}" "${4}"; do
    ${ECHO} "${operand}"
  done
}
#
# Reconstruct the AUFS filesystem from information in CRIU's dump log.
# The dump log has a series of branch entries for each process in the
# entire process tree in the following form:
#
# (00.014075) /sys/fs/aufs/si_f598876b0855b883/br0 : /var/lib/docker/aufs/diff/<ID>
#
# Note that this script assumes that all processes in the process
# tree have the same AUFS filesystem. This assumption is fairly
# safe for typical Docker containers.
#
# Globals: CONTAINER_ROOT_DIR, CONTAINER_DUMP_LOG (read)
# Returns: 0 if the filesystem was already mounted or has been mounted,
#          1 if the dump log holds no branch entries
#
setup_aufs() {
  local branches

  # nothing to do if filesystem already mounted
  fs_mounted "${CONTAINER_ROOT_DIR}" && return

  # Build "dir0:dir1:..." in ascending branch order.  The branch number is
  # cut out of ".../br<N>" and sorted numerically: a plain text sort would
  # put br10 ahead of br2.  -u drops the entries repeated per process.
  branches=$(
    awk '/aufs.si_/ { idx = $2; sub(/.*\/br/, "", idx); print idx, $4 }' \
        "${CONTAINER_DUMP_LOG}" \
      | sort -k1,1n -u \
      | awk '{ print $2 }' \
      | paste -s -d ':' -
  )
  if [[ -z "${branches}" ]]; then
    echo "do not have branch information" >&2
    return 1
  fi

  # mount the container's filesystem
  pp_mount "aufs" "${branches}" "none" "${CONTAINER_ROOT_DIR}"
  mount -t aufs -o br="${branches}" none "${CONTAINER_ROOT_DIR}"
}
#
# Reconstruct the OverlayFS filesystem from information in CRIU's dump log.
# The lowerdir=, upperdir= and workdir= values are taken from the mount
# entry that matches OVERLAYFS_PATTERN.
#
# Globals: CONTAINER_ROOT_DIR, CONTAINER_DUMP_LOG, OVERLAYFS_PATTERN (read)
# Returns: 0 if the filesystem was already mounted or has been mounted,
#          1 if the dump log has no matching entry
#
setup_overlayfs() {
  local entry
  local lowerdir
  local upperdir
  local workdir
  local ovlydirs

  # nothing to do if filesystem already mounted
  fs_mounted "${CONTAINER_ROOT_DIR}" && return

  # Work on the first matching entry only: if the log repeats the entry,
  # extracting from every match would yield multi-line option values.
  entry=$(grep -m 1 -- "${OVERLAYFS_PATTERN}" "${CONTAINER_DUMP_LOG}") || {
    echo "do not have overlay directory information" >&2
    return 1
  }
  lowerdir=$(sed -n -e 's/.*lowerdir=\([^,]*\).*/\1/p' <<< "${entry}")
  upperdir=$(sed -n -e 's/.*upperdir=\([^,]*\).*/\1/p' <<< "${entry}")
  workdir=$(sed -n -e 's/.*workdir=\([^,]*\).*/\1/p' <<< "${entry}")
  ovlydirs="lowerdir=${lowerdir},upperdir=${upperdir},workdir=${workdir}"

  # mount the container's filesystem
  pp_mount "overlay" "${ovlydirs}" "overlay" "${CONTAINER_ROOT_DIR}"
  mount -t overlay -o "${ovlydirs}" overlay "${CONTAINER_ROOT_DIR}"
}
#
# Reconstruct the UnionFS filesystem from information in CRIU's dump log.
# The dump log has the mountinfo root entry for the filesystem. The
# options field contains the list of directories that make up the UnionFS.
#
# Note that this script assumes that all processes in the process
# tree have the same UnionFS filesystem. This assumption is fairly
# safe for typical Docker containers.
#
# XXX If /dev/null was manually created by Docker (i.e., it's not in
# a branch), create it. Although this has worked so far, it needs
# a deeper look as I am not sure if /dev/null should be created as
# a regular file to be the target of a bind mount or created as a
# device file by mknod.
#
# Globals: CONTAINER_ROOT_DIR, CONTAINER_DUMP_LOG (read)
# Exits with status 1 if the dump log has no dirs= entry.
#
setup_unionfs() {
  local dirs

  # nothing to do if filesystem already mounted
  fs_mounted "${CONTAINER_ROOT_DIR}" && return

  # Keep the first matching entry only and stop reading there; a log that
  # repeats the entry would otherwise produce a multi-line option string.
  dirs=$(sed -n -e '/.*type.*dirs=/{s//dirs=/p;q;}' "${CONTAINER_DUMP_LOG}")
  if [[ -z "${dirs}" ]]; then
    echo "do not have branch information" >&2
    exit 1
  fi

  # mount the container's filesystem
  pp_mount "unionfs" "${dirs}" "none" "${CONTAINER_ROOT_DIR}"
  mount -t unionfs -o "${dirs}" none "${CONTAINER_ROOT_DIR}"

  # see the XXX note in the header comment
  if [[ ! -e "${CONTAINER_ROOT_DIR}/dev/null" ]]; then
    execute touch "${CONTAINER_ROOT_DIR}/dev/null"
  fi
}
#
# Prepare for a checkpoint: find the container's process ID, clean out the
# image directory and build the dump-specific criu arguments.
#
# Globals: CONTAINER_IMG_DIR, CONTAINER_ROOT_DIR (read); CMD_ARGS (written)
# Returns: 1 if no usable process ID could be obtained
#
prep_dump() {
  local pid

  pid=$(get_container_conf .State.Pid)

  # docker returns 0 for containers it thinks have exited
  # (i.e., dumping a restored container again)
  if [[ "${pid}" == "0" ]]; then
    echo -e "\nCheckpointing a restored container?"
    read -rp "Process ID: " pid
  fi
  # refuse empty or non-numeric input rather than handing it to criu -t
  if [[ ! "${pid}" =~ ^[1-9][0-9]*$ ]]; then
    echo "${pid}: invalid process ID" >&2
    return 1
  fi

  # remove files previously created by criu but not other files (if any);
  # ':?' aborts instead of globbing from "/" should the variable be empty
  mkdir -p "${CONTAINER_IMG_DIR}"
  rm -f "${CONTAINER_IMG_DIR:?}"/*.{img,log,pid} "${CONTAINER_IMG_DIR:?}"/stats-restore

  CMD_ARGS=("-t" "${pid}")

  # we need --root only for aufs to compensate for the
  # erroneous information in /proc/<pid>/map_files
  if [[ "${CONTAINER_ROOT_DIR}" == *aufs* ]]; then
    CMD_ARGS+=("--root" "${CONTAINER_ROOT_DIR}")
  fi
}
#
# Prepare for a restore: set up the container's root filesystem if not
# already set up, and build the restore-specific criu arguments.
#
# Globals: CONTAINER_DUMP_LOG, CONTAINER_ROOT_DIR, CONTAINER_IMG_DIR,
#          AUFS_PATTERN, OVERLAYFS_PATTERN, UNIONFS_PATTERN (read);
#          MOUNTED (1 if this function bind mounted the root directory,
#          0 otherwise), CMD_ARGS (written)
# Returns: 1 if the dump log does not exist
#
prep_restore() {
  local -r f="${CONTAINER_DUMP_LOG}"

  if [[ ! -f "${f}" ]]; then
    echo "${f} does not exist" >&2
    return 1
  fi

  # the dump log tells which kind of filesystem has to be rebuilt
  if grep -q -- "${AUFS_PATTERN}" "${f}"; then
    setup_aufs
  elif grep -q -- "${OVERLAYFS_PATTERN}" "${f}"; then
    setup_overlayfs
  elif grep -q -- "${UNIONFS_PATTERN}" "${f}"; then
    setup_unionfs
  fi

  # criu requires this (due to container using pivot_root).  The root
  # directory is compared literally with the mount point column (field 5)
  # of mountinfo; a regex search over whole lines would also be satisfied
  # by longer paths or mount options that merely contain the path.
  if ROOT_DIR="${CONTAINER_ROOT_DIR}" awk '
      $5 == ENVIRON["ROOT_DIR"] { found = 1; exit }
      END { exit !found }
    ' /proc/self/mountinfo; then
    MOUNTED=0
  else
    execute mount --rbind "${CONTAINER_ROOT_DIR}" "${CONTAINER_ROOT_DIR}"
    MOUNTED=1
  fi

  CMD_ARGS=("-d" "--root" "${CONTAINER_ROOT_DIR}" "--pidfile" "${CONTAINER_IMG_DIR}/restore.pid")
}
#
# Assemble the complete criu command line for ${CMD} and run it.
#
# Globals: CMD, CRIU_BINARY, CONTAINER_IMG_DIR, CMD_ARGS (read);
#          MOUNT_MAP_ARGS (extended through setup_mount_map)
# Returns: the exit status of criu.  A caller that must keep going after
#          a criu failure has to capture it, e.g. "run_criu || rv=$?".
#
run_criu() {
  local -a criu_args=()

  # options shared by dump and restore
  criu_args+=("-v4")
  criu_args+=("-D" "${CONTAINER_IMG_DIR}")
  criu_args+=("-o" "${CMD}.log")
  criu_args+=("--manage-cgroups")
  criu_args+=("--evasive-devices")

  setup_mount_map "${CMD}"
  criu_args+=("${MOUNT_MAP_ARGS[@]}")

  # the dump- or restore-specific arguments go last
  criu_args+=("${CMD_ARGS[@]}")

  execute "${CRIU_BINARY}" "${CMD}" "${criu_args[@]}"
}
#
# Report the outcome of the criu run and clean up after a restore.
#
# Arguments: $1 - exit status of the criu run (0 means success)
# Globals:   CMD, VERBOSE, ECHO, CONTAINER_IMG_DIR, CONTAINER_ROOT_DIR,
#            MOUNTED (read)
#
wrap_up() {
  local -r logf="${CONTAINER_IMG_DIR}/${CMD}.log"
  local -r pidf="${CONTAINER_IMG_DIR}/restore.pid"
  local outcome="failed"

  if [[ "$1" -eq 0 ]]; then
    outcome="successful"
  fi
  ${ECHO} -e "\n"
  echo "${CMD} ${outcome}"

  # in verbose mode show the log's success line or, failing that, its errors
  if [[ "${VERBOSE}" == "-v" && -e "${logf}" ]]; then
    if ! grep "finished successfully" "${logf}"; then
      # A log without any "Error" line makes grep return 1.  Under errexit
      # that would end the script right here and skip the umount below.
      grep Error "${logf}" || true
    fi
  fi

  if [[ "${CMD}" == "restore" ]]; then
    # undo the bind mount made by prep_restore
    if [[ ${MOUNTED} -eq 1 ]]; then
      execute umount "${CONTAINER_ROOT_DIR}"
    fi
    # show the restored process
    if [[ -e "${pidf}" ]]; then
      ${ECHO} -e "\n$(ps -f -p "$(cat "${pidf}")" --no-headers)"
    fi
  fi
}
#
# In verbose mode print "<label>: <path>", with the path canonicalized by
# realpath when that is possible.
#
# Arguments: $1 - label, $2 - path
# Globals:   ECHO (read)
#
resolve_path() {
  local resolved="${2}"

  # This output is purely informational, so neither a missing realpath
  # nor a path it cannot resolve (e.g. a directory that has not been
  # created yet) may abort the script under errexit: fall back to the
  # path as given.
  if command -v realpath > /dev/null; then
    resolved=$(realpath -- "${2}" 2> /dev/null) || resolved="${2}"
  fi
  ${ECHO} "${1}: ${resolved}"
}
#
# In verbose mode print "<label>: <path>" for a command, after locating
# it the way the shell would.
#
# Arguments: $1 - label, $2 - command name or path
#
resolve_cmd() {
  local cpath

  # command -v is the shell's own lookup (no dependency on an external
  # "which").  A command that cannot be found is shown as given rather
  # than aborting the script during informational output.
  cpath=$(command -v -- "${2}") || cpath="${2}"
  resolve_path "${1}" "${cpath}"
}
#
# Entry point: check privileges, parse the command line, prepare for the
# requested action, run criu and report the result.
#
# Arguments: "$@" - the script's command-line arguments
# Exits with criu's exit status, or 1 when not run as root.
#
main() {
  local status=0

  (( $(id -u) == 0 )) || {
    echo "not running as root"
    exit 1
  }

  parse_args "$@"
  find_dockerinit
  init_container_vars

  # verbose mode: show what is going to be used
  if [[ "${VERBOSE}" == "-v" ]]; then
    echo
    resolve_cmd "docker binary" "${DOCKER_BINARY}"
    resolve_cmd "dockerinit binary" "${DOCKERINIT_BINARY}"
    resolve_cmd "criu binary" "${CRIU_BINARY}"
    resolve_path "image directory" "${CONTAINER_IMG_DIR}"
    resolve_path "container root directory" "${CONTAINER_ROOT_DIR}"
  fi

  case "${CMD}" in
    dump) prep_dump ;;
    *) prep_restore ;;
  esac

  # keep going after a criu failure so that wrap_up can report and clean up
  run_criu || status=$?
  wrap_up "${status}"
  exit "${status}"
}
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment