Last active
April 3, 2024 06:24
-
-
Save karlrwjohnson/1921b05c290edb665c238676ef847f3c to your computer and use it in GitHub Desktop.
Lockfile Demo - Demonstrates how to use Linux's "flock" command to create mutexes inside of a shell script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# Lockfiles are a feature in Linux which allow multiple processes to use a file as a mutex.
# This script contains example code based on this blog post, but with a bugfix: https://dmorgan.info/posts/linux-lock-files/
# Wrapper function for easy reuse: runs a command while holding an exclusive
# "flock" lock on a file, so that concurrent callers using the same lock file
# execute their commands one at a time.
#
# Parameters (you can hard-code them instead if you want)
#   $1 - LOCK_FILE: Which file to use for the lock.
#        Needs to be a writable location and unique to the "thing" that needs
#        to be locked on. The file doesn't need to exist yet.
#   $2 - LOCK_TIMEOUT: How many SECONDS to wait for the lock before aborting.
#        Pass an empty string ("") to wait indefinitely.
#   ...remaining args: Command to run (each argument is passed through intact)
#
# Outputs: "Running command: ..." on stdout before running the command;
#          an error message on stderr if the lock could not be acquired.
# Returns: 2 on missing arguments; flock's non-zero status if the lock could
#          not be acquired; otherwise the exit status of the command.
function lock_cmd {
  if (( $# < 2 )); then
    printf 'Usage: lock_cmd LOCK_FILE LOCK_TIMEOUT [COMMAND [ARG...]]\n' >&2
    return 2
  fi

  local lock_file="$1"
  local lock_timeout="$2"
  shift 2

  (
    # NOTE: the lock file is deliberately NOT removed when we are done.
    # Unlinking it breaks the mutex: a process that already opened the old
    # file would lock the (now unlinked) inode while a newcomer creates and
    # locks a brand-new file at the same path, so both run at once. A process
    # that merely timed out must not delete a file somebody else still holds.

    # "flock", or "file lock", takes a file descriptor ID as a parameter instead of a filename.
    # That means you either have to open a file yourself with a specific handle
    # (here, 200 is an arbitrary choice; see the redirection at the end of this subshell)
    # or let the library handle it for you and somehow get the ID of the opened file.
    local retval=0
    if [[ -n "$lock_timeout" ]]; then
      flock -x -w "$lock_timeout" 200 || retval=$?
    else
      flock -x 200 || retval=$?
    fi

    # "flock" returns 1 if the timeout has occurred.
    if (( retval != 0 )); then
      printf 'Failed to acquire lock on %s within %s seconds. Is a similar script hung?\n' \
        "$lock_file" "$lock_timeout" >&2
      exit "$retval"
    fi

    # "flock" can run a command automatically with the "-c" flag, but if the command fails
    # then it returns the command's exit code. That makes it impossible to distinguish
    # between the command failing and hitting the timeout, so we run the command ourselves.
    printf 'Running command: %s\n' "$*"
    # Quoted "$@" keeps arguments containing spaces or glob characters intact.
    "$@"
  ) 200>"$lock_file"
}
# Example usage:
# "docker network create" has a race condition: two simultaneous invocations
# can both succeed and leave two networks with the same name
# (https://github.com/moby/moby/issues/20648). The demo below serializes the
# two invocations through a shared lock file.

# Lock shared by every "docker network create" call in this demo.
lock_file="/tmp/docker-network-create-lock"
# Seconds each caller waits for the lock in the first test run.
lock_timeout=5
# Name of the throw-away network the demo creates and removes.
netname="my_network"
# Asserts that exactly one Docker network matches the given name filter.
#
# Arguments: $1 - value for `docker network ls --filter "name=..."`
# Outputs:   the matching network ID(s), one per line, followed by a colored
#            ":) Pass" or ":( Fail" line (all on stdout)
# Exits:     terminates the calling shell with status 1 unless exactly one
#            network ID was listed
function assert_one {
  local ids
  local count=0

  # Query once so the IDs shown are exactly the ones being counted.
  # (The original listing used a hard-coded 'name=foo' filter by mistake.)
  ids=$(docker network ls --filter "name=$1" --format '{{.ID}}')

  if [[ -n "$ids" ]]; then
    printf '%s\n' "$ids"
    count=$(printf '%s\n' "$ids" | wc -l)
  fi

  if (( count == 1 )); then
    echo -e "\033[1;32m:) Pass\033[0m"
  else
    echo -e "\033[1;34m:( Fail\033[0m"
    exit 1
  fi
}
# Removes every Docker network whose name matches the given filter.
#
# Arguments: $1 - value for `docker network ls --filter "name=..."`
# Returns:   the exit status of the pipeline (i.e. of xargs)
cleanup() {
  local name_filter="$1"

  # One "docker network rm" per listed ID; -r skips the call entirely when
  # nothing was listed.
  docker network ls --filter "name=${name_filter}" --format '{{.ID}}' \
    | xargs -r -n 1 docker network rm
}
# Demo driver: launches two "docker network create" calls at the same time,
# both wrapped in lock_cmd with the same lock file, then checks how many
# networks exist afterwards.

# Start from a clean slate in case an earlier run left a network behind.
cleanup "$netname"

echo -e "Testing with timeout $lock_timeout"
lock_cmd "$lock_file" "$lock_timeout" docker network create "$netname" &
first_pid=$!
lock_cmd "$lock_file" "$lock_timeout" docker network create "$netname" &
second_pid=$!

# Block until both background jobs have exited. The `wait` builtin reaps our
# own children directly, so no polling helper process is needed. A non-zero
# status is expected from whichever job could not create the network.
wait "$first_pid"
wait "$second_pid"

echo -e "There should be only one network:"
assert_one "$netname"
cleanup "$netname"

# The `flock` command has an optional timeout parameter. If we decrease it to zero,
# we see what happens when a caller cannot get the lock in time.
echo -e "Testing with timeout 0"
lock_cmd "$lock_file" 0 docker network create "$netname" &
first_pid=$!
lock_cmd "$lock_file" 0 docker network create "$netname" &
second_pid=$!

wait "$first_pid"
wait "$second_pid"

assert_one "$netname"
cleanup "$netname"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment