Created
May 21, 2018 17:07
-
-
Save danmcd/77cbcfff9e8a53d4b3c0c31a019862e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/overlay/generic/usr/lib/brand/jcommon/statechange b/overlay/generic/usr/lib/brand/jcommon/statechange | |
index 4f61a02..0d08300 100644 | |
--- a/overlay/generic/usr/lib/brand/jcommon/statechange | |
+++ b/overlay/generic/usr/lib/brand/jcommon/statechange | |
@@ -264,9 +264,34 @@ setup_net() | |
# it already exist? | |
# | |
if [[ -n "$isoverlay" ]]; then | |
- if ! dladm show-overlay $global_nic 2>/dev/null; then | |
- dladm create-overlay $rule -v $num $global_nic | |
+ ntries=0 | |
+ while [[ -d /var/run/ovcreate || | |
+ ! dladm show-overlay $global_nic 2>/dev/null ]]; do | |
+ ln -s /proc/$$ /var/run/ovcreate | |
if [[ $? -ne 0 ]]; then | |
+ # Someone beat us to creating | |
+ # the lock file. Pause, and try | |
+ # again, but not too many times. | |
+ ntries = $(($ntries + 1)) | |
+ if [[ $ntries -gt 5 ]]; then | |
+ logerr -p daemon.err \ | |
+ "zone $ZONENAME cannot " \ | |
+ "acquire ovcreate due to " \ | |
+ "this: " \ | |
+ `ls -l /var/run/ovcreate` | |
+ exit 1 | |
+ fi | |
+ # Continue through the while loop | |
+ # after a pause. | |
+ sleep 1 | |
+ continue | |
+ fi | |
+ dladm create-overlay $rule -v $num $global_nic | |
+ # Save the dladm return code so we can remove | |
+ # the lock immediately. | |
+ rc = $? | |
+ /bin/rm -f /var/run/ovcreate | |
+ if [[ $rc -ne 0 ]]; then | |
logger -p daemon.err "zone $ZONENAME " \ | |
"failed to create overlay device " \ | |
"$global_nic with command " \ | |
@@ -274,7 +299,7 @@ setup_net() | |
"$num $global_nic" | |
exit 1 | |
fi | |
- fi | |
+ done | |
fi | |
mgerdts
commented
May 21, 2018
And lock_exit is simply rm -f $lockdir/$lockname, right?
Alternate diff using what @mgerdts said.
diff --git a/overlay/generic/usr/lib/brand/jcommon/statechange b/overlay/generic/usr/lib/brand/jcommon/statechange
index 4f61a02..2a5b956 100644
--- a/overlay/generic/usr/lib/brand/jcommon/statechange
+++ b/overlay/generic/usr/lib/brand/jcommon/statechange
@@ -75,6 +75,42 @@ DEFAULT_MTU=1500
# o jst_mdatapath - The path the metadata socket is expected in the zone
#
+lock_enter()
+{
+ typeset lockname=$1
+ typeset lock=/var/run/$lockname
+ typeset target=/proc/$$
+
+ if [[ -z $lockname || $lockname == */* ]]; then
+ print -u2 "ERROR: invalid lock '$lockname'"
+ exit 1
+ fi
+ if [[ $lock -ef $target ]]; then
+ print -u2 "ERROR: recursive lock by pid $$"
+ exit 1
+ fi
+
+ while ! ln -s "$target" "$lock" >/dev/null 2>&1; do
+ if [[ -d $lock ]]; then
+ # Link resolves to a /proc/<pid> dir: the holder is still alive
+ sleep 0.1
+ continue
+ fi
+
+ # Lock recovery: the link dangles, so its holder is gone. Racy,
+ # but only encountered if a lock is abandoned
+ rm -f "$lock"
+ done
+}
+
+lock_exit()
+{
+ typeset lockname=$1
+ typeset lock=/var/run/$lockname
+
+ rm -f "$lock"
+}
+
get_boolean_nic_property()
{
bool_val=$(eval echo \$_ZONECFG_net_${1}_${2})
@@ -265,6 +301,7 @@ setup_net()
#
if [[ -n "$isoverlay" ]]; then
if ! dladm show-overlay $global_nic 2>/dev/null; then
+ lock_enter ovlock
dladm create-overlay $rule -v $num $global_nic
if [[ $? -ne 0 ]]; then
logger -p daemon.err "zone $ZONENAME " \
@@ -272,8 +309,10 @@ setup_net()
"$global_nic with command " \
"'dladm create-overlay $rule -v " \
"$num $global_nic"
+ lock_exit ovlock
exit 1
fi
+ lock_exit ovlock
fi
fi
Heh. Worked something up before coming back to see your changes. I think mine is pretty much the same as what you have but with some more checks around lock_exit(). Also added a warning about lock recovery.
# Directory in which lock symlinks are created.
lockdir=/var/run

#
# lock_enter <lockname>
#
# Acquire the lock named <lockname> by creating the symlink
# $lockdir/<lockname> -> /proc/$$.  Creating the symlink is the acquire
# step: ln -s fails while another link of that name exists.  The lock
# counts as held for as long as the link resolves to a directory, so a
# dangling link is treated as abandoned and is removed.
#
# Blocks (polling every 0.1s) until the lock is acquired.  Exits 1 if the
# name is empty or contains a '/', if $lockdir is not a writable
# directory, or if this pid already holds the lock.
#
function lock_enter {
	typeset lockname=$1
	typeset lock=$lockdir/$lockname
	typeset target=/proc/$$
	typeset prev

	if [[ -z $lockname || $lockname == */* ]]; then
		print -u2 "ERROR: invalid lock '$lockname'"
		exit 1
	fi
	# Without this check an unusable lock directory makes every ln -s
	# below fail, and the loop would spin forever.
	if [[ ! -d $lockdir || ! -w $lockdir ]]; then
		print -u2 "ERROR: lock directory '$lockdir' is not writable"
		exit 1
	fi
	if [[ $lock -ef $target ]]; then
		print -u2 "ERROR: recursive lock by pid $$"
		exit 1
	fi

	while ! ln -s "$target" "$lock" >/dev/null 2>&1; do
		if [[ -d $lock ]]; then
			# Process holding the lock still exists
			sleep 0.1
			continue
		fi

		# Nothing at the lock path any more: the holder released it
		# between our failed ln and the test above.  Just retry; an
		# rm here could delete a lock someone else has just taken.
		if [[ ! -e $lock && ! -L $lock ]]; then
			continue
		fi

		# Lock recovery. A little race here. Only encountered if
		# a lock is abandoned.
		prev=$(ls -l "$lock" | nawk -F/ '{print $NF}')
		print -u2 "WARNING: recovering lock $lock (abandoned by $prev)"
		rm -f "$lock"
	done
}
#
# lock_exit <lockname>
#
# Release the lock named <lockname> by removing the symlink
# $lockdir/<lockname>.  The link is removed only when it resolves to this
# process's /proc/$$ entry.  Exits 1 if the name is empty or contains a
# '/', or if the lock is not held by this pid.
#
function lock_exit {
	typeset name=$1
	typeset link=$lockdir/$name
	typeset self=/proc/$$

	# Reject names that are empty or would escape $lockdir.
	case $name in
	'' | */*)
		print -u2 "ERROR: invalid lock '$name'"
		exit 1
		;;
	esac

	# Only the holder may release: the link must resolve to our own
	# /proc directory.
	[[ $link -ef $self ]] || {
		print -u2 "ERROR: lock '$name' not held by pid $$"
		exit 1
	}

	rm -f "$link"
}
Okay... final answer (edited for locking fix):
diff --git a/overlay/generic/usr/lib/brand/jcommon/statechange b/overlay/generic/usr/lib/brand/jcommon/statechange
index 4f61a02..457f101 100644
--- a/overlay/generic/usr/lib/brand/jcommon/statechange
+++ b/overlay/generic/usr/lib/brand/jcommon/statechange
@@ -75,6 +75,53 @@ DEFAULT_MTU=1500
# o jst_mdatapath - The path the metadata socket is expected in the zone
#
+lockdir=/var/run
+function lock_enter {
+ typeset lockname=$1
+ typeset lock=$lockdir/$lockname
+ typeset target=/proc/$$
+
+ if [[ -z $lockname || $lockname == */* ]]; then
+ print -u2 "ERROR: invalid lock '$lockname'"
+ exit 1
+ fi
+ if [[ $lock -ef $target ]]; then
+ print -u2 "ERROR: recursive lock by pid $$"
+ exit 1
+ fi
+
+ while ! ln -s "$target" "$lock" >/dev/null 2>&1; do
+ if [[ -d $lock ]]; then
+ # Process holding the lock still exists
+ sleep 0.1
+ continue
+ fi
+
+ # Lock recovery. A little race here. Only encountered if
+ # a lock is abandoned.
+ typeset prev=$(ls -l "$lock" | nawk -F/ '{print $NF}')
+ print -u2 "WARNING: recovering lock $lock (abandoned by $prev)"
+ rm -f "$lock"
+ done
+}
+
+function lock_exit {
+ typeset lockname=$1
+ typeset lock=$lockdir/$lockname
+ typeset target=/proc/$$
+
+ if [[ -z $lockname || $lockname == */* ]]; then
+ print -u2 "ERROR: invalid lock '$lockname'"
+ exit 1
+ fi
+ if ! [[ $lock -ef $target ]]; then
+ print -u2 "ERROR: lock '$lockname' not held by pid $$"
+ exit 1
+ fi
+
+ rm -f "$lock"
+}
+
get_boolean_nic_property()
{
bool_val=$(eval echo \$_ZONECFG_net_${1}_${2})
@@ -265,6 +312,7 @@ setup_net()
#
if [[ -n "$isoverlay" ]]; then
+ lock_enter ovlock
if ! dladm show-overlay $global_nic 2>/dev/null; then
dladm create-overlay $rule -v $num $global_nic
if [[ $? -ne 0 ]]; then
logger -p daemon.err "zone $ZONENAME " \
@@ -272,8 +320,10 @@ setup_net()
"$global_nic with command " \
"'dladm create-overlay $rule -v " \
"$num $global_nic"
+ lock_exit ovlock
exit 1
fi
fi
+ lock_exit ovlock
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment