diff --git a/controllers/machinehealthcheck_targets.go b/controllers/machinehealthcheck_targets.go
index 6f03ed863..2a6de43e3 100644
--- a/controllers/machinehealthcheck_targets.go
+++ b/controllers/machinehealthcheck_targets.go
@@ -58,6 +58,7 @@ const (
 // healthCheckTarget contains the information required to perform a health check
 // on the node to determine if any remediation is required.
 type healthCheckTarget struct {
+    Cluster     *clusterv1.Cluster
     Machine     *clusterv1.Machine
     Node        *corev1.Node
     MHC         *clusterv1.MachineHealthCheck
@@ -116,16 +117,24 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi
     // the node has not been set yet
     if t.Node == nil {
-        // status not updated yet
-        if t.Machine.Status.LastUpdated == nil {
+        // TODO change to checking ControlPlaneReadyCondition in v1alpha4, when cluster.spec.controlPlaneRef will be required.
+        // We can't do this yet because ControlPlaneReadyCondition is only set when you're using a control plane provider,
+        // and that is optional in v1alpha3.
+        if !conditions.Has(t.Cluster, clusterv1.InfrastructureReadyCondition) || conditions.IsFalse(t.Cluster, clusterv1.InfrastructureReadyCondition) {
+            // Cluster infrastructure is not ready yet
             return false, timeoutForMachineToHaveNode
         }
-        if t.Machine.Status.LastUpdated.Add(timeoutForMachineToHaveNode).Before(now) {
+
+        infraReadyTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.InfrastructureReadyCondition)
+        if infraReadyTime == nil {
+            return false, timeoutForMachineToHaveNode // TODO this should not be possible
+        }
+        if infraReadyTime.Add(timeoutForMachineToHaveNode).Before(now) {
             conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeStartupTimeoutReason, clusterv1.ConditionSeverityWarning, "Node failed to report startup in %s", timeoutForMachineToHaveNode.String())
             logger.V(3).Info("Target is unhealthy: machine has no node", "duration", timeoutForMachineToHaveNode.String())
             return true, time.Duration(0)
         }
-        durationUnhealthy := now.Sub(t.Machine.Status.LastUpdated.Time)
+        durationUnhealthy := now.Sub(infraReadyTime.Time)
         nextCheck := timeoutForMachineToHaveNode - durationUnhealthy + time.Second
         return false, nextCheck
     }
 
@@ -168,6 +177,11 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(clusterClient client.Re
         return nil, nil
     }
 
+    var cluster clusterv1.Cluster
+    if err := clusterClient.Get(context.TODO(), client.ObjectKey{Namespace: mhc.Namespace, Name: mhc.Spec.ClusterName}, &cluster); err != nil {
+        return nil, errors.Wrapf(err, "error getting Cluster %s/%s for MachineHealthCheck %s", mhc.Namespace, mhc.Spec.ClusterName, mhc.Name)
+    }
+
     targets := []healthCheckTarget{}
     for k := range machines {
         patchHelper, err := patch.NewHelper(&machines[k], r.Client)
@@ -175,6 +189,7 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(clusterClient client.Re
             return nil, errors.Wrap(err, "unable to initialize patch helper")
         }
         target := healthCheckTarget{
+            Cluster:     &cluster,
             MHC:         mhc,
             Machine:     &machines[k],
             patchHelper: patchHelper,
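
The second hunk changes where the node-startup timeout clock starts: rather than Machine.Status.LastUpdated, it uses the LastTransitionTime of the Cluster's InfrastructureReady condition, and it does not start counting at all until that condition exists and is true. Below is a minimal, self-contained sketch of that decision in isolation; the condition struct and nodeStartupCheck helper are simplified stand-ins invented for this illustration, not the real clusterv1 or util/conditions APIs.

package main

import (
    "fmt"
    "time"
)

// condition is a simplified stand-in for clusterv1.Condition: a status plus
// the time the condition last changed state.
type condition struct {
    Status             bool
    LastTransitionTime time.Time
}

// nodeStartupCheck mirrors the patched branch of needsRemediation for a
// target whose Node is still nil. It returns whether the target should be
// considered unhealthy and, if healthy, how long to wait before rechecking.
func nodeStartupCheck(infraReady *condition, timeoutForMachineToHaveNode time.Duration, now time.Time) (bool, time.Duration) {
    // Cluster infrastructure is not ready yet: the startup clock has not started.
    if infraReady == nil || !infraReady.Status {
        return false, timeoutForMachineToHaveNode
    }
    // The machine failed to get a node within the window that opened when the
    // infrastructure became ready.
    if infraReady.LastTransitionTime.Add(timeoutForMachineToHaveNode).Before(now) {
        return true, 0
    }
    // Still inside the window: requeue for the remaining time, plus a second.
    durationUnhealthy := now.Sub(infraReady.LastTransitionTime)
    return false, timeoutForMachineToHaveNode - durationUnhealthy + time.Second
}

func main() {
    now := time.Now()
    infra := &condition{Status: true, LastTransitionTime: now.Add(-3 * time.Minute)}

    unhealthy, next := nodeStartupCheck(infra, 10*time.Minute, now)
    fmt.Println(unhealthy, next) // false 7m1s: healthy for now, check again later

    unhealthy, next = nodeStartupCheck(infra, 2*time.Minute, now)
    fmt.Println(unhealthy, next) // true 0s: node startup timed out
}

The presumable motivation is that Machine.Status.LastUpdated can move for reasons unrelated to node startup, while the infrastructure-ready transition marks the earliest moment a node could actually have joined, so the timeout is measured from a meaningful starting point.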