diff --git a/controllers/machinehealthcheck_targets.go b/controllers/machinehealthcheck_targets.go
index 6f03ed863..2a6de43e3 100644
--- a/controllers/machinehealthcheck_targets.go
+++ b/controllers/machinehealthcheck_targets.go
@@ -58,6 +58,7 @@ const (
// healthCheckTarget contains the information required to perform a health check
// on the node to determine if any remediation is required.
type healthCheckTarget struct {
+ Cluster *clusterv1.Cluster
Machine *clusterv1.Machine
Node *corev1.Node
MHC *clusterv1.MachineHealthCheck
@@ -116,16 +117,25 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi
// the node has not been set yet
if t.Node == nil {
- // status not updated yet
- if t.Machine.Status.LastUpdated == nil {
+ // TODO change to checking ControlPlaneReadyCondition in v1alpha4, when cluster.spec.controlPlaneRef will be required.
+ // We can't do this yet because ControlPlaneReadyCondition is only set when you're using a control plane provider,
+ // and that is optional in v1alpha3.
+ if !conditions.Has(t.Cluster, clusterv1.InfrastructureReadyCondition) || conditions.IsFalse(t.Cluster, clusterv1.InfrastructureReadyCondition) {
+ // Cluster infrastructure is not ready yet
return false, timeoutForMachineToHaveNode
}
- if t.Machine.Status.LastUpdated.Add(timeoutForMachineToHaveNode).Before(now) {
+
+ infraReadyTime := conditions.GetLastTransitionTime(t.Cluster, clusterv1.InfrastructureReadyCondition)
+ if infraReadyTime == nil {
+ // TODO this should not be possible; fall back to the full startup timeout so we re-check later rather than dereference a nil time below
+ return false, timeoutForMachineToHaveNode
+ }
+ if infraReadyTime.Add(timeoutForMachineToHaveNode).Before(now) {
conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSuccededCondition, clusterv1.NodeStartupTimeoutReason, clusterv1.ConditionSeverityWarning, "Node failed to report startup in %s", timeoutForMachineToHaveNode.String())
logger.V(3).Info("Target is unhealthy: machine has no node", "duration", timeoutForMachineToHaveNode.String())
return true, time.Duration(0)
}
- durationUnhealthy := now.Sub(t.Machine.Status.LastUpdated.Time)
+ durationUnhealthy := now.Sub(infraReadyTime.Time)
nextCheck := timeoutForMachineToHaveNode - durationUnhealthy + time.Second
return false, nextCheck
}
@@ -168,6 +177,11 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(clusterClient client.Re
return nil, nil
}
+ var cluster clusterv1.Cluster
+ if err := clusterClient.Get(context.TODO(), client.ObjectKey{Namespace: mhc.Namespace, Name: mhc.Spec.ClusterName}, &cluster); err != nil {
+ return nil, errors.Wrapf(err, "error getting Cluster %s/%s for MachineHealthCheck %s", mhc.Namespace, mhc.Spec.ClusterName, mhc.Name)
+ }
+
targets := []healthCheckTarget{}
for k := range machines {
patchHelper, err := patch.NewHelper(&machines[k], r.Client)
@@ -175,6 +189,7 @@ func (r *MachineHealthCheckReconciler) getTargetsFromMHC(clusterClient client.Re
return nil, errors.Wrap(err, "unable to initialize patch helper")
}
target := healthCheckTarget{
+ Cluster: &cluster,
MHC: mhc,
Machine: &machines[k],
patchHelper: patchHelper,
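
For context on what the patched needsRemediation now does when a Machine has no Node yet: the node-startup clock starts at the last transition time of the Cluster's InfrastructureReadyCondition instead of Machine.Status.LastUpdated. The standalone sketch below mirrors only that arithmetic; the function name, the plain time.Time inputs, and the sample values are illustrative and not part of the patch.

package main

import (
	"fmt"
	"time"
)

// nodeStartupCheck mirrors the timeout arithmetic in the patched needsRemediation:
// the startup window is measured from the Cluster's InfrastructureReadyCondition
// last-transition time rather than Machine.Status.LastUpdated.
// Illustrative sketch only; the real code works with *metav1.Time and conditions helpers.
func nodeStartupCheck(infraReadyTime, now time.Time, timeoutForMachineToHaveNode time.Duration) (unhealthy bool, nextCheck time.Duration) {
	if infraReadyTime.Add(timeoutForMachineToHaveNode).Before(now) {
		// The Machine never got a Node within the allowed window: remediate now.
		return true, 0
	}
	durationUnhealthy := now.Sub(infraReadyTime)
	// Re-check shortly after the timeout would elapse, as in the diff.
	return false, timeoutForMachineToHaveNode - durationUnhealthy + time.Second
}

func main() {
	now := time.Now()
	// Example: infrastructure became ready 4 minutes ago, 10-minute startup timeout.
	unhealthy, next := nodeStartupCheck(now.Add(-4*time.Minute), now, 10*time.Minute)
	fmt.Println(unhealthy, next) // false, ~6m1s: requeue roughly when the timeout would expire
}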