TJM/cluster_patching.pp

## cluster_patching.pp
# Wrapper for the patchy::patching plan that runs 'patching' one node at a
# time, rather than on all nodes at once.
#
# @summary Patch the given targets sequentially via patchy::patching.
#
# The targets are resolved with get_targets() and patchy::patching is invoked
# once per target, in order. Every parameter other than $targets is forwarded
# unchanged to patchy::patching; see that plan for what each one controls.
#
# @param targets Targets to patch; each one is passed to patchy::patching on its own.
# @param reboot Forwarded to patchy::patching.
# @param yum_params Forwarded to patchy::patching.
# @param dpkg_params Forwarded to patchy::patching.
# @param zypper_params Forwarded to patchy::patching.
# @param patch_task_timeout Forwarded to patchy::patching.
# @param health_check_runinterval Forwarded to patchy::patching.
# @param reboot_wait_time Forwarded to patchy::patching.
# @param security_only Forwarded to patchy::patching.
# @param run_health_check Forwarded to patchy::patching.
# @param clean_cache Forwarded to patchy::patching.
# @param health_check_noop Forwarded to patchy::patching.
# @param health_check_use_cached_catalog Forwarded to patchy::patching.
# @param health_check_service_enabled Forwarded to patchy::patching.
# @param health_check_service_running Forwarded to patchy::patching.
# @param post_reboot_scriptpath Forwarded to patchy::patching.
#
# @return [Hash] One entry per target: target name => result of patchy::patching
#   for that target.
plan patchy::cluster_patching (
  TargetSpec $targets,
  Optional[Enum['always', 'never', 'patched', 'smart']] $reboot = 'patched',
  Optional[String] $yum_params = undef,
  Optional[String] $dpkg_params = undef,
  Optional[String] $zypper_params = undef,
  Optional[Integer] $patch_task_timeout = 3600,
  Optional[Integer] $health_check_runinterval = 1800,
  Optional[Integer] $reboot_wait_time = 600,
  Optional[Boolean] $security_only = false,
  Optional[Boolean] $run_health_check = true,
  Optional[Boolean] $clean_cache = false,
  Optional[Boolean] $health_check_noop = false,
  Optional[Boolean] $health_check_use_cached_catalog = false,
  Optional[Boolean] $health_check_service_enabled = true,
  Optional[Boolean] $health_check_service_running = true,
  Optional[Pe_patch::Absolutepath] $post_reboot_scriptpath = undef,
){
  # Resolve the TargetSpec into an array of Target objects.
  $target_array = get_targets($targets)

  # Rebuild the argument list so it can be handed to run_plan() as a hash.
  # Every value is passed through exactly as received (zypper_params was
  # previously hard-coded to undef, which discarded the caller's value).
  $args = {
    'reboot'                          => $reboot,
    'yum_params'                      => $yum_params,
    'dpkg_params'                     => $dpkg_params,
    'zypper_params'                   => $zypper_params,
    'patch_task_timeout'              => $patch_task_timeout,
    'health_check_runinterval'        => $health_check_runinterval,
    'reboot_wait_time'                => $reboot_wait_time,
    'security_only'                   => $security_only,
    'run_health_check'                => $run_health_check,
    'clean_cache'                     => $clean_cache,
    'health_check_noop'               => $health_check_noop,
    'health_check_use_cached_catalog' => $health_check_use_cached_catalog,
    'health_check_service_enabled'    => $health_check_service_enabled,
    'health_check_service_running'    => $health_check_service_running,
    'post_reboot_scriptpath'          => $post_reboot_scriptpath,
  }

  # reduce() visits the targets one after another, so each run_plan() call
  # finishes before the next target is started.
  $results = $target_array.reduce({}) |$memo, $target| {
    $result = run_plan('patchy::patching', $target, $args)
    $memo + { $target.name => $result }
  }

  # Output the results
  return($results)
}

## patching.pp
# This is a copy of pe_patch::group_patching, but adapted to take $targets instead of a patch_group
#
# @summary Health-check, patch, wait for reboot, and re-check a set of targets.
#
# Phases (each one only acts on the targets that passed the previous phase):
#   1. If $run_health_check: pe_patch::agent_health, then a puppet run.
#   2. pe_patch::patch_server on the remaining targets.
#   3. For targets whose patch result reports 'was_rebooted': poll
#      pe_patch::last_boot_time until the value changes or the wait times out.
#   4. If $post_reboot_scriptpath is set: run it with run_command().
#   5. If $run_health_check: a post-patch puppet run.
#
# @param targets Targets to patch, resolved with get_targets().
# @param reboot Passed as 'reboot' to pe_patch::patch_server.
# @param yum_params Passed as 'yum_params' to pe_patch::patch_server.
# @param dpkg_params Passed as 'dpkg_params' to pe_patch::patch_server.
# @param zypper_params Passed as 'zypper_params' to pe_patch::patch_server.
# @param patch_task_timeout Passed as 'timeout' to pe_patch::patch_server.
# @param health_check_runinterval Passed as 'target_runinterval' to pe_patch::agent_health.
# @param reboot_wait_time Limit, compared against elapsed seconds, on how long
#   to wait for rebooting targets; also caps the number of polling iterations.
# @param security_only Passed as 'security_only' to pe_patch::patch_server.
# @param run_health_check Enables the pre-patch health check / puppet run and
#   the post-patch puppet run.
# @param clean_cache Passed as 'clean_cache' to pe_patch::patch_server.
# @param health_check_noop Passed as 'target_noop_state' to pe_patch::agent_health.
# @param health_check_use_cached_catalog Passed as 'target_use_cached_catalog_state'
#   to pe_patch::agent_health.
# @param health_check_service_enabled Passed as 'target_service_enabled' to pe_patch::agent_health.
# @param health_check_service_running Passed as 'target_service_running' to pe_patch::agent_health.
# @param post_reboot_scriptpath Command run on the patched targets after the reboot wait.
#
# @return [Hash] Keys: 'targets', 'patchable_nodes', 'puppet_health_check_failed',
#   'pre_patch_puppet_run_failed', 'patching_failed', 'post_patch_puppet_run_failed',
#   'reboot_timed_out', 'nodes_patched', and 'counts' (the length of each list).
plan patchy::patching (
  TargetSpec $targets,
  Optional[Enum['always', 'never', 'patched', 'smart']] $reboot = 'patched',
  Optional[String] $yum_params = undef,
  Optional[String] $dpkg_params = undef,
  Optional[String] $zypper_params = undef,
  Optional[Integer] $patch_task_timeout = 3600,
  Optional[Integer] $health_check_runinterval = 1800,
  Optional[Integer] $reboot_wait_time = 600,
  Optional[Boolean] $security_only = false,
  Optional[Boolean] $run_health_check = true,
  Optional[Boolean] $clean_cache = false,
  Optional[Boolean] $health_check_noop = false,
  Optional[Boolean] $health_check_use_cached_catalog = false,
  Optional[Boolean] $health_check_service_enabled = true,
  Optional[Boolean] $health_check_service_running = true,
  Optional[Pe_patch::Absolutepath] $post_reboot_scriptpath = undef,
){
  # Resolve the TargetSpec into an array of Target objects.
  $target_array = get_targets($targets)

  unless $target_array.empty {
    ### Health Check, Input: $target_array, Output: $patch_ready ###
    ### Add'l result params: $puppet_not_healthy, $pre_patch_puppet_run_failed ###
    if $run_health_check {
      # Check the health of the puppet agent on all nodes
      # Ensure puppet configuration is as expected, agent hasn't been disabled
      # with puppet agent --disable, puppet ssl verify passes, the puppet
      # service is in the right state, all servers are reachable, and the
      # last puppet run didn't have failures.
      $agent_health = run_task('pe_patch::agent_health', $target_array,
        target_runinterval              => $health_check_runinterval,
        target_noop_state               => $health_check_noop,
        target_use_cached_catalog_state => $health_check_use_cached_catalog,
        target_service_enabled          => $health_check_service_enabled,
        target_service_running          => $health_check_service_running,
        '_catch_errors'                 => true)

      # Pull out list of those that are ok/in error
      $puppet_healthy = $agent_health.ok_set.names
      $puppet_not_healthy = $agent_health.error_set.results.map | $error | { $error.error.details }

      if $puppet_healthy.empty {
        $patch_ready = []
      } else {
        $pre_patch_run_puppet_check = run_task('enterprise_tasks::run_puppet', $puppet_healthy,
          max_timeout     => 256,
          '_catch_errors' => true)
        $patch_ready = $pre_patch_run_puppet_check.ok_set.names
        $pre_patch_puppet_run_failed = $pre_patch_run_puppet_check.error_set.names
      }
    } else {
      # Health check skipped: every resolved target is eligible for patching.
      # This used to read $certnames, a variable that is never defined in this
      # plan, so nothing was patched when the health check was disabled.
      $patch_ready = $target_array.map |$t| { $t.name }
    }

    ### Patching, Input: $patch_ready, Output: $post_patch_ready ###
    ### Add'l result params: $not_patched, $reboot_timed_out ###
    if $patch_ready.empty {
      $post_patch_ready = []
    } else {
      # So we can detect when a node has rebooted
      $begin_boot_time_results = without_default_logging() || {
        run_task('pe_patch::last_boot_time', $patch_ready)
      }

      # Actually carry out the patching on all healthy nodes
      $patch_result = run_task('pe_patch::patch_server',
                            $patch_ready,
                            yum_params      => $yum_params,
                            dpkg_params     => $dpkg_params,
                            zypper_params   => $zypper_params,
                            timeout         => $patch_task_timeout,
                            reboot          => $reboot,
                            security_only   => $security_only,
                            clean_cache     => $clean_cache,
                            '_catch_errors' => true)

      # Pull out list of those that are ok/in error
      $patched = $patch_result.ok_set.names
      $not_patched = $patch_result.error_set.names
      $rebooting_result = $patch_result.ok_set.results.filter | $result | { $result.value['was_rebooted'] }
      $rebooting = $rebooting_result.map | $result | { $result.target.name }

      ### Wait for Reboot ###
      if $rebooting.empty {
        $post_patch_ready = $patched
      } else {
        # Adapted from puppetlabs-reboot
        $start_time = Timestamp()
        $wait_results = without_default_logging() || {
          # reduce() over the Integer only bounds the number of polling rounds;
          # the loop normally ends through break() once nothing is pending or
          # the previous round timed out.
          $reboot_wait_time.reduce({'pending' => $rebooting, 'ok' => []}) |$memo, $_| {
            if ($memo['pending'].empty or $memo['timed_out']) {
              break()
            }

            $plural = $memo['pending'].size > 1 ? {
              true => 's',
              default => '',
            }
            out::message("Waiting for ${$memo['pending'].size} node${plural} to reboot. Note that a failed pe_patch::last_boot_time task is normal while a target is in the middle of rebooting, and may be safely ignored.")
            $current_boot_time_results = run_task('pe_patch::last_boot_time', $memo['pending'], _catch_errors => true)

            $failed_results = $current_boot_time_results.filter |$current_boot_time_res| {
              # If we errored, need to check again, since it's probably still rebooting
              if !$current_boot_time_res.ok {
                true
              } else {
                # If the boot time is the same as it was before we patched,
                # we haven't rebooted yet and need to check again.
                $target_name = $current_boot_time_res.target.name
                $begin_boot_time_res = $begin_boot_time_results.find($target_name)
                $current_boot_time_res.value == $begin_boot_time_res.value
              }
            }

            # Turn array of results into ResultSet so we can extract Targets
            $failed_targets = ResultSet($failed_results).targets.map |$t| { $t.name }
            $ok_targets = $memo['pending'] - $failed_targets

            $elapsed_time_sec = Integer(Timestamp() - $start_time)
            $timed_out = $elapsed_time_sec >= $reboot_wait_time

            if !$failed_targets.empty and !$timed_out {
              # Wait for targets to be available again before rechecking. If we end up failing
              # this wait on any of those nodes, we'll catch it in the next iteration.
              pe_patch::sleep(30)
              $remaining_time = $reboot_wait_time - $elapsed_time_sec
              wait_until_available($failed_targets, wait_time => $remaining_time, retry_interval => 1, '_catch_errors' => true)
            }

            # New accumulator for the next round.
            ({
              'pending' => $failed_targets,
              'ok'      => $memo['ok'] + $ok_targets,
              'timed_out' => $timed_out,
            })
          }
        }
        # Whatever is still pending when the loop ends never came back in time.
        $reboot_timed_out = $wait_results['pending']
        $post_patch_ready = $patched - $reboot_timed_out
      }
    }

    ### Post reboot script, Input: $post_patch_ready, Output: None ###
    # Run the post_reboot_scriptpath, if defined. Don't fail the plan
    # if the script fails. The user will be able to see the result in
    # the console.
    if $post_reboot_scriptpath {
      run_command($post_reboot_scriptpath, $post_patch_ready, '_catch_errors' => true)
    }

    ### Post patching health check, Input: $post_patch_ready, Output: $post_patch_puppet_run_passed ###
    ### Add'l result params: $post_patch_puppet_run_failed ###
    if $post_patch_ready.empty or !$run_health_check {
      $post_patch_puppet_run_passed = $post_patch_ready
    } else {
      # Sometimes a puppet run immediately after reboot fails, so give it a bit of time.
      pe_patch::sleep(30)
      $post_puppet_check = run_task('enterprise_tasks::run_puppet', $post_patch_ready,
        max_timeout     => 256,
        '_catch_errors' => true)
      $post_patch_puppet_run_passed = $post_puppet_check.ok_set.names
      $post_patch_puppet_run_failed = $post_puppet_check.error_set.names
    }
  }

  ### Defaults ###
  # Variables assigned inside the conditional branches above may not exist,
  # depending on the path taken; fall back to an empty list for each.
  # Note: $targets and $target_array are always defined,
  # so no need to set a default value here.
  $puppet_not_healthy_result = defined('$puppet_not_healthy') ? {
    true    => $puppet_not_healthy,
    default => [],
  }

  $pre_patch_puppet_run_failed_result = defined('$pre_patch_puppet_run_failed') ? {
    true    => $pre_patch_puppet_run_failed,
    default => [],
  }

  $patched_result = defined('$patched') ? {
    true    => $patched,
    default => [],
  }

  $not_patched_result = defined('$not_patched') ? {
    true    => $not_patched,
    default => [],
  }

  $post_patch_puppet_run_failed_result = defined('$post_patch_puppet_run_failed') ? {
    true    => $post_patch_puppet_run_failed,
    default => [],
  }

  $reboot_timed_out_result = defined('$reboot_timed_out') ? {
    true    => $reboot_timed_out,
    default => [],
  }

  # Output the results
  return({
    'targets'                      => $targets,
    'patchable_nodes'              => $target_array,
    'puppet_health_check_failed'   => $puppet_not_healthy_result,
    'pre_patch_puppet_run_failed'  => $pre_patch_puppet_run_failed_result,
    'patching_failed'              => $not_patched_result,
    'post_patch_puppet_run_failed' => $post_patch_puppet_run_failed_result,
    'reboot_timed_out'             => $reboot_timed_out_result,
    'nodes_patched'                => $patched_result,
    'counts'                       => {
      'patchable_nodes_count'           => $target_array.length,
      'puppet_health_check_failed'      => $puppet_not_healthy_result.length,
      'pre_patch_puppet_run_failed'     => $pre_patch_puppet_run_failed_result.length,
      'patching_failed'                 => $not_patched_result.length,
      'post_patch_puppet_run_failed'    => $post_patch_puppet_run_failed_result.length,
      'reboot_timed_out'                => $reboot_timed_out_result.length,
      'nodes_patched'                   => $patched_result.length,
    }
  })
}
	# Wrapper for the patchy::patching plan that runs 'patching' one node at a
	# time, rather than on all nodes at once.
	#
	# @summary Patch the given targets sequentially via patchy::patching.
	#
	# The targets are resolved with get_targets() and patchy::patching is invoked
	# once per target, in order. Every parameter other than $targets is forwarded
	# unchanged to patchy::patching; see that plan for what each one controls.
	#
	# @param targets Targets to patch; each one is passed to patchy::patching on its own.
	# @param reboot Forwarded to patchy::patching.
	# @param yum_params Forwarded to patchy::patching.
	# @param dpkg_params Forwarded to patchy::patching.
	# @param zypper_params Forwarded to patchy::patching.
	# @param patch_task_timeout Forwarded to patchy::patching.
	# @param health_check_runinterval Forwarded to patchy::patching.
	# @param reboot_wait_time Forwarded to patchy::patching.
	# @param security_only Forwarded to patchy::patching.
	# @param run_health_check Forwarded to patchy::patching.
	# @param clean_cache Forwarded to patchy::patching.
	# @param health_check_noop Forwarded to patchy::patching.
	# @param health_check_use_cached_catalog Forwarded to patchy::patching.
	# @param health_check_service_enabled Forwarded to patchy::patching.
	# @param health_check_service_running Forwarded to patchy::patching.
	# @param post_reboot_scriptpath Forwarded to patchy::patching.
	#
	# @return [Hash] One entry per target: target name => result of patchy::patching
	#   for that target.
	plan patchy::cluster_patching (
	  TargetSpec $targets,
	  Optional[Enum['always', 'never', 'patched', 'smart']] $reboot = 'patched',
	  Optional[String] $yum_params = undef,
	  Optional[String] $dpkg_params = undef,
	  Optional[String] $zypper_params = undef,
	  Optional[Integer] $patch_task_timeout = 3600,
	  Optional[Integer] $health_check_runinterval = 1800,
	  Optional[Integer] $reboot_wait_time = 600,
	  Optional[Boolean] $security_only = false,
	  Optional[Boolean] $run_health_check = true,
	  Optional[Boolean] $clean_cache = false,
	  Optional[Boolean] $health_check_noop = false,
	  Optional[Boolean] $health_check_use_cached_catalog = false,
	  Optional[Boolean] $health_check_service_enabled = true,
	  Optional[Boolean] $health_check_service_running = true,
	  Optional[Pe_patch::Absolutepath] $post_reboot_scriptpath = undef,
	){
	  # Resolve the TargetSpec into an array of Target objects.
	  $target_array = get_targets($targets)

	  # Rebuild the argument list so it can be handed to run_plan() as a hash.
	  # Every value is passed through exactly as received (zypper_params was
	  # previously hard-coded to undef, which discarded the caller's value).
	  $args = {
	    'reboot'                          => $reboot,
	    'yum_params'                      => $yum_params,
	    'dpkg_params'                     => $dpkg_params,
	    'zypper_params'                   => $zypper_params,
	    'patch_task_timeout'              => $patch_task_timeout,
	    'health_check_runinterval'        => $health_check_runinterval,
	    'reboot_wait_time'                => $reboot_wait_time,
	    'security_only'                   => $security_only,
	    'run_health_check'                => $run_health_check,
	    'clean_cache'                     => $clean_cache,
	    'health_check_noop'               => $health_check_noop,
	    'health_check_use_cached_catalog' => $health_check_use_cached_catalog,
	    'health_check_service_enabled'    => $health_check_service_enabled,
	    'health_check_service_running'    => $health_check_service_running,
	    'post_reboot_scriptpath'          => $post_reboot_scriptpath,
	  }

	  # reduce() visits the targets one after another, so each run_plan() call
	  # finishes before the next target is started.
	  $results = $target_array.reduce({}) |$memo, $target| {
	    $result = run_plan('patchy::patching', $target, $args)
	    $memo + { $target.name => $result }
	  }

	  # Output the results
	  return($results)
	}
	# This is a copy of pe_patch::group_patching, but adapted to take $targets instead of a patch_group
	#
	# @summary Health-check, patch, wait for reboot, and re-check a set of targets.
	#
	# Phases (each one only acts on the targets that passed the previous phase):
	#   1. If $run_health_check: pe_patch::agent_health, then a puppet run.
	#   2. pe_patch::patch_server on the remaining targets.
	#   3. For targets whose patch result reports 'was_rebooted': poll
	#      pe_patch::last_boot_time until the value changes or the wait times out.
	#   4. If $post_reboot_scriptpath is set: run it with run_command().
	#   5. If $run_health_check: a post-patch puppet run.
	#
	# @param targets Targets to patch, resolved with get_targets().
	# @param reboot Passed as 'reboot' to pe_patch::patch_server.
	# @param yum_params Passed as 'yum_params' to pe_patch::patch_server.
	# @param dpkg_params Passed as 'dpkg_params' to pe_patch::patch_server.
	# @param zypper_params Passed as 'zypper_params' to pe_patch::patch_server.
	# @param patch_task_timeout Passed as 'timeout' to pe_patch::patch_server.
	# @param health_check_runinterval Passed as 'target_runinterval' to pe_patch::agent_health.
	# @param reboot_wait_time Limit, compared against elapsed seconds, on how long
	#   to wait for rebooting targets; also caps the number of polling iterations.
	# @param security_only Passed as 'security_only' to pe_patch::patch_server.
	# @param run_health_check Enables the pre-patch health check / puppet run and
	#   the post-patch puppet run.
	# @param clean_cache Passed as 'clean_cache' to pe_patch::patch_server.
	# @param health_check_noop Passed as 'target_noop_state' to pe_patch::agent_health.
	# @param health_check_use_cached_catalog Passed as 'target_use_cached_catalog_state'
	#   to pe_patch::agent_health.
	# @param health_check_service_enabled Passed as 'target_service_enabled' to pe_patch::agent_health.
	# @param health_check_service_running Passed as 'target_service_running' to pe_patch::agent_health.
	# @param post_reboot_scriptpath Command run on the patched targets after the reboot wait.
	#
	# @return [Hash] Keys: 'targets', 'patchable_nodes', 'puppet_health_check_failed',
	#   'pre_patch_puppet_run_failed', 'patching_failed', 'post_patch_puppet_run_failed',
	#   'reboot_timed_out', 'nodes_patched', and 'counts' (the length of each list).
	plan patchy::patching (
	  TargetSpec $targets,
	  Optional[Enum['always', 'never', 'patched', 'smart']] $reboot = 'patched',
	  Optional[String] $yum_params = undef,
	  Optional[String] $dpkg_params = undef,
	  Optional[String] $zypper_params = undef,
	  Optional[Integer] $patch_task_timeout = 3600,
	  Optional[Integer] $health_check_runinterval = 1800,
	  Optional[Integer] $reboot_wait_time = 600,
	  Optional[Boolean] $security_only = false,
	  Optional[Boolean] $run_health_check = true,
	  Optional[Boolean] $clean_cache = false,
	  Optional[Boolean] $health_check_noop = false,
	  Optional[Boolean] $health_check_use_cached_catalog = false,
	  Optional[Boolean] $health_check_service_enabled = true,
	  Optional[Boolean] $health_check_service_running = true,
	  Optional[Pe_patch::Absolutepath] $post_reboot_scriptpath = undef,
	){
	  # Resolve the TargetSpec into an array of Target objects.
	  $target_array = get_targets($targets)

	  unless $target_array.empty {
	    ### Health Check, Input: $target_array, Output: $patch_ready ###
	    ### Add'l result params: $puppet_not_healthy, $pre_patch_puppet_run_failed ###
	    if $run_health_check {
	      # Check the health of the puppet agent on all nodes
	      # Ensure puppet configuration is as expected, agent hasn't been disabled
	      # with puppet agent --disable, puppet ssl verify passes, the puppet
	      # service is in the right state, all servers are reachable, and the
	      # last puppet run didn't have failures.
	      $agent_health = run_task('pe_patch::agent_health', $target_array,
	        target_runinterval              => $health_check_runinterval,
	        target_noop_state               => $health_check_noop,
	        target_use_cached_catalog_state => $health_check_use_cached_catalog,
	        target_service_enabled          => $health_check_service_enabled,
	        target_service_running          => $health_check_service_running,
	        '_catch_errors'                 => true)

	      # Pull out list of those that are ok/in error
	      $puppet_healthy = $agent_health.ok_set.names
	      $puppet_not_healthy = $agent_health.error_set.results.map | $error | { $error.error.details }

	      if $puppet_healthy.empty {
	        $patch_ready = []
	      } else {
	        $pre_patch_run_puppet_check = run_task('enterprise_tasks::run_puppet', $puppet_healthy,
	          max_timeout     => 256,
	          '_catch_errors' => true)
	        $patch_ready = $pre_patch_run_puppet_check.ok_set.names
	        $pre_patch_puppet_run_failed = $pre_patch_run_puppet_check.error_set.names
	      }
	    } else {
	      # Health check skipped: every resolved target is eligible for patching.
	      # This used to read $certnames, a variable that is never defined in this
	      # plan, so nothing was patched when the health check was disabled.
	      $patch_ready = $target_array.map |$t| { $t.name }
	    }

	    ### Patching, Input: $patch_ready, Output: $post_patch_ready ###
	    ### Add'l result params: $not_patched, $reboot_timed_out ###
	    if $patch_ready.empty {
	      $post_patch_ready = []
	    } else {
	      # So we can detect when a node has rebooted
	      $begin_boot_time_results = without_default_logging() || {
	        run_task('pe_patch::last_boot_time', $patch_ready)
	      }

	      # Actually carry out the patching on all healthy nodes
	      $patch_result = run_task('pe_patch::patch_server',
	        $patch_ready,
	        yum_params      => $yum_params,
	        dpkg_params     => $dpkg_params,
	        zypper_params   => $zypper_params,
	        timeout         => $patch_task_timeout,
	        reboot          => $reboot,
	        security_only   => $security_only,
	        clean_cache     => $clean_cache,
	        '_catch_errors' => true)

	      # Pull out list of those that are ok/in error
	      $patched = $patch_result.ok_set.names
	      $not_patched = $patch_result.error_set.names
	      $rebooting_result = $patch_result.ok_set.results.filter | $result | { $result.value['was_rebooted'] }
	      $rebooting = $rebooting_result.map | $result | { $result.target.name }

	      ### Wait for Reboot ###
	      if $rebooting.empty {
	        $post_patch_ready = $patched
	      } else {
	        # Adapted from puppetlabs-reboot
	        $start_time = Timestamp()
	        $wait_results = without_default_logging() || {
	          # reduce() over the Integer only bounds the number of polling rounds;
	          # the loop normally ends through break() once nothing is pending or
	          # the previous round timed out.
	          $reboot_wait_time.reduce({'pending' => $rebooting, 'ok' => []}) |$memo, $_| {
	            if ($memo['pending'].empty or $memo['timed_out']) {
	              break()
	            }

	            $plural = $memo['pending'].size > 1 ? {
	              true => 's',
	              default => '',
	            }
	            out::message("Waiting for ${$memo['pending'].size} node${plural} to reboot. Note that a failed pe_patch::last_boot_time task is normal while a target is in the middle of rebooting, and may be safely ignored.")
	            $current_boot_time_results = run_task('pe_patch::last_boot_time', $memo['pending'], _catch_errors => true)

	            $failed_results = $current_boot_time_results.filter |$current_boot_time_res| {
	              # If we errored, need to check again, since it's probably still rebooting
	              if !$current_boot_time_res.ok {
	                true
	              } else {
	                # If the boot time is the same as it was before we patched,
	                # we haven't rebooted yet and need to check again.
	                $target_name = $current_boot_time_res.target.name
	                $begin_boot_time_res = $begin_boot_time_results.find($target_name)
	                $current_boot_time_res.value == $begin_boot_time_res.value
	              }
	            }

	            # Turn array of results into ResultSet so we can extract Targets
	            $failed_targets = ResultSet($failed_results).targets.map |$t| { $t.name }
	            $ok_targets = $memo['pending'] - $failed_targets

	            $elapsed_time_sec = Integer(Timestamp() - $start_time)
	            $timed_out = $elapsed_time_sec >= $reboot_wait_time

	            if !$failed_targets.empty and !$timed_out {
	              # Wait for targets to be available again before rechecking. If we end up failing
	              # this wait on any of those nodes, we'll catch it in the next iteration.
	              pe_patch::sleep(30)
	              $remaining_time = $reboot_wait_time - $elapsed_time_sec
	              wait_until_available($failed_targets, wait_time => $remaining_time, retry_interval => 1, '_catch_errors' => true)
	            }

	            # New accumulator for the next round.
	            ({
	              'pending' => $failed_targets,
	              'ok'      => $memo['ok'] + $ok_targets,
	              'timed_out' => $timed_out,
	            })
	          }
	        }
	        # Whatever is still pending when the loop ends never came back in time.
	        $reboot_timed_out = $wait_results['pending']
	        $post_patch_ready = $patched - $reboot_timed_out
	      }
	    }

	    ### Post reboot script, Input: $post_patch_ready, Output: None ###
	    # Run the post_reboot_scriptpath, if defined. Don't fail the plan
	    # if the script fails. The user will be able to see the result in
	    # the console.
	    if $post_reboot_scriptpath {
	      run_command($post_reboot_scriptpath, $post_patch_ready, '_catch_errors' => true)
	    }

	    ### Post patching health check, Input: $post_patch_ready, Output: $post_patch_puppet_run_passed ###
	    ### Add'l result params: $post_patch_puppet_run_failed ###
	    if $post_patch_ready.empty or !$run_health_check {
	      $post_patch_puppet_run_passed = $post_patch_ready
	    } else {
	      # Sometimes a puppet run immediately after reboot fails, so give it a bit of time.
	      pe_patch::sleep(30)
	      $post_puppet_check = run_task('enterprise_tasks::run_puppet', $post_patch_ready,
	        max_timeout     => 256,
	        '_catch_errors' => true)
	      $post_patch_puppet_run_passed = $post_puppet_check.ok_set.names
	      $post_patch_puppet_run_failed = $post_puppet_check.error_set.names
	    }
	  }

	  ### Defaults ###
	  # Variables assigned inside the conditional branches above may not exist,
	  # depending on the path taken; fall back to an empty list for each.
	  # Note: $targets and $target_array are always defined,
	  # so no need to set a default value here.
	  $puppet_not_healthy_result = defined('$puppet_not_healthy') ? {
	    true    => $puppet_not_healthy,
	    default => [],
	  }

	  $pre_patch_puppet_run_failed_result = defined('$pre_patch_puppet_run_failed') ? {
	    true    => $pre_patch_puppet_run_failed,
	    default => [],
	  }

	  $patched_result = defined('$patched') ? {
	    true    => $patched,
	    default => [],
	  }

	  $not_patched_result = defined('$not_patched') ? {
	    true    => $not_patched,
	    default => [],
	  }

	  $post_patch_puppet_run_failed_result = defined('$post_patch_puppet_run_failed') ? {
	    true    => $post_patch_puppet_run_failed,
	    default => [],
	  }

	  $reboot_timed_out_result = defined('$reboot_timed_out') ? {
	    true    => $reboot_timed_out,
	    default => [],
	  }

	  # Output the results
	  return({
	    'targets'                      => $targets,
	    'patchable_nodes'              => $target_array,
	    'puppet_health_check_failed'   => $puppet_not_healthy_result,
	    'pre_patch_puppet_run_failed'  => $pre_patch_puppet_run_failed_result,
	    'patching_failed'              => $not_patched_result,
	    'post_patch_puppet_run_failed' => $post_patch_puppet_run_failed_result,
	    'reboot_timed_out'             => $reboot_timed_out_result,
	    'nodes_patched'                => $patched_result,
	    'counts'                       => {
	      'patchable_nodes_count'           => $target_array.length,
	      'puppet_health_check_failed'      => $puppet_not_healthy_result.length,
	      'pre_patch_puppet_run_failed'     => $pre_patch_puppet_run_failed_result.length,
	      'patching_failed'                 => $not_patched_result.length,
	      'post_patch_puppet_run_failed'    => $post_patch_puppet_run_failed_result.length,
	      'reboot_timed_out'                => $reboot_timed_out_result.length,
	      'nodes_patched'                   => $patched_result.length,
	    }
	  })
	}