Skip to content

Instantly share code, notes, and snippets.

View bysnupy's full-sized avatar
🦉
Dry eyes ...

Daein Park bysnupy

🦉
Dry eyes ...
View GitHub Profile
@bysnupy
bysnupy / pleg_unhealthy_healthy_code_part.md
Created October 19, 2019 06:56
PLEG is not healthy: healthy() code part
//// pkg/kubelet/pleg/generic.go - Healthy()

// The threshold needs to be greater than the relisting period + the
// relisting time, which can vary significantly. Set a conservative
// threshold to avoid flipping between healthy and unhealthy.
relistThreshold = 3 * time.Minute
:
func (g *GenericPLEG) Healthy() (bool, error) {
  relistTime := g.getRelistTime()
@bysnupy
bysnupy / pleg_unhealthy_relist_code_part.md
Created October 19, 2019 07:04
PLEG is not healthy relist source code part1
//// pkg/kubelet/kubelet.go - NewMainKubelet()

// Generic PLEG relies on relisting for discovering container events.
// A longer period means that kubelet will take longer to detect container
// changes and to update pod status. On the other hand, a shorter period
// will cause more frequent relisting (e.g., container runtime operations),
// leading to higher cpu usage.
// Note that even though we set the period to 1s, the relisting itself can
// take more than 1s to finish if the container runtime responds slowly
@bysnupy
bysnupy / pleg_unhealthy_relist_getpods_code_part.md
Created October 19, 2019 07:06
PLEG is not healthy getpods source code in relist
//// pkg/kubelet/pleg/generic.go - relist()
  :
  // get the current timestamp
  timestamp := g.clock.Now()

  // kubelet_pleg_relist_latency_microseconds for prometheus metrics
	defer func() {
		metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
	}()
@bysnupy
bysnupy / pleg_unhealthy_relist_getpods_code_part.md
Created October 19, 2019 07:06
PLEG is not healthy getpods source code in relist
//// pkg/kubelet/pleg/generic.go - relist()
  :
  // get the current timestamp
  timestamp := g.clock.Now()

  // kubelet_pleg_relist_latency_microseconds for prometheus metrics
	defer func() {
		metrics.PLEGRelistLatency.Observe(metrics.SinceInMicroseconds(timestamp))
	}()
@bysnupy
bysnupy / pleg_unhealthy_getpods_trace_code_part.md
Created October 19, 2019 07:08
PLEG is not healthy getpods trace source code part in relist
//// pkg/kubelet/kuberuntime/kuberuntime_manager.go - GetPods()

// GetPods returns a list of containers grouped by pods. The boolean parameter
// specifies whether the runtime returns all containers including those already
// exited and dead containers (used for garbage collection).
func (m *kubeGenericRuntimeManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
	pods := make(map[kubetypes.UID]*kubecontainer.Pod)
	sandboxes, err := m.getKubeletSandboxes(all)
:
@bysnupy
bysnupy / pleg_unhealthy_relist_updaterelisttime_code_part.md
Created October 19, 2019 07:10
PLEG is not healthy updateRelistTime source code in relist
//// pkg/kubelet/pleg/generic.go - relist()
  // record the timestamp as the latest relist time
  g.updateRelistTime(timestamp)
@bysnupy
bysnupy / pleg_unhealthy_relist_upateevents_code_part.md
Created October 19, 2019 07:12
PLEG is not healthy updateevents source code in relist
//// pkg/kubelet/pleg/generic.go - relist()

  pods := kubecontainer.Pods(podList)
  g.podRecords.setCurrent(pods)

  // Compare the old and the current pods, and generate events.
  eventsByPodID := map[types.UID][]*PodLifecycleEvent{}
  for pid := range g.podRecords {
 oldPod := g.podRecords.getOld(pid)
@bysnupy
bysnupy / pleg_unhealthy_generateevents_trace_code_part.md
Created October 19, 2019 07:13
PLEG is not healthy generateEvents source code
//// pkg/kubelet/pleg/generic.go - computeEvents()

func computeEvents(oldPod, newPod *kubecontainer.Pod, cid *kubecontainer.ContainerID) []*PodLifecycleEvent {
:
    return generateEvents(pid, cid.ID, oldState, newState)
}

//// pkg/kubelet/pleg/generic.go - generateEvents()
@bysnupy
bysnupy / pleg_unhealthy_relist_updatecache_code_part.md
Created October 19, 2019 07:15
PLEG is not healthy updateCache source code in relist
//// pkg/kubelet/pleg/generic.go - relist()

  // If there are events associated with a pod, we should update the
  // podCache.
  for pid, events := range eventsByPodID {
    pod := g.podRecords.getCurrent(pid)
    if g.cacheEnabled() {
      // updateCache() will inspect the pod and update the cache. If an
      // error occurs during the inspection, we want PLEG to retry again
@bysnupy
bysnupy / pleg_unhealthy_updatecache_trace_code_part.md
Created October 19, 2019 07:17
PLEG is not healthy updateCache call trace source code
//// pkg/kubelet/pleg/generic.go - updateCache()

func (g *GenericPLEG) updateCache(pod *kubecontainer.Pod, pid types.UID) error {
:
	timestamp := g.clock.Now()
	// TODO: Consider adding a new runtime method
	// GetPodStatus(pod *kubecontainer.Pod) so that Docker can avoid listing
	// all containers again.
	status, err := g.runtime.GetPodStatus(pod.ID, pod.Name, pod.Namespace)