We're seeing issues where cAdvisor spends a lot of CPU time.
The following should run on any system with systemd (code is in this gist):
go run main.go
The output looks something like this:
completed: 4.32s [manual / mem-stat + cpu-stat]
completed: 4.25s [manual / cpu-stat + mem-stat]
completed: 0.47s [manual / mem-stat]
completed: 0.04s [manual / cpu-stat]
completed: 5.17s [manual / mem-stat + cpu-stat]
completed: 5.59s [manual / cpu-stat + mem-stat]
completed: 0.52s [manual / mem-stat]
completed: 0.04s [manual / cpu-stat]
The first four runs are a warm-up, and the second four are the meat and potatoes.
In cpu-stat + mem-stat we effectively do the following:
for _ in $(seq 1 1000); do cat /sys/fs/cgroup/system.slice/memory.stat /sys/fs/cgroup/system.slice/cpu.stat > /dev/null; done
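For reference, a minimal Go sketch of what the combined case boils down to (the actual harness is main.go in this gist; the paths, label, and iteration count below just mirror the shell loop above):

package main

import (
	"fmt"
	"os"
	"time"
)

// Read memory.stat and cpu.stat for the same cgroup, interleaved, 1000 times,
// and report how long the whole loop took.
func main() {
	start := time.Now()
	for i := 0; i < 1000; i++ {
		for _, f := range []string{
			"/sys/fs/cgroup/system.slice/memory.stat",
			"/sys/fs/cgroup/system.slice/cpu.stat",
		} {
			if _, err := os.ReadFile(f); err != nil {
				panic(err)
			}
		}
	}
	fmt.Printf("completed: %.2fs [manual / mem-stat + cpu-stat]\n", time.Since(start).Seconds())
}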
In cpu-stat it's just the cpu.stat file:
for _ in $(seq 1 1000); do cat /sys/fs/cgroup/system.slice/cpu.stat > /dev/null; done
In mem-stat it's just the memory.stat file:
for _ in $(seq 1 1000); do cat /sys/fs/cgroup/system.slice/memory.stat > /dev/null; done
The weird thing is that running the latter two loops back to back is much faster than the single combined loop: there's easily a 10x difference in time spent in the kernel between them.
Unfortunately, the former is how monitoring software normally works: all metrics are fetched for one service before proceeding to the next one. One prominent example that prompted this examination is cAdvisor.
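To illustrate that access pattern (this is not cAdvisor's actual code, only a sketch of the ordering it ends up with): for every cgroup, all of its stat files are read back to back before moving on to the next cgroup, i.e. the slow combined case above repeated across services.

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	root := "/sys/fs/cgroup/system.slice"
	entries, err := os.ReadDir(root)
	if err != nil {
		panic(err)
	}
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		cg := filepath.Join(root, e.Name())
		// memory.stat and cpu.stat are read for the same cgroup before we
		// touch the next one, which is the pattern that is slow on busy machines.
		for _, f := range []string{"memory.stat", "cpu.stat"} {
			if _, err := os.ReadFile(filepath.Join(cg, f)); err != nil {
				fmt.Fprintf(os.Stderr, "skipping %s: %v\n", cg, err)
				break
			}
		}
	}
}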
Flamegraphs for a single combined loop (one-loop.svg) and for two loops back to back (two-loops.svg) are attached to this gist.
This doesn't happen on a mostly idle machine, but we do see this on loaded servers (128 busy logical CPUs, lots of memory churn).
Possible fix: bobrik/linux@50b6278