|
package main |
|
|
|
import ( |
|
"bufio" |
|
"bytes" |
|
"fmt" |
|
"go/token" |
|
"io" |
|
"os" |
|
"os/exec" |
|
"path" |
|
"path/filepath" |
|
"regexp" |
|
"sort" |
|
"strconv" |
|
"strings" |
|
"time" |
|
) |
|
|
|
/* |
|
Excerpt from man signal(2): |
|
|
|
Interruption of system calls and library functions by signal handlers |
|
If a signal handler is invoked while a system call or library |
|
function call is blocked, then either: |
|
|
|
* the call is automatically restarted after the signal handler |
|
returns; or |
|
|
|
* the call fails with the error EINTR. |
|
|
|
Which of these two behaviors occurs depends on the interface and |
|
whether or not the signal handler was established using the |
|
SA_RESTART flag (see sigaction(2)). The details vary across UNIX |
|
systems; below, the details for Linux. |
|
|
|
If a blocked call to one of the following interfaces is |
|
interrupted by a signal handler, then the call is automatically |
|
restarted after the signal handler returns if the SA_RESTART flag |
|
was used; otherwise the call fails with the error EINTR: |
|
|
|
* read(2), readv(2), write(2), writev(2), and ioctl(2) calls on |
|
"slow" devices. A "slow" device is one where the I/O call may |
|
block for an indefinite time, for example, a terminal, pipe, or |
|
socket. If an I/O call on a slow device has already |
|
transferred some data by the time it is interrupted by a signal |
|
handler, then the call will return a success status (normally, |
|
the number of bytes transferred). Note that a (local) disk is |
|
not a slow device according to this definition; I/O operations |
|
on disk devices are not interrupted by signals. |
|
|
|
* open(2), if it can block (e.g., when opening a FIFO; see |
|
fifo(7)). |
|
|
|
* wait(2), wait3(2), wait4(2), waitid(2), and waitpid(2). |
|
|
|
* Socket interfaces: accept(2), connect(2), recv(2), recvfrom(2), |
|
recvmmsg(2), recvmsg(2), send(2), sendto(2), and sendmsg(2), |
|
unless a timeout has been set on the socket (see below). |
|
|
|
* File locking interfaces: flock(2) and the F_SETLKW and |
|
F_OFD_SETLKW operations of fcntl(2) |
|
|
|
* POSIX message queue interfaces: mq_receive(3), |
|
mq_timedreceive(3), mq_send(3), and mq_timedsend(3). |
|
|
|
* futex(2) FUTEX_WAIT (since Linux 2.6.22; beforehand, always |
|
failed with EINTR). |
|
|
|
* getrandom(2). |
|
|
|
* pthread_mutex_lock(3), pthread_cond_wait(3), and related APIs. |
|
|
|
* futex(2) FUTEX_WAIT_BITSET. |
|
|
|
* POSIX semaphore interfaces: sem_wait(3) and sem_timedwait(3) |
|
(since Linux 2.6.22; beforehand, always failed with EINTR). |
|
|
|
* read(2) from an inotify(7) file descriptor (since Linux 3.8; |
|
beforehand, always failed with EINTR). |
|
|
|
The following interfaces are never restarted after being |
|
interrupted by a signal handler, regardless of the use of |
|
SA_RESTART; they always fail with the error EINTR when |
|
interrupted by a signal handler: |
|
|
|
* "Input" socket interfaces, when a timeout (SO_RCVTIMEO) has |
|
been set on the socket using setsockopt(2): accept(2), recv(2), |
|
recvfrom(2), recvmmsg(2) (also with a non-NULL timeout |
|
argument), and recvmsg(2). |
|
|
|
* "Output" socket interfaces, when a timeout (SO_RCVTIMEO) has |
|
been set on the socket using setsockopt(2): connect(2), |
|
send(2), sendto(2), and sendmsg(2). |
|
|
|
* Interfaces used to wait for signals: pause(2), sigsuspend(2), |
|
sigtimedwait(2), and sigwaitinfo(2). |
|
|
|
* File descriptor multiplexing interfaces: epoll_wait(2), |
|
epoll_pwait(2), poll(2), ppoll(2), select(2), and pselect(2). |
|
|
|
* System V IPC interfaces: msgrcv(2), msgsnd(2), semop(2), and |
|
semtimedop(2). |
|
|
|
* Sleep interfaces: clock_nanosleep(2), nanosleep(2), and |
|
usleep(3). |
|
|
|
* io_getevents(2). |
|
|
|
The sleep(3) function is also never restarted if interrupted by a |
|
handler, but gives a success return: the number of seconds |
|
remaining to sleep. |
|
|
|
*/ |
|
var eintr_syscalls = map[string]bool{ |
|
"SYS_READ": true, |
|
"SYS_READV": true, |
|
"SYS_WRITE": true, |
|
"SYS_WRITEV":true, |
|
"SYS_IOCTL":true, |
|
"SYS_OPEN":true, |
|
"SYS_WAIT":true, |
|
"SYS_WAIT3":true, |
|
"SYS_WAIT4":true, |
|
"SYS_WAITID":true, |
|
"SYS_WAITPID":true, |
|
"SYS_ACCEPT": true, "SYS_ACCEPT4": true, /* only accept(2) is mentioned, but accept4(2) can be the same as accept(2) if flags is 0 */ |
|
"SYS_CONNECT": true, |
|
"SYS_RECV": true, |
|
"SYS_RECVFROM": true, |
|
"SYS_RECVMMSG": true, "sysRECVMMSG": true, |
|
"SYS_RECVMSG": true, |
|
"SYS_SEND": true, |
|
"SYS_SENDTO": true, |
|
"SYS_SENDMSG": true, |
|
"SYS_FLOCK": true, |
|
"SYS_FCNTL": true, "fcntl64Syscall": true, |
|
"SYS_FUTEX": true, |
|
|
|
"SYS_PAUSE": true, |
|
"SYS_SIGSUSPEND": true, |
|
"SYS_SIGTIMEDWAIT": true, |
|
"SYS_SIGWAITINFO": true, |
|
"SYS_EPOLL_WAIT": true, |
|
"SYS_EPOLL_PWAIT": true, |
|
"SYS_POLL": true, |
|
"SYS_PPOLL": true, |
|
"SYS_SELECT": true, |
|
"SYS_PSELECT": true, |
|
"SYS_MSGRCV": true, |
|
"SYS_MSGSND": true, |
|
"SYS_SEMOP": true, |
|
"SYS_SEMTIMEDOP": true, |
|
"SYS_CLOCK_NANOSLEEP": true, |
|
"SYS_NANOSLEEP": true, |
|
"SYS_USLEEP": true, |
|
"SYS_IO_GETEVENTS": true, |
|
} |
|
|
|
type call struct { |
|
*node |
|
file string |
|
line int |
|
offset int |
|
syscall string |
|
} |
|
|
|
type node struct { |
|
name string |
|
callers []call |
|
calls []call |
|
} |
|
|
|
func (n *node) Node() *node { |
|
return n |
|
} |
|
|
|
var rgxNormalize = regexp.MustCompile(`([\(\)\*])`) |
|
|
|
type graph struct { |
|
rev string |
|
mainPkg string |
|
extSyscallPkgs []string |
|
syscallPkgs []string |
|
nodes map[string]*node |
|
syscalls []call |
|
fset *token.FileSet |
|
} |
|
|
|
func main() { |
|
x := exec.Command("go", "env", "GOPATH") |
|
p, err := x.CombinedOutput() |
|
if err != nil { |
|
panic(err) |
|
} |
|
p = bytes.TrimRight(p, "\n") |
|
gopath := string(p) |
|
|
|
results_docker := run(gopath, "github.com/docker/cli/cmd/docker", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix"}) |
|
results_dockerd := run(gopath, "github.com/docker/docker/cmd/dockerd", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix", "github.com/cilium/ebpf/internal/unix"}) |
|
results_containerd := run(gopath, "github.com/containerd/containerd/cmd/containerd", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build -tags=no_btrfs", []string{"golang.org/x/sys/unix", "github.com/cilium/ebpf/internal/unix"}) |
|
results_containerd_shim := run(gopath, "github.com/containerd/containerd/cmd/containerd-shim", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix"}) |
|
results_containerd_shim_runc_v1 := run(gopath, "github.com/containerd/containerd/cmd/containerd-shim-runc-v1", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix", "github.com/cilium/ebpf/internal/unix"}) |
|
results_containerd_shim_runc_v2 := run(gopath, "github.com/containerd/containerd/cmd/containerd-shim-runc-v2", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix", "github.com/cilium/ebpf/internal/unix"}) |
|
results_runc := run(gopath, "github.com/opencontainers/runc", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix", "github.com/cilium/ebpf/internal/unix"}) |
|
results_proxy := run(gopath, "github.com/docker/libnetwork/cmd/proxy", "-mod=vendor -tags=netgo -tags=osusergo -tags=static_build", []string{"golang.org/x/sys/unix"}) |
|
|
|
for _, x := range []struct{ |
|
name string |
|
results []result |
|
}{ |
|
{"docker (cli)",results_docker}, |
|
{"dockerd", results_dockerd}, |
|
{"docker-proxy", results_proxy}, |
|
{"containerd", results_containerd}, |
|
{"containerd-shim", results_containerd_shim}, |
|
{"containerd-shim-runc-v1", results_containerd_shim_runc_v1}, |
|
{"containerd-shim-runc-v2", results_containerd_shim_runc_v2}, |
|
{"runc", results_runc}, |
|
} { |
|
writeResults(x.name + ".csv", x.results) |
|
} |
|
} |
|
|
|
func writeResults(csv string, results []result) { |
|
f, err := os.Create(csv) |
|
if err != nil { |
|
panic(err) |
|
} |
|
defer f.Close() |
|
m := map[string]map[string]struct{}{} |
|
for _, r := range results { |
|
x := m[r.trap] |
|
if x == nil { |
|
x = map[string]struct{}{} |
|
} |
|
x[r.link] = struct{}{} |
|
m[r.trap] = x |
|
} |
|
type row struct { |
|
trap string |
|
links []string |
|
} |
|
a := make([]row, 0, len(m)) |
|
for trap, results := range m { |
|
links := make([]string, 0, len(results)) |
|
for r := range results { |
|
links = append(links, r) |
|
} |
|
sort.Strings(links) |
|
a = append(a, row{trap, links}) |
|
} |
|
sort.Slice(a, func(i, j int) bool { return a[i].trap < a[j].trap }) |
|
|
|
fmt.Fprintln(f, "Trap,OK?,Not OK?,In man page?,Comment,Callsite") |
|
for _, row := range a { |
|
inManpage := "" |
|
if eintr_syscalls[row.trap] { |
|
inManpage = "x" |
|
} |
|
for _, link := range row.links { |
|
fmt.Fprintf(f, "\"%s\",,,\"%s\",,\"%s\"\n", row.trap, inManpage, link) |
|
} |
|
} |
|
} |
|
|
|
type result struct { |
|
trap string |
|
link string |
|
} |
|
|
|
func run(gopath, mainPkg, goflags string, extSyscallPackages []string) (results []result) { |
|
source := fmt.Sprintf("%s/src/%s", gopath, mainPkg) |
|
|
|
g := newGraph() |
|
if !strings.HasPrefix(mainPkg, "github.com/") { |
|
panic("only github.com is supported") |
|
} |
|
parts := strings.SplitN(mainPkg, "/", 4) |
|
repo := fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2]) |
|
g.extSyscallPkgs = extSyscallPackages |
|
for _, pkg := range extSyscallPackages { |
|
g.extSyscallPkgs = append(g.extSyscallPkgs, repo+"/vendor/"+pkg) |
|
g.syscallPkgs = append(g.syscallPkgs, repo+"/vendor/"+pkg) |
|
} |
|
g.syscallPkgs = append([]string{"syscall"}, g.extSyscallPkgs...) |
|
|
|
// pta and rta algorithms yield different results, so let's merge both by default |
|
for _, algo := range []string{"pta", "rta"} { |
|
filename := "zzz.callgraph." + filepath.Base(source) + "." + algo |
|
var r io.Reader |
|
fr, err := os.Open(filename) |
|
if err == nil { |
|
r = fr |
|
} else if os.IsNotExist(err) { |
|
fw, err := os.Create(filename) |
|
if err != nil { |
|
panic(err) |
|
} |
|
|
|
rev, err := exec.Command("git", "-C", source, "rev-parse", "HEAD").CombinedOutput() |
|
if err != nil { |
|
panic(fmt.Errorf("%s", rev)) |
|
} |
|
rev = bytes.TrimRight(rev, "\n") |
|
g.rev = string(rev) |
|
fmt.Fprintln(fw, g.rev) |
|
|
|
cmd := exec.Command("callgraph", "-algo", algo, "-format", "{{.Caller}} | {{.Callee}} | {{.Filename}}:{{.Line}}:{{.Column}}:{{.Offset}}", source) |
|
cmd.Env = append(os.Environ(), "CGO_ENABLED=0", "GOFLAGS="+goflags) |
|
cmd.Stderr = os.Stderr |
|
stdout, err := cmd.StdoutPipe() |
|
if err != nil { |
|
panic(err) |
|
} |
|
r = io.TeeReader(stdout, fw) |
|
go func() { |
|
t := time.Now() |
|
if err := cmd.Run(); err != nil { |
|
panic(err) |
|
} |
|
fmt.Fprintf(os.Stderr, "callgraph generation took %s\n", time.Since(t)) |
|
}() |
|
} else { |
|
panic(err) |
|
} |
|
|
|
g.fromReader(r) |
|
} |
|
|
|
// find all callsites where direct syscalls are made |
|
chains, _ := g.visit(map[call]struct{}{}, nil, g.syscalls...) |
|
|
|
for _, chain := range chains { |
|
var r result |
|
for i, x := range chain { |
|
if i == 0 { |
|
// only interested in source for user function |
|
// Assuming github |
|
r.link = fmt.Sprintf("https://%s/blob/%s/%s#L%d", repo, g.rev, x.file[len(gopath)+len("/src/")+len(repo)+1:], x.line) |
|
//fmt.Printf("%s:%d\t%s", x.file, x.line, x.name) |
|
} else { |
|
//fmt.Print(" -> ", x.name) |
|
} |
|
if r.trap == "" && x.syscall != "" { |
|
k := strings.LastIndex(x.syscall, ".") |
|
if k < 0 { |
|
r.trap = x.syscall |
|
} else { |
|
r.trap = x.syscall[k+1:] |
|
} |
|
} |
|
} |
|
results = append(results, r) |
|
//fmt.Printf(" %s\n", r.trap) |
|
} |
|
return results |
|
} |
|
|
|
func (g *graph) isStdlib(name string) (res bool) { |
|
i := strings.Index(name, "/") |
|
if i < 0 { |
|
return !strings.HasPrefix(name, g.mainPkg+".") |
|
} |
|
// TODO: match with `go list std` instead of checking whether first path component has `.` although this is a good enough to check if it's stdlib. |
|
j := strings.Index(name[:i], ".") |
|
return j < 0 || strings.HasSuffix(name[:i], ".init") |
|
} |
|
|
|
func (g *graph) isLeafSyscall(s string) bool { |
|
for _, fn := range []string{".Syscall", ".RawSyscall", ".PtraceSyscall", ".schedAffinity"} { |
|
for _, pkg := range g.syscallPkgs { |
|
if strings.HasPrefix(s, pkg+fn) { |
|
return true |
|
} |
|
} |
|
} |
|
return false |
|
} |
|
|
|
func (g *graph) isExternalSyscallPackage(name string) bool { |
|
for _, s := range g.extSyscallPkgs { |
|
if strings.HasPrefix(name, s+".") { |
|
return true |
|
} |
|
} |
|
return false |
|
} |
|
|
|
func (g *graph) visit(seen map[call]struct{}, syscall *call, calls ...call) (retChains [][]call, _ map[call]struct{}) { |
|
for _, c := range calls { |
|
n := c.node |
|
if n == nil { |
|
panic("node must not be nil") |
|
} |
|
name := normalize(n.name) |
|
if _, ok := seen[c]; ok { |
|
continue |
|
} |
|
if syscall != nil { |
|
arg := getTrap(c.file, c.offset) |
|
c.syscall = arg |
|
} |
|
if g.isStdlib(name) { |
|
// not interested in stdlib functions that are not syscalls |
|
if !strings.HasPrefix(name, "syscall.") { |
|
continue |
|
} |
|
} else if !g.isExternalSyscallPackage(name) { |
|
// if it's not an stdlib function nor a syscall library, then it's our first user function |
|
retChains = append(retChains, []call{c}) |
|
// do not call visit recursively as we do not care about the callers of the user function |
|
continue |
|
} |
|
|
|
// Here current function is from the syscall package, so let's recurse on its callers |
|
|
|
// Mark as seen only if from syscall package, as there can be multiple codepaths with calls to different syscalls |
|
|
|
var sys *call |
|
if g.isLeafSyscall(name) { |
|
sys = &c |
|
} |
|
seen[c] = struct{}{} |
|
|
|
var chains [][]call |
|
chains, seen = g.visit(seen, sys, n.callers...) |
|
// and add the syscall to the chain returned. |
|
for _, chain := range chains { |
|
retChains = append(retChains, append(chain, c)) |
|
} |
|
// Note that if syscall is only called by stdlib functions, chains will be empty and syscall function will not be added to retChains. |
|
} |
|
return retChains, seen |
|
} |
|
|
|
func normalize(s string) string { |
|
return rgxNormalize.ReplaceAllString(s, "") |
|
} |
|
|
|
func (g *graph) fromReader(r io.Reader) { |
|
s := bufio.NewScanner(r) |
|
if s.Scan() { |
|
g.rev = s.Text() |
|
} |
|
for s.Scan() { |
|
b := s.Bytes() |
|
if len(b) == 0 || b[0] == '#' { |
|
continue |
|
} |
|
// slices[0] = caller |
|
// slices[1] = callee |
|
// slices[2] = callsite |
|
slices := strings.Split(s.Text(), " | ") |
|
if len(slices) < 3 { |
|
panic(s.Text()) |
|
} |
|
// handle if source is not a package but a single file |
|
for i, slice := range slices[:2] { |
|
if strings.HasPrefix(slice, "command-line-arguments") { |
|
slices[i] = g.mainPkg + slice[len("command-line-arguments"):] |
|
} |
|
} |
|
callerNode, ok := g.nodes[slices[0]] |
|
if !ok { |
|
// if caller node was not already encountered, create it and add it to map |
|
callerNode = &node{name: slices[0]} |
|
g.nodes[slices[0]] = callerNode |
|
} |
|
parts := strings.Split(slices[2], ":") |
|
line, err := strconv.Atoi(parts[1]) |
|
if err != nil { |
|
panic(err) |
|
} |
|
offset, err := strconv.Atoi(parts[3]) |
|
if err != nil { |
|
panic(err) |
|
} |
|
c := call{file: parts[0], line: line, offset: offset} |
|
|
|
calleeNode, ok := g.nodes[slices[1]] |
|
// copy callsites for both caller and callee |
|
caller, callee := c, c |
|
caller.node = callerNode |
|
callee.node = calleeNode |
|
if !ok { |
|
// if callee node was not already encountered, create it and add it to map |
|
calleeNode = &node{name: slices[1]} |
|
callee.node = calleeNode |
|
g.nodes[slices[1]] = calleeNode |
|
// if callee is a syscall, then record it |
|
if g.isLeafSyscall(slices[1]) { |
|
// do not reuse `callee` |
|
g.syscalls = append(g.syscalls, call{node: calleeNode}) |
|
} |
|
} |
|
|
|
callerNode.calls = append(callerNode.calls, callee) |
|
calleeNode.callers = append(calleeNode.callers, caller) |
|
} |
|
if err := s.Err(); err != nil { |
|
panic(err) |
|
} |
|
} |
|
|
|
func newGraph() *graph { |
|
return &graph{ |
|
nodes: make(map[string]*node), |
|
} |
|
} |
|
|
|
func getTrap(filename string, offset int) string { |
|
f, err := os.Open(filename) |
|
if err != nil { |
|
panic(err) |
|
} |
|
defer f.Close() |
|
// offset+1: skip '(' in function call such as Syscall(SYS_READ, ...) |
|
if _, err := f.Seek(int64(offset+1), 0); err != nil { |
|
panic(err) |
|
} |
|
s := bufio.NewScanner(f) |
|
s.Split(func(data []byte, atEOF bool) (advance int, token []byte, err error) { |
|
if atEOF && len(data) == 0 { |
|
return 0, nil, nil |
|
} |
|
i := bytes.IndexFunc(data, func(r rune) bool { |
|
b := (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '.' |
|
return !b |
|
}) |
|
if i < 0 { |
|
return 0, nil, nil |
|
} |
|
return i, data[:i], nil |
|
}) |
|
if s.Scan() { |
|
return s.Text() |
|
} |
|
panic(s.Err()) |
|
} |