Skip to content

Instantly share code, notes, and snippets.

@luser
Created January 14, 2021 16:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luser/a5c2ca08809d383971572a29ae9ed16d to your computer and use it in GitHub Desktop.
Save luser/a5c2ca08809d383971572a29ae9ed16d to your computer and use it in GitHub Desktop.
Illustration of a potential issue with subprocesses in Go

I ran into this issue at work in a test runner we have that is written in Go and runs a test suite which is a node module, which itself spawns several subprocesses. The issue was that one of the subprocesses inherited its parents' stdout file descriptor, but would outlive the parent process being killed. If you use Go's exec.Cmd to run a process and set its Stdout or Stderr to something that is not a File, a goroutine will be spawned to drive that I/O when calling Cmd.Start, and all such goroutines will be run to completion as part of Cmd.Wait. In this situation, because the grandchild process stays alive, it keeps the stdout pipe open, thus Cmd.Wait blocks until it exits.

subprocess.go is a minimal testcase to display the issue. It spawns itself as a child process with piped stdout, then waits one second before killing the subprocess and waiting on it. The child process spawns itself as a grandchild with inherited stdout and sleeps for 5 seconds before exiting. The grandchild process simply sleeps for 5 seconds before exiting. When run, you can see that the first process winds up waiting until the grandchild process exits (the child and grandchild processes prefix their output with their pid in brackets):

$ go run subprocess.go
[281ns] Spawning child process
[443.594µs] Sleeping for one second
[4086632][1.455µs] Child process started
[4086632][76.366µs] Spawning grandchild
[4086632][489.851µs] Sleeping for 5 seconds
[4086637][1.631µs] Grandchild process started
[4086637][48.321µs] Sleeping for 5 seconds
[1.000591184s] Killing child process 4086632
[1.000716182s] Waiting for child process
[4086637][5.000200347s] Done
[5.005455282s] Child process was killed
[5.005474676s] Done!

subprocess_setpgid.go is the same program but with my suggested fix: putting the child process in a new process group and then killing the process group so that the grandchild process is also killed. Running this version shows that the first process receives the child processes' exit status almost immediately upon calling Cmd.Wait, and the grandchild process does not run to completion:

$ go run subprocess_setpgid.go
[215ns] Spawning child process
[432.164µs] Sleeping for one second
[4086776][1.133µs] Child process started
[4086776][31.065µs] Spawning grandchild
[4086776][312.907µs] Sleeping for 5 seconds
[4086781][659ns] Grandchild process started
[4086781][15.239µs] Sleeping for 5 seconds
[1.00059334s] Killing child process group 4086776
[1.000691397s] Waiting for child process
[1.001147315s] Child process was killed
[1.001169014s] Done!
package main
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"os/exec"
"time"
)
func spawn_child_kill_wait() {
start := time.Now()
fmt.Printf("[%v] Spawning child process\n", time.Since(start))
var buf bytes.Buffer
stdoutf := io.MultiWriter(&buf, os.Stdout)
cmd := exec.Command(os.Args[0], "child")
cmd.Stdout = stdoutf
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
fmt.Fprintf(os.Stderr, "[%v]: %v\n", time.Since(start), err)
return
}
fmt.Printf("[%v] Sleeping for one second\n", time.Since(start))
time.Sleep(time.Second)
fmt.Printf("[%v] Killing child process %v\n", time.Since(start), cmd.Process.Pid)
if err := cmd.Process.Kill(); err != nil {
fmt.Fprintf(os.Stderr, "[%v]: %v\n", time.Since(start), err)
return
}
fmt.Printf("[%v] Waiting for child process\n", time.Since(start))
err := cmd.Wait()
var eerr *exec.ExitError
if errors.As(err, &eerr) {
fmt.Printf("[%v] Child process was killed\n", time.Since(start))
} else if err != nil {
fmt.Fprintf(os.Stderr, "[%v] Wait Error: %v\n", time.Since(start), err)
}
fmt.Printf("[%v] Done!\n", time.Since(start))
}
func run_child() {
start := time.Now()
p := os.Getpid()
fmt.Printf("[%v][%v] Child process started\n", p, time.Since(start))
fmt.Printf("[%v][%v] Spawning grandchild\n", p, time.Since(start))
cmd := exec.Command(os.Args[0], "grandchild")
cmd.Stdout = os.Stdout
if err := cmd.Start(); err != nil {
fmt.Fprintf(os.Stderr, "[%v][%v]: %v\n", p, err, time.Since(start))
return
}
fmt.Printf("[%v][%v] Sleeping for 5 seconds\n", p, time.Since(start))
time.Sleep(5 * time.Second)
fmt.Printf("[%v][%v] Done\n", p, time.Since(start))
}
func run_grandchild() {
start := time.Now()
p := os.Getpid()
fmt.Printf("[%v][%v] Grandchild process started\n", p, time.Since(start))
fmt.Printf("[%v][%v] Sleeping for 5 seconds\n", p, time.Since(start))
time.Sleep(5 * time.Second)
fmt.Printf("[%v][%v] Done\n", p, time.Since(start))
}
func main() {
if len(os.Args) == 1 {
spawn_child_kill_wait()
} else if os.Args[1] == "child" {
run_child()
} else if os.Args[1] == "grandchild" {
run_grandchild()
}
}
package main
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"os/exec"
"syscall"
"time"
)
func spawn_child_kill_wait() {
start := time.Now()
fmt.Printf("[%v] Spawning child process\n", time.Since(start))
var buf bytes.Buffer
stdoutf := io.MultiWriter(&buf, os.Stdout)
cmd := exec.Command(os.Args[0], "child")
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
cmd.Stdout = stdoutf
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
fmt.Fprintf(os.Stderr, "[%v]: %v\n", time.Since(start), err)
return
}
fmt.Printf("[%v] Sleeping for one second\n", time.Since(start))
time.Sleep(time.Second)
fmt.Printf("[%v] Killing child process group %v\n", time.Since(start), cmd.Process.Pid)
if err := syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL); err != nil {
fmt.Fprintf(os.Stderr, "[%v]: %v\n", time.Since(start), err)
return
}
fmt.Printf("[%v] Waiting for child process\n", time.Since(start))
err := cmd.Wait()
var eerr *exec.ExitError
if errors.As(err, &eerr) {
fmt.Printf("[%v] Child process was killed\n", time.Since(start))
} else if err != nil {
fmt.Fprintf(os.Stderr, "[%v] Wait Error: %v\n", time.Since(start), err)
}
fmt.Printf("[%v] Done!\n", time.Since(start))
}
func run_child() {
start := time.Now()
p := os.Getpid()
fmt.Printf("[%v][%v] Child process started\n", p, time.Since(start))
fmt.Printf("[%v][%v] Spawning grandchild\n", p, time.Since(start))
cmd := exec.Command(os.Args[0], "grandchild")
cmd.Stdout = os.Stdout
if err := cmd.Start(); err != nil {
fmt.Fprintf(os.Stderr, "[%v][%v]: %v\n", p, err, time.Since(start))
return
}
fmt.Printf("[%v][%v] Sleeping for 5 seconds\n", p, time.Since(start))
time.Sleep(5 * time.Second)
fmt.Printf("[%v][%v] Done\n", p, time.Since(start))
}
func run_grandchild() {
start := time.Now()
p := os.Getpid()
fmt.Printf("[%v][%v] Grandchild process started\n", p, time.Since(start))
fmt.Printf("[%v][%v] Sleeping for 5 seconds\n", p, time.Since(start))
time.Sleep(5 * time.Second)
fmt.Printf("[%v][%v] Done\n", p, time.Since(start))
}
func main() {
if len(os.Args) == 1 {
spawn_child_kill_wait()
} else if os.Args[1] == "child" {
run_child()
} else if os.Args[1] == "grandchild" {
run_grandchild()
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment