Skip to content

Instantly share code, notes, and snippets.

@paralin
Created April 4, 2022 17:40
Show Gist options
  • Save paralin/ad4279d14a588eb3e897519b9f299907 to your computer and use it in GitHub Desktop.
Save paralin/ad4279d14a588eb3e897519b9f299907 to your computer and use it in GitHub Desktop.
Podman: check for container start failure
// Bring down any existing pod with this name before creating the new one.
c.le.Debug("podman: checking if pod exists")
existsReport, err := engine.PodExists(ctx, podObj.Name)
if err != nil {
	return err
}
if existsReport.Value {
	c.le.Debug("podman: bringing down old pod version")
	if downErr := runKubeDown(ctx); downErr != nil {
		return downErr
	}
}

// Create the pod from the YAML definition, with the Replace option set.
c.le.Debug("podman: creating pod")
playOpts := pentities.PlayKubeOptions{Replace: true}
playReport, err := engine.PlayKube(ctx, bytes.NewReader(podYAML), playOpts)
if err != nil {
	return errors.Wrap(err, "podman: play kube pod")
}

// Unless pod removal is disabled, tear the pod down when this function
// returns and podRemoved has not been set. The cleanup runs with
// context.Background() rather than ctx, and its error is deliberately
// discarded (best-effort).
var podRemoved bool
if !noRemovePods {
	defer func() {
		if podRemoved {
			return
		}
		_ = runKubeDown(context.Background())
	}()
}

// Exactly one pod is expected in the play report.
if numPods := len(playReport.Pods); numPods != 1 {
	c.le.Errorf("expected 1 pod but podman created %d", numPods)
	return errors.New("failed to create podman pod")
}
podID := playReport.Pods[0].ID
runningContainersIDs := playReport.Pods[0].Containers
// Wait for the pod's containers to reach the exited or stopped state, then
// derive the pod result from the wait report and, if that shows no failure,
// from an inspect of the same containers. Containers still reported as
// created/configured are started explicitly and the wait is repeated.
//
// NOTE(review): the retry below has no upper bound; if the started containers
// are reported as created/configured again, control returns here again.
WaitContainer:
le := c.le.
	WithField("pod-id", podID).
	WithField("pod-name", podName)
le.Debugf(
	"podman: waiting for pod container(s) to exit: %v",
	runningContainersIDs,
)
waitReport, err := engine.ContainerWait(
	ctx,
	runningContainersIDs,
	pentities.WaitOptions{
		Interval: time.Millisecond * 250,
		Condition: []pdefine.ContainerStatus{
			pdefine.ContainerStateExited,
			pdefine.ContainerStateStopped,
		},
	},
)
if err != nil {
	return err
}

// podErr holds the first failure observed for the pod. checkErr records a
// failure only while podErr is still nil, so the first one wins; it always
// returns the current podErr.
var podErr error
checkErr := func(err error, exitCode int32) error {
	if podErr != nil {
		return podErr
	}
	if err != nil {
		podErr = err
	} else if exitCode != 0 {
		podErr = errors.Errorf("container exited with code: %d", exitCode)
	}
	return podErr
}
for _, res := range waitReport {
	if checkErr(res.Error, res.ExitCode) != nil {
		break
	}
}

// If the wait report shows no failure, inspect the containers to pick up
// non-zero exit codes and containers that never left the created state.
var failedToStart []string
if podErr == nil {
	inspectReport, inspectErrs, err := engine.ContainerInspect(ctx, runningContainersIDs, pentities.InspectOptions{})
	if err != nil {
		return err
	}
	for _, inspectErr := range inspectErrs {
		if inspectErr != nil {
			// The position of an entry in inspectErrs is not assumed to
			// match the position of a container in runningContainersIDs,
			// so the message does not name an ID by index; the wrapped
			// error is kept as the cause.
			podErr = errors.Wrap(inspectErr, "error inspecting container")
			break
		}
	}
	if podErr == nil {
		for _, rep := range inspectReport {
			if rep == nil || rep.State == nil {
				continue
			}
			if checkErr(nil, rep.State.ExitCode) != nil {
				break
			}
			status := rep.State.Status
			if status == pdefine.ContainerStateCreated.String() ||
				status == pdefine.ContainerStateConfigured.String() {
				// The container most likely did not start properly.
				failedToStart = append(failedToStart, rep.ID)
			}
		}
	}
}

if podErr == nil && len(failedToStart) != 0 {
	// No error value is available at this point (the outer err is nil
	// here), so none is attached to the log entry.
	le.Warnf("container failed to start: %v", failedToStart)
	// We cannot determine the error from the inspect report:
	// See: https://github.com/containers/podman/issues/13729
	startRep, err := engine.ContainerStart(ctx, failedToStart, pentities.ContainerStartOptions{})
	if err == nil && startRep == nil {
		err = errors.New("container start returned empty response")
	}
	if err == nil {
		for _, rep := range startRep {
			// checkErr keeps the first failure, so stop at the first one.
			if err = checkErr(rep.Err, int32(rep.ExitCode)); err != nil {
				break
			}
		}
	}
	if err == nil {
		// The containers started successfully: wait for them again.
		le.Warnf("started stalled containers: %v", failedToStart)
		goto WaitContainer
	}
	podErr = errors.Wrap(err, "container failed to start")
}

if podErr != nil {
	le.WithError(podErr).Warn("pod exited with error")
} else {
	le.Debug("pod exited successfully")
}
return podErr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment