Skip to content

Commit 165dfd6

Browse files
committed
daemon: fix restoring container with missing task
Before 4bafaa0, if the daemon was killed while a container was running and the container shim is killed before the daemon is restarted, such as if the host system is hard-rebooted, the daemon would restore the container to the stopped state and set the exit code to 255. The aforementioned commit introduced a regression where the container's exit code would instead be set to 0. Fix the regression so that the exit code is once again set to 255 on restore. Signed-off-by: Cory Snider <csnider@mirantis.com>
1 parent 8d070e3 commit 165dfd6

3 files changed

Lines changed: 93 additions & 17 deletions

File tree

daemon/daemon.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,8 @@ func (daemon *Daemon) restore(cfg *configStore) error {
447447
if es != nil {
448448
ces.ExitCode = int(es.ExitCode())
449449
ces.ExitedAt = es.ExitTime()
450+
} else {
451+
ces.ExitCode = 255
450452
}
451453
c.SetStopped(&ces)
452454
daemon.Cleanup(c)

integration/container/daemon_linux_test.go

Lines changed: 73 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,8 @@ package container // import "github.com/docker/docker/integration/container"
22

33
import (
44
"context"
5-
"encoding/json"
65
"fmt"
76
"os"
8-
"path/filepath"
97
"strconv"
108
"strings"
119
"testing"
@@ -19,6 +17,7 @@ import (
1917
"golang.org/x/sys/unix"
2018
"gotest.tools/v3/assert"
2119
is "gotest.tools/v3/assert/cmp"
20+
"gotest.tools/v3/assert/opt"
2221
"gotest.tools/v3/skip"
2322
)
2423

@@ -204,21 +203,10 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
204203

205204
d.Stop(t)
206205

207-
configPath := filepath.Join(d.Root, "containers", id, "config.v2.json")
208-
configBytes, err := os.ReadFile(configPath)
209-
assert.NilError(t, err)
210-
211-
var c realcontainer.Container
212-
213-
assert.NilError(t, json.Unmarshal(configBytes, &c))
214-
215-
c.State = realcontainer.NewState()
216-
c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
217-
c.HasBeenStartedBefore = true
218-
219-
configBytes, err = json.Marshal(&c)
220-
assert.NilError(t, err)
221-
assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
206+
d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
207+
c.SetRestarting(&realcontainer.ExitStatus{ExitCode: 1})
208+
c.HasBeenStartedBefore = true
209+
})
222210

223211
d.Start(t)
224212

@@ -231,3 +219,71 @@ func TestRestartDaemonWithRestartingContainer(t *testing.T) {
231219
assert.NilError(t, err)
232220
}
233221
}
222+
223+
// TestHardRestartWhenContainerIsRunning simulates a case where dockerd is
224+
// killed while a container is running, and the container's task no longer
225+
// exists when dockerd starts back up. This can happen if the system is
226+
// hard-rebooted, for example.
227+
//
228+
// Regression test for moby/moby#45788
//
// The test creates two containers (one without a restart policy, one with
// "on-failure"), stops the daemon, rewrites both containers' on-disk config
// via SetRunning, restarts the daemon, and then inspects each container in
// its own subtest.
229+
func TestHardRestartWhenContainerIsRunning(t *testing.T) {
230+
skip.If(t, testEnv.IsRemoteDaemon, "cannot start daemon on remote test run")
231+
skip.If(t, testEnv.DaemonInfo.OSType == "windows")
232+
233+
t.Parallel()
234+
235+
d := daemon.New(t)
236+
defer d.Cleanup(t)
237+
238+
d.StartWithBusybox(t, "--iptables=false")
239+
defer d.Stop(t)
240+
241+
ctx := context.Background()
242+
client := d.NewClientT(t)
243+
244+
// Just create the containers, no need to start them.
245+
// We really want to make sure there is no process running when docker starts back up.
246+
// We will manipulate the on disk state later.
247+
nopolicy := container.Create(ctx, t, client, container.WithCmd("/bin/sh", "-c", "exit 1"))
248+
onfailure := container.Create(ctx, t, client, container.WithRestartPolicy("on-failure"), container.WithCmd("/bin/sh", "-c", "sleep 60"))
249+
250+
// The daemon is stopped before the on-disk container config is edited.
d.Stop(t)
251+
252+
for _, id := range []string{nopolicy, onfailure} {
253+
d.TamperWithContainerConfig(t, id, func(c *realcontainer.Container) {
254+
// Record the container as running on disk even though it was only
// created and never started.
c.SetRunning(nil, nil, true)
255+
c.HasBeenStartedBefore = true
256+
})
257+
}
258+
259+
d.Start(t)
260+
261+
// Without a restart policy the container is expected to be reported as
// exited with code 255 and a FinishedAt within a minute of now.
t.Run("RestartPolicy=none", func(t *testing.T) {
262+
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
263+
defer cancel()
264+
inspect, err := client.ContainerInspect(ctx, nopolicy)
265+
assert.NilError(t, err)
266+
assert.Check(t, is.Equal(inspect.State.Status, "exited"))
267+
assert.Check(t, is.Equal(inspect.State.ExitCode, 255))
268+
finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
269+
// Only compare the timestamp when it parsed successfully.
if assert.Check(t, err) {
270+
assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
271+
}
272+
})
273+
274+
// With the on-failure policy the container is expected to be reported as
// running with exit code 0 and a FinishedAt within a minute of now.
t.Run("RestartPolicy=on-failure", func(t *testing.T) {
275+
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
276+
defer cancel()
277+
inspect, err := client.ContainerInspect(ctx, onfailure)
278+
assert.NilError(t, err)
279+
assert.Check(t, is.Equal(inspect.State.Status, "running"))
280+
assert.Check(t, is.Equal(inspect.State.ExitCode, 0))
281+
finishedAt, err := time.Parse(time.RFC3339Nano, inspect.State.FinishedAt)
282+
// Only compare the timestamp when it parsed successfully.
if assert.Check(t, err) {
283+
assert.Check(t, is.DeepEqual(finishedAt, time.Now(), opt.TimeWithThreshold(time.Minute)))
284+
}
285+
286+
// Stop the container again, passing a stop timeout of 0.
stopTimeout := 0
287+
assert.Assert(t, client.ContainerStop(ctx, onfailure, containerapi.StopOptions{Timeout: &stopTimeout}))
288+
})
289+
}

testutil/daemon/daemon.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/docker/docker/api/types"
1717
"github.com/docker/docker/api/types/events"
1818
"github.com/docker/docker/client"
19+
"github.com/docker/docker/container"
1920
"github.com/docker/docker/pkg/ioutils"
2021
"github.com/docker/docker/pkg/stringid"
2122
"github.com/docker/docker/testutil/request"
@@ -825,6 +826,23 @@ func (d *Daemon) Info(t testing.TB) types.Info {
825826
return info
826827
}
827828

829+
// TamperWithContainerConfig modifies the on-disk config of a container.
//
// It reads containers/<containerID>/config.v2.json under d.Root, decodes it
// into a container.Container, replaces the decoded State with a fresh
// container.NewState(), calls tamper on the result, and writes the
// re-encoded config back to the same path. Read, decode, encode and write
// errors are all reported through assert.NilError.
//
// NOTE(review): the callers visible in this change stop the daemon before
// calling this helper; presumably that is required so the running daemon
// does not overwrite the edited file — confirm.
830+
func (d *Daemon) TamperWithContainerConfig(t testing.TB, containerID string, tamper func(*container.Container)) {
831+
t.Helper()
832+
833+
configPath := filepath.Join(d.Root, "containers", containerID, "config.v2.json")
834+
configBytes, err := os.ReadFile(configPath)
835+
assert.NilError(t, err)
836+
837+
var c container.Container
838+
assert.NilError(t, json.Unmarshal(configBytes, &c))
839+
// Replace the persisted state with a fresh one before handing the
// container to tamper.
c.State = container.NewState()
840+
tamper(&c)
841+
configBytes, err = json.Marshal(&c)
842+
assert.NilError(t, err)
843+
// 0600: readable and writable by the owner only.
assert.NilError(t, os.WriteFile(configPath, configBytes, 0600))
844+
}
845+
828846
// cleanupRaftDir removes swarmkit wal files if present
829847
func cleanupRaftDir(t testing.TB, d *Daemon) {
830848
t.Helper()

0 commit comments

Comments
 (0)