-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Description
Description
containerd shows a container as running even though the process (PID) for that container no longer exists. Furthermore, when inspecting the pod cgroup (v2), we find only the /pause process, and after restarting containerd, the container is indeed marked as exited.
Steps to reproduce the issue
We haven't found a way to reliably reproduce the bug. We have noticed that it occasionally happens with redis containers when they get OOM-killed. We would be happy to get some tips on how to reproduce it.
Describe the results you received and expected
Received: the container is shown as RUNNING although the container's process is no longer running. Expected: containerd should be aware that the container process has stopped.
What version of containerd are you using?
containerd github.com/containerd/containerd v1.7.20 8fc6bcf
Any other relevant information
Kubernetes v1.29 (kubeadm-deployed), containerd v1.7.20, Debian 12.
runc --version
runc --version
runc version 1.1.13
commit: v1.1.13-0-g58aa9203-dirty
spec: 1.0.2-dev
go: go1.21.11
libseccomp: 2.5.5
crictl info
crictl info
{
"status": {
"conditions": [
{
"type": "RuntimeReady",
"status": true,
"reason": "",
"message": ""
},
{
"type": "NetworkReady",
"status": true,
"reason": "",
"message": ""
},
{
"type": "ContainerdHasNoDeprecationWarnings",
"status": true,
"reason": "",
"message": ""
}
]
},
"cniconfig": {
"PluginDirs": [
"/opt/cni/bin"
],
"PluginConfDir": "/etc/cni/net.d",
"PluginMaxConfNum": 1,
"Prefix": "eth",
"Networks": [
{
"Config": {
"Name": "cni-loopback",
"CNIVersion": "0.3.1",
"Plugins": [
{
"Network": {
"type": "loopback",
"ipam": {},
"dns": {}
},
"Source": "{\"type\":\"loopback\"}"
}
],
"Source": "{\n\"cniVersion\": \"0.3.1\",\n\"name\": \"cni-loopback\",\n\"plugins\": [{\n \"type\": \"loopback\"\n}]\n}"
},
"IFName": "lo"
},
{
"Config": {
"Name": "cilium",
"CNIVersion": "0.3.1",
"Plugins": [
{
"Network": {
"type": "cilium-cni",
"ipam": {},
"dns": {}
},
"Source": "{\"enable-debug\":false,\"log-file\":\"/var/run/cilium/cilium-cni.log\",\"type\":\"cilium-cni\"}"
}
],
"Source": "\n{\n \"cniVersion\": \"0.3.1\",\n \"name\": \"cilium\",\n \"plugins\": [\n {\n \"type\": \"cilium-cni\",\n \"enable-debug\": false,\n \"log-file\": \"/var/run/cilium/cilium-cni.log\"\n }\n ]\n}"
},
"IFName": "eth0"
}
]
},
"config": {
"containerd": {
"snapshotter": "overlayfs",
"defaultRuntimeName": "runc",
"defaultRuntime": {
"runtimeType": "",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": null,
"privileged_without_host_devices": false,
"privileged_without_host_devices_all_devices_allowed": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0,
"snapshotter": "",
"sandboxMode": ""
},
"untrustedWorkloadRuntime": {
"runtimeType": "",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": null,
"privileged_without_host_devices": false,
"privileged_without_host_devices_all_devices_allowed": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0,
"snapshotter": "",
"sandboxMode": ""
},
"runtimes": {
"runc": {
"runtimeType": "io.containerd.runc.v2",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": {
"SystemdCgroup": true
},
"privileged_without_host_devices": false,
"privileged_without_host_devices_all_devices_allowed": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0,
"snapshotter": "",
"sandboxMode": "podsandbox"
}
},
"noPivot": false,
"disableSnapshotAnnotations": true,
"discardUnpackedLayers": false,
"ignoreBlockIONotEnabledErrors": false,
"ignoreRdtNotEnabledErrors": false
},
"cni": {
"binDir": "/opt/cni/bin",
"confDir": "/etc/cni/net.d",
"maxConfNum": 1,
"setupSerially": false,
"confTemplate": "",
"ipPref": ""
},
"registry": {
"configPath": "/etc/containerd/certs.d",
"mirrors": null,
"configs": null,
"auths": null,
"headers": null
},
"imageDecryption": {
"keyModel": ""
},
"disableTCPService": true,
"streamServerAddress": "127.0.0.1",
"streamServerPort": "0",
"streamIdleTimeout": "4h0m0s",
"enableSelinux": false,
"selinuxCategoryRange": 1024,
"sandboxImage": "registry.k8s.io/pause:3.8",
"statsCollectPeriod": 10,
"systemdCgroup": false,
"enableTLSStreaming": false,
"x509KeyPairStreaming": {
"tlsCertFile": "",
"tlsKeyFile": ""
},
"maxContainerLogSize": 10000,
"disableCgroup": false,
"disableApparmor": false,
"restrictOOMScoreAdj": false,
"maxConcurrentDownloads": 3,
"disableProcMount": false,
"unsetSeccompProfile": "",
"tolerateMissingHugetlbController": true,
"disableHugetlbController": true,
"device_ownership_from_security_context": false,
"ignoreImageDefinedVolumes": false,
"netnsMountsUnderStateDir": false,
"enableUnprivilegedPorts": false,
"enableUnprivilegedICMP": false,
"enableCDI": true,
"cdiSpecDirs": [
"/etc/cdi",
"/var/run/cdi"
],
"imagePullProgressTimeout": "5m0s",
"drainExecSyncIOTimeout": "0s",
"imagePullWithSyncFs": false,
"ignoreDeprecationWarnings": null,
"containerdRootDir": "/var/lib/containerd",
"containerdEndpoint": "/run/containerd/containerd.sock",
"rootDir": "/var/lib/containerd/io.containerd.grpc.v1.cri",
"stateDir": "/run/containerd/io.containerd.grpc.v1.cri"
},
"golang": "go1.21.12",
"lastCNILoadStatus": "OK",
"lastCNILoadStatus.default": "OK"
}
uname -a
Linux t1-k8s-alsu025 6.1.0-21-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.90-1 (2024-05-03) x86_64 GNU/Linux
crictl inspect
{
"status": {
"id": "2594a5faa42199dfcefb468de17fc8b2aceee27e31519e505aef73a2cb8eaee2",
"metadata": {
"attempt": 6,
"name": "redis"
},
"state": "CONTAINER_RUNNING",
"createdAt": "2024-08-05T20:29:52.3390638+02:00",
"startedAt": "2024-08-05T20:29:52.441284768+02:00",
"finishedAt": "0001-01-01T00:00:00Z",
"exitCode": 0,
"image": {
"annotations": {},
"image": ".../redis-cluster:6.2.12-debian-11-r23",
"userSpecifiedImage": ""
},
"reason": "OOMKilled",
"message": "",
"resources": {
"linux": {
"cpuPeriod": "100000",
"cpuQuota": "0",
"cpuShares": "51",
"cpusetCpus": "",
"cpusetMems": "",
"hugepageLimits": [],
"memoryLimitInBytes": "268435456",
"memorySwapLimitInBytes": "268435456",
"oomScoreAdj": "999",
"unified": {
"memory.oom.group": "1",
"memory.swap.max": "0"
}
},
"windows": null
}
},
"info": {
"sandboxID": "4db3bfdad71867f3edfe3146571b74b7beced902a0f5f0f6b908e0e43bdce2d2",
"pid": 2631444,
"removing": false,
"snapshotKey": "2594a5faa42199dfcefb468de17fc8b2aceee27e31519e505aef73a2cb8eaee2",
"snapshotter": "overlayfs",
"runtimeType": "io.containerd.runc.v2",
"runtimeOptions": {
"systemd_cgroup": true
},
"config": {
"metadata": {
"name": "redis",
"attempt": 6
},
"linux": {
"resources": {
"cpu_period": 100000,
"cpu_shares": 51,
"memory_limit_in_bytes": 268435456,
"oom_score_adj": 999,
"hugepage_limits": [
{
"page_size": "2MB"
},
{
"page_size": "1GB"
}
],
"unified": {
"memory.oom.group": "1",
"memory.swap.max": "0"
}
},
"security_context": {
"namespace_options": {
"pid": 1
},
"run_as_user": {
"value": 1001
},
"supplemental_groups": [
1001
],
"masked_paths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"readonly_paths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
],
"seccomp": {
"profile_type": 1
}
}
}
},
"runtimeSpec": {
"ociVersion": "1.1.0",
"process": {
"user": {
"uid": 1001,
"gid": 0,
"additionalGids": [
0,
1001
]
},
"oomScoreAdj": 999
},
"root": {
"path": "rootfs"
},
"linux": {
"resources": {
"devices": [
{
"allow": false,
"access": "rwm"
}
],
"memory": {
"limit": 268435456,
"swap": 268435456
},
"cpu": {
"shares": 51,
"period": 100000
},
"unified": {
"memory.oom.group": "1",
"memory.swap.max": "0"
}
},
"cgroupsPath": "kubepods-burstable-pod3bbf7d7a_a8d4_41bc_9cea_3e0e96321f65.slice:cri-containerd:2594a5faa42199dfcefb468de17fc8b2aceee27e31519e505aef73a2cb8eaee2",
"namespaces": [
{
"type": "pid"
},
{
"type": "ipc",
"path": "/proc/4056906/ns/ipc"
},
{
"type": "uts",
"path": "/proc/4056906/ns/uts"
},
{
"type": "mount"
},
{
"type": "network",
"path": "/proc/4056906/ns/net"
},
{
"type": "cgroup"
}
],
"maskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"readonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}
}
}
systemd-cgls within the supposedly running pod
systemd-cgls
Working directory /sys/fs/cgroup/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod3bbf7d7a_a8d4_41bc_9cea_3e0e96321f65.slice:
└─cri-containerd-4db3bfdad71867f3edfe3146571b74b7beced902a0f5f0f6b908e0e43bdce2d2.scope … (#9418267)
→ user.invocation_id: 7acd8b7f51304a8c8bf691483b192621
→ trusted.invocation_id: 7acd8b7f51304a8c8bf691483b192621
→ user.delegate: 1
→ trusted.delegate: 1
└─4056906 /pause
/cc @LeTT00r
Show configuration if it is related to CRI plugin.
No response