-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Closed
Labels
Description
Description
Hi, I raised an issue opencontainers/runc#3448 in the runc repo, but as you can see the issue appears to be that containerd (1.6.2) is not starting the created containers.
- The issue affected basically every node as soon as they came up
- Have other clusters running the same setup which makes me think it is triggered by load or a certain type of workload
- Kubernetes 1.22.8
- Most pods, after failing to start, were then met with `OutOfcpu`, making me think that the CPU requests were being accounted for even if nothing was running (all nodes basically idle).
- Switching to the docker CRI "fixed" it.
- Nodes are set up to have both docker and containerd present, can switch to either runtime with a configuration change to point kubelet to either.
- Restarting containerd service fixes.
- Running `containerd-stress density` will even fail to start a container and is stuck in the same way.
Steps to reproduce the issue
- Scale up a workload so the nodes are fully committed (e.g. cluster-autoscaler will start creating nodes)
- Wait for the node to start accumulating `runc init` processes. Containerd logs will report errors creating containers/sandboxes etc.
Describe the results you received and expected
We have a way to switch container runtime from containerd -> docker. We expected containerd to work in the same way as docker.
What version of containerd are you using?
1.6.2
Any other relevant information
runc version, CRI configuration, OS/Kernel version, etc.
runc
VERSION:
1.0.3
commit: v1.0.3-0-gf46b6ba
spec: 1.0.2-dev
go: go1.16.15
libseccomp: 2.5.1
(I also tried on 1.1.1 with same results which makes me think the issue could be with the shim?)
{
"status": {
"conditions": [
{
"type": "RuntimeReady",
"status": true,
"reason": "",
"message": ""
},
{
"type": "NetworkReady",
"status": true,
"reason": "",
"message": ""
}
]
},
"cniconfig": {
"PluginDirs": [
"/opt/cni/bin"
],
"PluginConfDir": "/etc/cni/net.d",
"PluginMaxConfNum": 1,
"Prefix": "eth",
"Networks": [
{
"Config": {
"Name": "cni-loopback",
"CNIVersion": "0.3.1",
"Plugins": [
{
"Network": {
"type": "loopback",
"ipam": {},
"dns": {}
},
"Source": "{\"type\":\"loopback\"}"
}
],
"Source": "{\n\"cniVersion\": \"0.3.1\",\n\"name\": \"cni-loopback\",\n\"plugins\": [{\n \"type\": \"loopback\"\n}]\n}"
},
"IFName": "lo"
},
{
"Config": {
"Name": "aws-cni",
"CNIVersion": "0.4.0",
"Plugins": [
{
"Network": {
"name": "aws-cni",
"type": "aws-cni",
"ipam": {},
"dns": {}
},
"Source": "{\"mtu\":\"9001\",\"name\":\"aws-cni\",\"pluginLogFile\":\"/var/log/aws-routed-eni/plugin.log\",\"pluginLogLevel\":\"DEBUG\",\"type\":\"aws-cni\",\"vethPrefix\":\"eni\"}"
},
{
"Network": {
"name": "egress-v4-cni",
"type": "egress-v4-cni",
"ipam": {
"type": "host-local"
},
"dns": {}
},
"Source": "{\"enabled\":\"false\",\"ipam\":{\"dataDir\":\"/run/cni/v6pd/egress-v4-ipam\",\"ranges\":[[{\"subnet\":\"169.254.172.0/22\"}]],\"routes\":[{\"dst\":\"0.0.0.0/0\"}],\"type\":\"host-local\"},\"mtu\":9001,\"name\":\"egress-v4-cni\",\"nodeIP\":\"172.30.228.249\",\"pluginLogFile\":\"/var/log/aws-routed-eni/egress-v4-plugin.log\",\"pluginLogLevel\":\"DEBUG\",\"type\":\"egress-v4-cni\"}"
},
{
"Network": {
"type": "portmap",
"capabilities": {
"portMappings": true
},
"ipam": {},
"dns": {}
},
"Source": "{\"capabilities\":{\"portMappings\":true},\"snat\":true,\"type\":\"portmap\"}"
}
],
"Source": "{\n \"cniVersion\": \"0.4.0\",\n \"name\": \"aws-cni\",\n \"disableCheck\": true,\n \"plugins\": [\n {\n \"name\": \"aws-cni\",\n \"type\": \"aws-cni\",\n \"vethPrefix\": \"eni\",\n \"mtu\": \"9001\",\n \"pluginLogFile\": \"/var/log/aws-routed-eni/plugin.log\",\n \"pluginLogLevel\": \"DEBUG\"\n },\n {\n \"name\": \"egress-v4-cni\",\n \"type\": \"egress-v4-cni\",\n \"mtu\": 9001,\n \"enabled\": \"false\",\n \"nodeIP\": \"172.30.228.249\",\n \"ipam\": {\n \"type\": \"host-local\",\n \"ranges\": [[{\"subnet\": \"169.254.172.0/22\"}]],\n \"routes\": [{\"dst\": \"0.0.0.0/0\"}],\n \"dataDir\": \"/run/cni/v6pd/egress-v4-ipam\"\n },\n \"pluginLogFile\": \"/var/log/aws-routed-eni/egress-v4-plugin.log\",\n \"pluginLogLevel\": \"DEBUG\"\n },\n {\n \"type\": \"portmap\",\n \"capabilities\": {\"portMappings\": true},\n \"snat\": true\n }\n ]\n}"
},
"IFName": "eth0"
}
]
},
"config": {
"containerd": {
"snapshotter": "overlayfs",
"defaultRuntimeName": "runc",
"defaultRuntime": {
"runtimeType": "",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": null,
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
},
"untrustedWorkloadRuntime": {
"runtimeType": "",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": null,
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
},
"runtimes": {
"runc": {
"runtimeType": "io.containerd.runc.v2",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": {
"BinaryName": "",
"CriuImagePath": "",
"CriuPath": "",
"CriuWorkPath": "",
"IoGid": 0,
"IoUid": 0,
"NoNewKeyring": false,
"NoPivotRoot": false,
"Root": "",
"ShimCgroup": "",
"SystemdCgroup": false
},
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
}
},
"noPivot": false,
"disableSnapshotAnnotations": true,
"discardUnpackedLayers": false,
"ignoreRdtNotEnabledErrors": false
},
"cni": {
"binDir": "/opt/cni/bin",
"confDir": "/etc/cni/net.d",
"maxConfNum": 1,
"confTemplate": "",
"ipPref": ""
},
"registry": {
"configPath": "",
"mirrors": null,
"configs": null,
"auths": null,
"headers": null
},
"imageDecryption": {
"keyModel": "node"
},
"disableTCPService": true,
"streamServerAddress": "127.0.0.1",
"streamServerPort": "0",
"streamIdleTimeout": "4h0m0s",
"enableSelinux": false,
"selinuxCategoryRange": 1024,
"sandboxImage": "k8s.gcr.io/pause:3.6",
"statsCollectPeriod": 10,
"systemdCgroup": false,
"enableTLSStreaming": false,
"x509KeyPairStreaming": {
"tlsCertFile": "",
"tlsKeyFile": ""
},
"maxContainerLogSize": 16384,
"disableCgroup": false,
"disableApparmor": false,
"restrictOOMScoreAdj": false,
"maxConcurrentDownloads": 3,
"disableProcMount": false,
"unsetSeccompProfile": "",
"tolerateMissingHugetlbController": true,
"disableHugetlbController": true,
"device_ownership_from_security_context": false,
"ignoreImageDefinedVolumes": false,
"netnsMountsUnderStateDir": false,
"enableUnprivilegedPorts": false,
"enableUnprivilegedICMP": false,
"containerdRootDir": "/var/lib/containerd",
"containerdEndpoint": "/run/containerd/containerd.sock",
"rootDir": "/var/lib/containerd/io.containerd.grpc.v1.cri",
"stateDir": "/run/containerd/io.containerd.grpc.v1.cri"
},
"golang": "go1.17.2",
"lastCNILoadStatus": "OK",
"lastCNILoadStatus.default": "OK"
}
Linux $IP 5.4.0-1071-aws #76~18.04.1-Ubuntu SMP Mon Mar 28 17:49:57 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux
# Copyright 2018-2022 Docker Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
disabled_plugins = []
[metrics]
address = "127.0.0.1:1338"
#root = "/var/lib/containerd"
#state = "/run/containerd"
#subreaper = true
#oom_score = 0
#[grpc]
# address = "/run/containerd/containerd.sock"
# uid = 0
# gid = 0
[debug]
# address = "/run/containerd/debug.sock"
# uid = 0
# gid = 0
# supported values [trace, debug, info, warn, error, fatal, panic]
# https://github.com/containerd/containerd/blob/v1.6.1/cmd/containerd/command/main.go#L89
level = "error"
# https://github.com/containerd/containerd/blob/v1.6.1/log/context.go#L45-L49
format = "json"
Reactions are currently unavailable