OS:
~# cat /etc/lsb-release
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=18.04
DISTRIB_CODENAME=bionic
DISTRIB_DESCRIPTION="Ubuntu 18.04.4 LTS"
~# uname -a
Linux k8s-cluster-master-02 5.3.0-40-generic #32~18.04.1-Ubuntu SMP Mon Feb 3 14:05:59 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
docker:
~# docker version
Client: Docker Engine - Community
Version: 19.03.3
API version: 1.39 (downgraded from 1.40)
Go version: go1.12.10
Git commit: a872fc2f86
Built: Tue Oct 8 00:59:59 2019
OS/Arch: linux/amd64
Experimental: false
Server: Docker Engine - Community
Engine:
Version: 18.09.9
API version: 1.39 (minimum version 1.12)
Go version: go1.11.13
Git commit: 039a7df
Built: Wed Sep 4 16:19:38 2019
OS/Arch: linux/amd64
Experimental: true
kubernetes: 1.17.3
the cluster is created by kubeadm with the following config
apiVersion: kubeadm.k8s.io/v1beta2
kind: InitConfiguration
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: xxx.xxxx
ttl: 24h0m0s
usages:
- signing
- authentication
localAPIEndpoint:
advertiseAddress: 10.180.1.7
bindPort: 6443
nodeRegistration:
kubeletExtraArgs:
cloud-provider: "external"
cloud-config: "/etc/kubernetes/cloud.conf"
root-dir: "/data/kubelet"
cgroup-driver: "systemd"
certificateKey: xxxxxxx
---
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
etcd:
external:
endpoints:
- http://10.180.1.7:2379
apiServer:
extraArgs:
authorization-mode: "Node,RBAC"
enable-bootstrap-token-auth: "true"
etcd-servers: "http://10.180.1.7:2379"
bind-address: "0.0.0.0"
cloud-provider: "external"
cloud-config: "/etc/kubernetes/cloud.conf"
feature-gates: "CSIDriverRegistry=true,CSINodeInfo=true,VolumeSnapshotDataSource=true,ExpandCSIVolumes=true"
extraVolumes:
- name: cloud
hostPath: "/etc/kubernetes/cloud.conf"
mountPath: "/etc/kubernetes/cloud.conf"
certSANs:
- "10.180.1.6" # lb port1
- "10.180.1.13" # lb port2
- "10.180.1.14"
- "127.0.0.1"
- "10.96.0.10" #cluster-dns
- "10.96.0.20" #keystone-auth clusterIP
timeoutForControlPlane: "4m0s"
controllerManager:
extraArgs:
allocate-node-cidrs: "true"
cluster-cidr: "10.100.0.0/16"
cloud-provider: "external"
external-cloud-volume-plugin: openstack
cloud-config: "/etc/kubernetes/cloud.conf"
extraVolumes:
- name: cloud
hostPath: "/etc/kubernetes/cloud.conf"
mountPath: "/etc/kubernetes/cloud.conf"
certificatesDir: "/etc/kubernetes/pki"
useHyperKubeImage: false
clusterName: "SBDA Services Cluster"
kubernetesVersion: "1.17.3"
networking:
serviceSubnet: "10.96.0.0/16"
podSubnet: "10.100.0.0/16"
dnsDomain: "sap.corp"
controlPlaneEndpoint: "10.180.1.7:6443"
dns:
type: CoreDNS
cilium with kube-proxy free mode
helm install cilium cilium/cilium --version 1.7.0 \
--namespace kube-system \
--set global.kubeProxyReplacement=strict \
--set global.k8sServiceHost=10.180.1.7 \
--set global.k8sServicePort=6443 \
--set global.etcd.enabled=true \
--set global.etcd.endpoints[0]=http://10.180.1.7:2379 \
--set global.cni.chainingMode=portmap \
--set global.prometheus.enabled=true \
--set global.debug.enabled=true \
--set global.debug.verbose=trace
after cilium is deployed, coredns pod is started, but with errors
# kubectl logs coredns-6955765f44-l2bdw
.:53
[INFO] plugin/reload: Running configuration MD5 = c6208709c2b5df9ce8421fb1e22dcd3e
CoreDNS-1.6.5
linux/amd64, go1.13.4, c2fd1b2
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:55662->10.180.1.2:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:36055->10.180.1.3:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:60403->10.180.1.2:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:44120->10.180.1.3:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:47284->10.180.1.2:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:50421->10.180.1.3:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:35392->10.180.1.2:53: i/o timeout
[ERROR] plugin/errors: 2 5064704634509029259.8485005643719241973. HINFO: read udp 10.100.0.108:58167->10.180.1.2:53: i/o timeout
I0227 04:13:18.512631 1 trace.go:82] Trace[779476022]: "Reflector pkg/mod/k8s.io/client-go@v0.0.0-20190620085101-78d2af792bab/tools/cache/reflector.go:98 ListAndWatch" (started: 2020-02-27 04:12:48.511616948 +0000 UTC m=+0.031016840) (total time: 30.000914324s):
Trace[779476022]: [30.000914324s] [30.000914324s] END
E0227 04:13:18.512741 1 reflector.go:125] pkg/mod/k8s.io/client-go@v0.0.0-20190620085101-78d2af792bab/tools/cache/reflector.go:98: Failed to list *v1.Endpoints: Get https://10.96.0.1:443/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 10.96.0.1:443: i/o timeout
E0227 04:13:18.512741 1 reflector.go:125] pkg/mod/k8s.io/client-go@v0.0.0-20190620085101-78d2af792bab/tools/cache/reflector.go:98: Failed to list *v1.Endpoints: Get https://10.96.0.1:443/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 10.96.0.1:443: i/o timeout
E0227 04:13:18.512741 1 reflector.go:125] pkg/mod/k8s.io/client-go@v0.0.0-20190620085101-78d2af792bab/tools/cache/reflector.go:98: Failed to list *v1.Endpoints: Get https://10.96.0.1:443/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 10.96.0.1:443: i/o timeout
E0227 04:13:18.512741 1 reflector.go:125] pkg/mod/k8s.io/client-go@v0.0.0-20190620085101-78d2af792bab/tools/cache/reflector.go:98: Failed to list *v1.Endpoints: Get https://10.96.0.1:443/api/v1/endpoints?limit=500&resourceVersion=0: dial tcp 10.96.0.1:443: i/o timeout
the coredns pods can ping each other via pod IP, but can't ping the host or any other external IP
# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
cilium-db8g6 1/1 Running 0 96m 10.180.1.7 k8s-cluster-master-02 <none> <none>
cilium-operator-7cc4698454-5gr6d 1/1 Running 0 95m 10.180.1.7 k8s-cluster-master-02 <none> <none>
coredns-6955765f44-8rpjf 0/1 Running 1 2m8s 10.100.0.153 k8s-cluster-master-02 <none> <none>
coredns-bf9679f84-kcglc 0/1 Running 1 2m8s 10.100.0.106 k8s-cluster-master-02 <none> <none>
coredns-bf9679f84-lh45w 0/1 Running 1 2m8s 10.100.0.110 k8s-cluster-master-02 <none> <none>
kube-apiserver-k8s-cluster-master-02 1/1 Running 25 2d18h 10.180.1.7 k8s-cluster-master-02 <none> <none>
kube-controller-manager-k8s-cluster-master-02 1/1 Running 2 2d18h 10.180.1.7 k8s-cluster-master-02 <none> <none>
kube-scheduler-k8s-cluster-master-02 1/1 Running 2 2d18h 10.180.1.7 k8s-cluster-master-02 <none> <none>
# nsenter -t 28151 -n -u
root@coredns-bf9679f84-lh45w:~# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
21: eth0@if22: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 8950 qdisc noqueue state UP group default
link/ether a2:ca:d9:4a:38:1c brd ff:ff:ff:ff:ff:ff link-netnsid 0
inet 10.100.0.110/32 scope global eth0
valid_lft forever preferred_lft forever
root@coredns-bf9679f84-lh45w:~# ping 10.100.0.106
PING 10.100.0.106 (10.100.0.106) 56(84) bytes of data.
64 bytes from 10.100.0.106: icmp_seq=1 ttl=63 time=0.060 ms
64 bytes from 10.100.0.106: icmp_seq=2 ttl=63 time=0.093 ms
^C
--- 10.100.0.106 ping statistics ---
2 packets transmitted, 2 received, 0% packet loss, time 1021ms
rtt min/avg/max/mdev = 0.060/0.076/0.093/0.018 ms
root@coredns-bf9679f84-lh45w:~# ping 10.180.1.7
PING 10.180.1.7 (10.180.1.7) 56(84) bytes of data.
^C
--- 10.180.1.7 ping statistics ---
3 packets transmitted, 0 received, 100% packet loss, time 2039ms
root@coredns-bf9679f84-lh45w ~# ping 216.58.206.14
PING 216.58.206.14 (216.58.206.14) 56(84) bytes of data.
^C
--- 216.58.206.14 ping statistics ---
5 packets transmitted, 0 received, 100% packet loss, time 4079ms
when ping 216.58.206.14 from pod, here is the tcpdump and iptables drop
~# tcpdump -i ens192 host 216.58.206.14
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on ens192, link-type EN10MB (Ethernet), capture size 262144 bytes
04:24:32.394552 IP 10.180.1.7 > 216.58.206.14: ICMP echo request, id 319, seq 1, length 64
04:24:32.399145 IP 216.58.206.14 > 10.180.1.7: ICMP echo reply, id 319, seq 1, length 64
04:24:32.399172 IP 216.58.206.14 > 10.100.0.153: ICMP echo reply, id 319, seq 1, length 64
04:24:33.421338 IP 10.180.1.7 > 216.58.206.14: ICMP echo request, id 319, seq 2, length 64
04:24:33.425692 IP 216.58.206.14 > 10.180.1.7: ICMP echo reply, id 319, seq 2, length 64
04:24:33.425732 IP 216.58.206.14 > 10.100.0.153: ICMP echo reply, id 319, seq 2, length 64
04:24:34.445341 IP 10.180.1.7 > 216.58.206.14: ICMP echo request, id 319, seq 3, length 64
04:24:34.449745 IP 216.58.206.14 > 10.180.1.7: ICMP echo reply, id 319, seq 3, length 64
04:24:34.449778 IP 216.58.206.14 > 10.100.0.153: ICMP echo reply, id 319, seq 3, length 64
04:24:35.469311 IP 10.180.1.7 > 216.58.206.14: ICMP echo request, id 319, seq 4, length 64
04:24:35.473635 IP 216.58.206.14 > 10.180.1.7: ICMP echo reply, id 319, seq 4, length 64
04:24:35.473665 IP 216.58.206.14 > 10.100.0.153: ICMP echo reply, id 319, seq 4, length 64
~# iptables-save -c | grep DROP
:KUBE-MARK-DROP - [0:0]
[0:0] -A KUBE-MARK-DROP -j MARK --set-xmark 0x8000/0x8000
[0:0] -A DOCKER-ISOLATION-STAGE-2 -o docker0 -j DROP
[0:0] -A KUBE-FIREWALL -m comment --comment "kubernetes firewall for dropping marked packets" -m mark --mark 0x8000/0x8000 -j DROP
~# iptables-save -t nat -c | grep DROP
:KUBE-MARK-DROP - [0:0]
[0:0] -A KUBE-MARK-DROP -j MARK --set-xmark 0x8000/0x8000
and inside cilium container, seems cilium works well
~# cilium status
KVStore: Ok etcd: 1/1 connected, lease-ID=25b47084851640f6, lock lease-ID=25b47084851640f8, has-quorum=true: http://10.180.1.7:2379 - 3.3.17 (Leader)
Kubernetes: Ok 1.17 (v1.17.3) [linux/amd64]
Kubernetes APIs: ["CustomResourceDefinition", "cilium/v2::CiliumClusterwideNetworkPolicy", "cilium/v2::CiliumNetworkPolicy", "core/v1::Endpoint", "core/v1::Namespace", "core/v1::Pods", "core/v1::Service", "networking.k8s.io/v1::NetworkPolicy"]
KubeProxyReplacement: Strict [NodePort (SNAT, 30000-32767), ExternalIPs, HostReachableServices (TCP, UDP)]
Cilium: Ok OK
NodeMonitor: Disabled
Cilium health daemon: Ok
IPAM: IPv4: 4/255 allocated from 10.100.0.0/24,
Controller Status: 27/27 healthy
Proxy Status: OK, ip 10.100.0.132, 0 redirects active on ports 10000-20000
Cluster health: 1/1 reachable (2020-02-27T04:26:25Z)
root@k8s-cluster-master-02:~# cilium-health status
Probe time: 2020-02-27T04:26:25Z
Nodes:
k8s-cluster-master-02 (localhost):
Host connectivity to 10.180.1.7:
ICMP to stack: OK, RTT=138.334µs
HTTP to agent: OK, RTT=319.898µs
root@k8s-cluster-master-02:~# cilium endpoint list
ENDPOINT POLICY (ingress) POLICY (egress) IDENTITY LABELS (source:key[=value]) IPv6 IPv4 STATUS
ENFORCEMENT ENFORCEMENT
117 Disabled Disabled 47582 k8s:io.cilium.k8s.policy.cluster=default 10.100.0.106 ready
k8s:io.cilium.k8s.policy.serviceaccount=coredns
k8s:io.kubernetes.pod.namespace=kube-system
k8s:k8s-app=kube-dns
553 Disabled Disabled 47582 k8s:io.cilium.k8s.policy.cluster=default 10.100.0.110 ready
k8s:io.cilium.k8s.policy.serviceaccount=coredns
k8s:io.kubernetes.pod.namespace=kube-system
k8s:k8s-app=kube-dns
1931 Disabled Disabled 47582 k8s:io.cilium.k8s.policy.cluster=default 10.100.0.153 ready
k8s:io.cilium.k8s.policy.serviceaccount=coredns
k8s:io.kubernetes.pod.namespace=kube-system
k8s:k8s-app=kube-dns
and from cilium-operator, i can see the log
kubectl logs cilium-operator-7cc4698454-5gr6d
level=debug msg="Found kube-dns pod kube-system/coredns-6955765f44-8rpjf with identity 1931" subsys=cilium-operator
level=debug msg="Found kube-dns pod kube-system/coredns-bf9679f84-kcglc with identity 117" subsys=cilium-operator
level=debug msg="Found kube-dns pod kube-system/coredns-bf9679f84-lh45w with identity 553" subsys=cilium-operator
level=debug msg="Controller func execution time: 8.544277ms" name=restart-unmanaged-kube-dns subsys=controller uuid=fbb88f0d-590a-11ea-9f14-fa163e1d60f3
level=debug msg="Found kube-dns pod kube-system/coredns-6955765f44-8rpjf with identity 1931" subsys=cilium-operator
level=debug msg="Found kube-dns pod kube-system/coredns-bf9679f84-kcglc with identity 117" subsys=cilium-operator
level=debug msg="Found kube-dns pod kube-system/coredns-bf9679f84-lh45w with identity 553" subsys=cilium-operator
level=debug msg="Controller func execution time: 8.296282ms" name=restart-unmanaged-kube-dns subsys=controller uuid=fbb88f0d-590a-11ea-9f14-fa163e1d60f3
OS:
docker:
kubernetes: 1.17.3
the cluster is created by kubeadm with the following config
cilium with kube-proxy free mode
after cilium is deployed, coredns pod is started, but with errors
the coredns pods can ping each other via pod IP, but can't ping the host or any other external IP
when ping 216.58.206.14 from pod, here is the tcpdump and iptables drop
and inside cilium container, seems cilium works well
and from cilium-operator, i can see the log