Skip to content

kubernetes.container.cpu.usage.limit.pct field is lost when monitoring Kubernetes #9221

@ypc123ypc

Description

@ypc123ypc
  • I have the same trouble when monitoring Kubernetes containers: I only get the 'pct' data right after Metricbeat starts, but it goes missing after about a minute. Can someone help me?

My environment:

  • ES version: 6.4.2
  • docker image:
asia.gcr.io/fcitw-20181017/metricbeat:6.4.2
asia.gcr.io/fcitw-20181017/kube-state-metrics:latest
asia.gcr.io/fcitw-20181017/addon-resizer:1.0
  • platform: Google cloud platform (GCP)

My configuration

  • kube-state-metrics-deployment.yaml
# Deployment running kube-state-metrics together with an addon-resizer
# (pod_nanny) sidecar in the `monitor` namespace.
#
# API version guidance from the original author: Kubernetes versions after
# 1.9.0 should use apps/v1; versions before 1.8.0 should use apps/v1beta1 or
# extensions/v1beta1.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: monitor
  name: kube-state-metrics
spec:
  replicas: 1
  # Must match the pod template labels below.
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        # Main container: exposes metrics on 8080 and telemetry on 8081.
        - name: kube-state-metrics
          image: asia.gcr.io/fcitw-20181017/kube-state-metrics:latest
          ports:
            - containerPort: 8080
              name: http-metrics
            - containerPort: 8081
              name: telemetry
          # Pod is marked ready once /healthz answers on the metrics port.
          readinessProbe:
            initialDelaySeconds: 5
            timeoutSeconds: 5
            httpGet:
              port: 8080
              path: /healthz
        # Sidecar: runs /pod_nanny against the kube-state-metrics container
        # and deployment named in its flags.
        # NOTE(review): presumably it resizes that container's resources;
        # confirm flag semantics against the addon-resizer documentation.
        - name: addon-resizer
          image: asia.gcr.io/fcitw-20181017/addon-resizer:1.0
          command:
            - /pod_nanny
            - --container=kube-state-metrics
            - --cpu=800m
            - --extra-cpu=10m
            - --memory=800Mi
            - --extra-memory=50Mi
            - --threshold=5
            - --deployment=kube-state-metrics
          # Pod name and namespace are injected from the pod's own metadata.
          env:
            - name: MY_POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: MY_POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          resources:
            requests:
              cpu: 1
              memory: 1Gi
            limits:
              cpu: 2
              memory: 2Gi
  • metricbeat-daemonset-configmap.yaml
---
# ConfigMap holding the main Metricbeat configuration file (metricbeat.yml)
# for the DaemonSet. It is referenced by name from the DaemonSet's `config`
# volume.
apiVersion: v1
kind: ConfigMap
metadata:
  name: metricbeat-daemonset-config
  namespace: monitor
  labels:
    k8s-app: metricbeat
data:
  # Everything under `metricbeat.yml: |-` is a literal block scalar, i.e. the
  # verbatim content of the config file. In summary it:
  #   - loads module configs from ${path.config}/modules.d/*.yml with
  #     reloading disabled,
  #   - leaves hints-based autodiscover and add_cloud_metadata commented out,
  #   - adds a custom field `clusterenv: commit`,
  #   - sends events to two Logstash hosts (the Elasticsearch output is
  #     commented out).
  # NOTE(review): `processors:` has only a commented-out child, so it is an
  # empty value — confirm that is intended.
  metricbeat.yml: |-
    metricbeat.config.modules:
      # Mounted `metricbeat-daemonset-modules` configmap:
      path: ${path.config}/modules.d/*.yml
      # Reload module configs as they change:
      reload.enabled: false
    # To enable hints based autodiscover uncomment this:
    #metricbeat.autodiscover:
    #  providers:
    #    - type: kubernetes
    #      host: ${NODE_NAME}
    #      hints.enabled: true
    processors:
      #- add_cloud_metadata:
    fields:
      clusterenv: commit
    #output.elasticsearch:
    #  hosts: ['${ELASTICSEARCH_HOST_NODE1:10.140.0.9}:${ELASTICSEARCH_PORT_NODE1:39202}','${ELASTICSEARCH_HOST_NODE2:10.140.0.10}:${ELASTICSEARCH_PORT_NODE2:39202}','${ELASTICSEARCH_HOST_NODE3:10.140.0.11}:${ELASTICSEARCH_PORT_NODE3:39202}']
    output.logstash:
      hosts: ['${LOGSTASH_HOST_NODE1:10.140.0.7}:${LOGSTASH_PORT_NODE1:5044}','${LOGSTASH_HOST_NODE2:10.140.0.8}:${LOGSTASH_PORT_NODE2:5044}']
    #logging.level: debug
---
# ConfigMap holding the Metricbeat module definitions. It is referenced by
# name from the DaemonSet's `modules` volume, which is mounted at
# /usr/share/metricbeat/modules.d.
apiVersion: v1
kind: ConfigMap
metadata:
  name: metricbeat-daemonset-modules
  namespace: monitor
  labels:
    k8s-app: metricbeat
data:
  # system.yml: two `system` module entries.
  #   - every 10s: cpu, load, memory, network, core, diskio
  #     (process, process_summary and socket are commented out);
  #   - every 1m: filesystem and fsstat, dropping events whose mount point
  #     matches the regexp below.
  system.yml: |-
    - module: system
      period: 10s
      metricsets:
        - cpu
        - load
        - memory
        - network
        #- process
        #- process_summary
        - core
        - diskio
        #- socket
      cpu.metrics: [percentages, normalized_percentages]
      processes: ['.*']
      process.include_top_n:
        #by_cpu: 5      # include top 5 processes by CPU
        #by_memory: 5   # include top 5 processes by memory
    - module: system
      period: 1m
      metricsets:
        - filesystem
        - fsstat
      processors:
      - drop_event.when.regexp:
          system.filesystem.mount_point: '^/(sys|cgroup|proc|dev|etc|host|lib)($|/)'
  # kubernetes.yml: one active `kubernetes` module entry collecting the
  # system, pod, container and volume metricsets every 10s from
  # localhost:10255. The `node` metricset and the whole second entry
  # (state_* metricsets read from kube-state-metrics:8080) are commented out.
  # NOTE(review): the reported symptom (limit.pct present at startup, then
  # missing) may be related to `node` and `state_container` being disabled
  # here — the pct fields appear to depend on limit/allocatable data those
  # metricsets provide. Confirm against the Metricbeat Kubernetes module
  # documentation for 6.4 before changing anything.
  kubernetes.yml: |-
    - module: kubernetes
      metricsets:
        #- node
        - system
        - pod
        - container
        - volume
      period: 10s
      host: ${NODE_NAME}
      hosts: ["localhost:10255"]
      # If using Red Hat OpenShift remove the previous hosts entry and 
      # uncomment these settings:
      #hosts: ["https://${HOSTNAME}:10250"]
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      #ssl.certificate_authorities:
        #- /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
    #- module: kubernetes
    #  enabled: true
    #  metricsets:
    #    - state_node
    #    - state_deployment
    #    - state_replicaset
    #    - state_statefulset
    #    - state_pod
    #    - state_container
    #  period: 10s
    #  host: ${NODE_NAME}
    #  hosts: ["kube-state-metrics:8080"]
  • metricbeat-daemonset.yaml
# DaemonSet that runs one Metricbeat pod per node in the `monitor` namespace.
# NOTE(review): extensions/v1beta1 is deprecated for DaemonSets in newer
# Kubernetes releases; moving to apps/v1 also requires an explicit
# spec.selector matching the template labels. Left unchanged here.
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: metricbeat
  namespace: monitor
  labels:
    k8s-app: metricbeat
spec:
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        k8s-app: metricbeat
    spec:
      serviceAccountName: metricbeat
      terminationGracePeriodSeconds: 30
      # Host network and PID namespace are shared with the node; the
      # kubernetes module config targets localhost:10255, presumably the
      # node's kubelet endpoint.
      hostNetwork: true
      hostPID: true
      dnsPolicy: ClusterFirstWithHostNet
      restartPolicy: "Always"
      containers:
      - name: metricbeat
        image: asia.gcr.io/fcitw-20181017/metricbeat:6.4.2
        imagePullPolicy: "Always"
        # -c: config file mounted from the `config` volume below.
        # -system.hostfs: points the system module at the host's /proc and
        # cgroup trees mounted under /hostfs.
        args: [
          "-c", "/etc/metricbeat.yml",
          "-e",
          "-system.hostfs=/hostfs",
        ]
        # Logstash endpoints referenced from metricbeat.yml; ports are quoted
        # because env values must be strings.
        env:
        - name: LOGSTASH_HOST_NODE1
          value: 10.140.0.7
        - name: LOGSTASH_PORT_NODE1
          value: "5044"
        - name: LOGSTASH_HOST_NODE2
          value: 10.140.0.8
        - name: LOGSTASH_PORT_NODE2
          value: "5044"
        # Name of the node this pod is scheduled on.
        - name: NODE_NAME
          valueFrom:
            fieldRef:
              fieldPath: spec.nodeName
        securityContext:
          runAsUser: 0
        resources:
          limits:
            memory: 200Mi
          requests:
            cpu: 100m
            memory: 100Mi
        volumeMounts:
        - name: config
          mountPath: /etc/metricbeat.yml
          readOnly: true
          subPath: metricbeat.yml
        - name: modules
          mountPath: /usr/share/metricbeat/modules.d
          readOnly: true
        - name: dockersock
          mountPath: /var/run/docker.sock
        - name: proc
          mountPath: /hostfs/proc
          readOnly: true
        - name: cgroup
          mountPath: /hostfs/sys/fs/cgroup
          readOnly: true
        # Fix: the `data` volume was declared below but never mounted, so
        # Metricbeat's data path (/usr/share/metricbeat/data, as reported in
        # its startup log) lived only in the container filesystem and was
        # lost on every restart.
        - name: data
          mountPath: /usr/share/metricbeat/data
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: cgroup
        hostPath:
          path: /sys/fs/cgroup
      - name: dockersock
        hostPath:
          path: /var/run/docker.sock
      - name: config
        configMap:
          defaultMode: 0600
          name: metricbeat-daemonset-config
      - name: modules
        configMap:
          defaultMode: 0600
          name: metricbeat-daemonset-modules
      # Host directory backing Metricbeat's data path; created on the node if
      # it does not exist.
      - name: data
        hostPath:
          path: /var/lib/metricbeat-data
          type: DirectoryOrCreate

appearance

dl9o e qqkir qiq 7a8cq3
2iwz5x6u kgijl8 twncpvl


  • I do not see any errors in the pods or containers. I also tried setting the Metricbeat log level to debug, but did not get any useful information.

  • Some logs:

2018-11-26T01:29:49.018Z        INFO    instance/beat.go:544    Home path: [/usr/share/metricbeat] Config path: [/usr/share/metricbeat] Data path: [/usr/share/metricbeat/data] Logs path: [/usr/share/metricbeat/logs]
2018-11-26T01:29:49.018Z        INFO    instance/beat.go:551    Beat UUID: b9fce4f8-1bc7-4b26-8177-68b0b0f4fd8d
2018-11-26T01:29:49.018Z        INFO    [seccomp]       seccomp/seccomp.go:116  Syscall filter successfully installed
2018-11-26T01:29:49.018Z        INFO    [beat]  instance/beat.go:768    Beat info       {"system_info": {"beat": {"path": {"config": "/usr/share/metricbeat", "data": "/usr/share/metricbeat/data", "home": "/usr/share/metricbeat", "logs": "/usr/share/metricbeat/logs"}, "type": "metricbeat", "uuid": "b9fce4f8-1bc7-4b26-8177-68b0b0f4fd8d"}}}
2018-11-26T01:29:49.018Z        INFO    [beat]  instance/beat.go:777    Build info      {"system_info": {"build": {"commit": "e193f6d68b25b7ddbe3a3ed8d60bc07fea1ef800", "libbeat": "6.4.2", "time": "2018-09-26T12:46:47.000Z", "version": "6.4.2"}}}
2018-11-26T01:29:49.018Z        INFO    [beat]  instance/beat.go:780    Go runtime info {"system_info": {"go": {"os":"linux","arch":"amd64","max_procs":8,"version":"go1.10.3"}}}
2018-11-26T01:29:49.021Z        INFO    [beat]  instance/beat.go:784    Host info       {"system_info": {"host": {"architecture":"x86_64","boot_time":"2018-11-15T12:56:37Z","containerized":false,"hostname":"gke-fcitw-fciserver-comm-default-pool-a5cfe1ea-csrt","ips":["127.0.0.1/8","::1/128","10.140.0.5/32","fe80::4001:aff:fe8c:5/64","169.254.123.1/24","10.48.0.1/24","fe80::8449:31ff:feac:4c6e/64","fe80::a01c:afff:fe86:ff96/64","fe80::bc98:a9ff:fecb:bee0/64","fe80::9cfc:5dff:fe1b:14dd/64","fe80::305c:b3ff:feed:42f1/64","fe80::a0a8:8eff:fe64:2baa/64","fe80::981f:b4ff:feb4:4c4f/64","fe80::ac4b:91ff:fe92:2b45/64","fe80::f099:1ff:fee9:682/64","fe80::5c0a:85ff:fe19:511e/64","fe80::a85f:daff:fe04:de05/64","fe80::2047:4bff:fe04:c115/64","fe80::825:46ff:feca:2aca/64","fe80::8c31:abff:fe0f:8d8c/64","fe80::5817:a2ff:fef5:27c9/64","fe80::88aa:9aff:fec7:6461/64","fe80::fcdb:b8ff:fe8c:44c7/64","fe80::287e:4cff:fe3b:4de0/64","fe80::3488:d4ff:fe4e:94f9/64"],"kernel_version":"4.4.111+","mac_addresses":["42:01:0a:8c:00:05","02:42:84:e0:98:2b","0a:58:0a:30:00:01","a2:1c:af:86:ff:96","be:98:a9:cb:be:e0","9e:fc:5d:1b:14:dd","32:5c:b3:ed:42:f1","a2:a8:8e:64:2b:aa","9a:1f:b4:b4:4c:4f","ae:4b:91:92:2b:45","f2:99:01:e9:06:82","5e:0a:85:19:51:1e","aa:5f:da:04:de:05","22:47:4b:04:c1:15","0a:25:46:ca:2a:ca","8e:31:ab:0f:8d:8c","5a:17:a2:f5:27:c9","8a:aa:9a:c7:64:61","fe:db:b8:8c:44:c7","2a:7e:4c:3b:4d:e0","36:88:d4:4e:94:f9"],"os":{"family":"redhat","platform":"centos","name":"CentOS Linux","version":"7 (Core)","major":7,"minor":5,"patch":1804,"codename":"Core"},"timezone":"UTC","timezone_offset_sec":0,"id":"7bf1a9675da246fc9e8cfed31ee8e3cc"}}}
2018-11-26T01:29:49.022Z        INFO    [beat]  instance/beat.go:813    Process info    {"system_info": {"process": {"capabilities": {"inheritable":["chown","dac_override","fowner","fsetid","kill","setgid","setuid","setpcap","net_bind_service","net_raw","sys_chroot","mknod","audit_write","setfcap"],"permitted":["chown","dac_override","fowner","fsetid","kill","setgid","setuid","setpcap","net_bind_service","net_raw","sys_chroot","mknod","audit_write","setfcap"],"effective":["chown","dac_override","fowner","fsetid","kill","setgid","setuid","setpcap","net_bind_service","net_raw","sys_chroot","mknod","audit_write","setfcap"],"bounding":["chown","dac_override","fowner","fsetid","kill","setgid","setuid","setpcap","net_bind_service","net_raw","sys_chroot","mknod","audit_write","setfcap"],"ambient":null}, "cwd": "/usr/share/metricbeat", "exe": "/usr/share/metricbeat/metricbeat", "name": "metricbeat", "pid": 23379, "ppid": 23047, "seccomp": {"mode":"filter","no_new_privs":true}, "start_time": "2018-11-26T01:29:48.790Z"}}}
2018-11-26T01:29:49.022Z        INFO    instance/beat.go:273    Setup Beat: metricbeat; Version: 6.4.2
2018-11-26T01:29:49.022Z        INFO    pipeline/module.go:98   Beat name: gke-fcitw-fciserver-comm-default-pool-a5cfe1ea-csrt
2018-11-26T01:29:49.023Z        INFO    instance/beat.go:367    metricbeat start running.
2018-11-26T01:29:49.023Z        INFO    [monitoring]    log/log.go:114  Starting metrics logging every 30s
2018-11-26T01:29:49.024Z        INFO    kubernetes/util.go:71   kubernetes: Using node gke-fcitw-fciserver-comm-default-pool-a5cfe1ea-csrt provided in the config
2018-11-26T01:29:49.024Z        INFO    kubernetes/util.go:71   kubernetes: Using node gke-fcitw-fciserver-comm-default-pool-a5cfe1ea-csrt provided in the config
2018-11-26T01:29:49.025Z        INFO    filesystem/filesystem.go:58     Ignoring filesystem types: sysfs, rootfs, ramfs, bdev, proc, cpuset, cgroup, tmpfs, devtmpfs, binfmt_misc, debugfs, tracefs, securityfs, sockfs, bpf, pipefs, devpts, hugetlbfs, autofs, overlay, pstore, mqueue
2018-11-26T01:29:49.025Z        INFO    fsstat/fsstat.go:59     Ignoring filesystem types: sysfs, rootfs, ramfs, bdev, proc, cpuset, cgroup, tmpfs, devtmpfs, binfmt_misc, debugfs, tracefs, securityfs, sockfs, bpf, pipefs, devpts, hugetlbfs, autofs, overlay, pstore, mqueue
2018-11-26T01:29:49.025Z        INFO    cfgfile/reload.go:141   Config reloader started
2018-11-26T01:29:49.026Z        INFO    filesystem/filesystem.go:58     Ignoring filesystem types: sysfs, rootfs, ramfs, bdev, proc, cpuset, cgroup, tmpfs, devtmpfs, binfmt_misc, debugfs, tracefs, securityfs, sockfs, bpf, pipefs, devpts, hugetlbfs, autofs, overlay, pstore, mqueue
2018-11-26T01:29:49.027Z        INFO    fsstat/fsstat.go:59     Ignoring filesystem types: sysfs, rootfs, ramfs, bdev, proc, cpuset, cgroup, tmpfs, devtmpfs, binfmt_misc, debugfs, tracefs, securityfs, sockfs, bpf, pipefs, devpts, hugetlbfs, autofs, overlay, pstore, mqueue
2018-11-26T01:29:49.028Z        INFO    kubernetes/util.go:71   kubernetes: Using node gke-fcitw-fciserver-comm-default-pool-a5cfe1ea-csrt provided in the config
2018-11-26T01:29:49.028Z        INFO    kubernetes/util.go:71   kubernetes: Using node gke-fcitw-fciserver-comm-default-pool-a5cfe1ea-csrt provided in the config
2018-11-26T01:29:49.030Z        INFO    kubernetes/watcher.go:180       kubernetes: Performing a resource sync for *v1.PodList
2018-11-26T01:29:49.030Z        INFO    kubernetes/watcher.go:180       kubernetes: Performing a resource sync for *v1.PodList
2018-11-26T01:29:49.029Z        INFO    cfgfile/reload.go:196   Loading of config files completed.
2018-11-26T01:29:49.049Z        INFO    kubernetes/watcher.go:194       kubernetes: Resource sync done
2018-11-26T01:29:49.049Z        INFO    kubernetes/watcher.go:238       kubernetes: Watching API for resource events
2018-11-26T01:29:49.054Z        INFO    kubernetes/watcher.go:194       kubernetes: Resource sync done
2018-11-26T01:29:49.055Z        INFO    kubernetes/watcher.go:238       kubernetes: Watching API for resource events
2018-11-26T01:29:50.031Z        INFO    pipeline/output.go:95   Connecting to failover(backoff(async(tcp://10.140.0.7:5044)),backoff(async(tcp://10.140.0.8:5044)))
2018-11-26T01:29:50.033Z        INFO    pipeline/output.go:105  Connection to failover(backoff(async(tcp://10.140.0.7:5044)),backoff(async(tcp://10.140.0.8:5044))) established
2018-11-26T01:30:19.025Z        INFO    [monitoring]    log/log.go:141  Non-zero metrics in the last 30s        {"monitoring": {"metrics": {"beat":{"cpu":{"system":{"ticks":60,"time":{"ms":62}},"total":{"ticks":190,"time":{"ms":193},"value":190},"user":{"ticks":130,"time":{"ms":131}}},"info":{"ephemeral_id":"8f6084a8-a9a5-4e7d-9f4e-cca6ab4cb3a2","uptime":{"ms":30033}},"memstats":{"gc_next":10748672,"memory_alloc":5473480,"memory_total":22745072,"rss":42176512}},"libbeat":{"config":{"module":{"running":0},"reloads":1},"output":{"events":{"acked":343,"batches":3,"total":343},"read":{"bytes":24},"type":"logstash","write":{"bytes":42007}},"pipeline":{"clients":6,"events":{"active":0,"filtered":1,"published":343,"retry":115,"total":344},"queue":{"acked":343}}},"metricbeat":{"kubernetes":{"container":{"events":84,"success":84},"pod":{"events":60,"success":60},"system":{"events":6,"success":6},"volume":{"events":63,"success":63}},"system":{"core":{"events":24,"success":24},"cpu":{"events":3,"success":3},"diskio":{"events":30,"success":30},"filesystem":{"events":1,"success":1},"fsstat":{"events":1,"success":1},"load":{"events":3,"success":3},"memory":{"events":3,"success":3},"network":{"events":66,"success":66}}},"system":{"cpu":{"cores":8},"load":{"1":0.06,"15":0.15,"5":0.1,"norm":{"1":0.0075,"15":0.0188,"5":0.0125}}}}}}
2018-11-26T01:30:49.025Z        INFO    [monitoring]    log/log.go:141  Non-zero metrics in the last 30s        {"monitoring": {"metrics": {"beat":{"cpu":{"system":{"ticks":80,"time":{"ms":24}},"total":{"ticks":270,"time":{"ms":88},"value":270},"user":{"ticks":190,"time":{"ms":64}}},"info":{"ephemeral_id":"8f6084a8-a9a5-4e7d-9f4e-cca6ab4cb3a2","uptime":{"ms":60034}},"memstats":{"gc_next":10909296,"memory_alloc":9241360,"memory_total":34115704,"rss":983040}},"libbeat":{"config":{"module":{"running":0}},"output":{"events":{"acked":342,"batches":3,"total":342},"read":{"bytes":24},"write":{"bytes":42515}},"pipeline":{"clients":6,"events":{"active":0,"published":342,"total":342},"queue":{"acked":342}}},"metricbeat":{"kubernetes":{"container":{"events":84,"success":84},"pod":{"events":60,"success":60},"system":{"events":6,"success":6},"volume":{"events":63,"success":63}},"system":{"core":{"events":24,"success":24},"cpu":{"events":3,"success":3},"diskio":{"events":30,"success":30},"load":{"events":3,"success":3},"memory":{"events":3,"success":3},"network":{"events":66,"success":66}}},"system":{"load":{"1":0.04,"15":0.14,"5":0.09,"norm":{"1":0.005,"15":0.0175,"5":0.0113}}}}}}
2018-11-26T01:31:19.024Z        INFO    [monitoring]    log/log.go:141  Non-zero metrics in the last 30s        {"monitoring": {"metrics": {"beat":{"cpu":{"system":{"ticks":110,"time":{"ms":24}},"total":{"ticks":380,"time":{"ms":102},"value":380},"user":{"ticks":270,"time":{"ms":78}}},"info":{"ephemeral_id":"8f6084a8-a9a5-4e7d-9f4e-cca6ab4cb3a2","uptime":{"ms":90034}},"memstats":{"gc_next":10921488,"memory_alloc":5559640,"memory_total":45509240,"rss":188416}},"libbeat":{"config":{"module":{"running":0}},"output":{"events":{"acked":343,"batches":3,"total":343},"read":{"bytes":30},"write":{"bytes":41798}},"pipeline":{"clients":6,"events":{"active":0,"filtered":1,"published":343,"total":344},"queue":{"acked":343}}},"metricbeat":{"kubernetes":{"container":{"events":84,"success":84},"pod":{"events":60,"success":60},"system":{"events":6,"success":6},"volume":{"events":63,"success":63}},"system":{"core":{"events":24,"success":24},"cpu":{"events":3,"success":3},"diskio":{"events":30,"success":30},"filesystem":{"events":1,"success":1},"fsstat":{"events":1,"success":1},"load":{"events":3,"success":3},"memory":{"events":3,"success":3},"network":{"events":66,"success":66}}},"system":{"load":{"1":0.1,"15":0.14,"5":0.1,"norm":{"1":0.0125,"15":0.0175,"5":0.0125}}}}}}

Can someone help me? When Metricbeat starts, the 'pct' data is OK, but it is lost after a minute. Is there an error in my config?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions