Skip to content

Down backend server causes ~1500req/s to the Docker Swarm internal DNS #2503

@elyulka

Description

@elyulka

Detailed Description of the Problem

I've noticed a significant number of error messages appearing at the rate of ~1500 per second in the Docker Daemon log looking like:

"error":"read udp 127.0.0.1:42034->127.0.0.11:53: i/o timeout",
"level":"error",
"message":"[resolver] failed to query DNS server: 127.0.0.11:53, query: ;efk_kibana.\\tIN\\t A"

This swarm service is currently down. Configuration works when service is up.
I've narrowed down the issue to the running haproxy instances. It seems like all defined timeouts are ignored and haproxy constantly polls the DNS server, even though it uses just 0.4% CPU.

Expected Behavior

Haproxy takes into account timeout settings and asks DNS at most once per second.

Steps to Reproduce the Behavior

  1. Define Docker swarm stack with haproxy and down backend service.
  2. Examine docker daemon log with journalctl -u docker.service

Do you have any idea what may have caused this?

No response

Do you have an idea how to solve the issue?

No response

What is your configuration?

global
    stats socket /var/run/haproxy.sock mode 660 expose-fd listeners level admin
    log     	 stderr len 65535 format raw local0

    chroot  	 /var/lib/haproxy
    pidfile 	 /var/run/haproxy.pid
    maxconn 	 10000

    spread-checks 5
    user    	 haproxy
    group   	 haproxy

    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private

    # see https://ssl-config.mozilla.org/#server=haproxy&version=2.8&config=intermediate&openssl=1.1.1k&guideline=5.7
    # intermediate configuration
    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305
    ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
    ssl-default-bind-options prefer-client-ciphers no-sslv3 no-tlsv10 no-tlsv11 no-tls-tickets

    ssl-default-server-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305
    ssl-default-server-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
    ssl-default-server-options no-sslv3 no-tlsv10 no-tlsv11 no-tls-tickets

    # utilize system-wide crypto-policies
    #ssl-default-bind-ciphers PROFILE=SYSTEM
    # ssl-default-server-ciphers PROFILE=SYSTEM
    # curl https://ssl-config.mozilla.org/ffdhe2048.txt > ./dhparam
    ssl-dh-param-file /etc/haproxy/dhparam
    master-worker

resolvers docker
    # probably need to prefix with tcp@ if number of backends > 10
    nameserver dns1 127.0.0.11:53
    accepted_payload_size 8192 # allow larger DNS payloads
    resolve_retries 3

    timeout resolve 1s
    timeout retry   1s

    hold other      10s
    hold refused    10s
    hold nx         10s
    hold timeout    10s
    hold valid      10s
    hold obsolete   10s

defaults
    # fix many dockerd errors "[resolver] failed to query DNS server: 127.0.0.11:53, query: ;efk_kibana. IN AAAA"
    default-server resolve-prefer ipv4

    timeout queue           1m
    timeout connect         10s
    timeout client          30s
    timeout server          30s

    # Protection from Slowloris attacks
    timeout http-request    5s

    timeout http-keep-alive 10s
    timeout check           10s
    timeout tunnel          2m
    timeout client-fin      1s
    timeout server-fin      1s
    log-format '{"host":"%H","ident":"haproxy","pid":%pid,"time":"%Tl","haproxy":{"conn":{"act":%ac,"fe":%fc,"be":%bc,"srv":%sc},"queue":{"backend":%bq,"srv":%sq},"time":{"tq":%Tq,"tw":%Tw,"tc":%Tc,"tr":%Tr,"tt":%Tt},"termination_state":"%tsc","retries":%rc,"network":{"client_ip":"%ci","client_port":%cp,"frontend_ip":"%fi","frontend_port":%fp},"ssl":{"version":"%sslv","ciphers":"%sslc"},"request":{"method":"%HM","hu":"%HU","hp":"%HP","hq":"%[var(txn.querystring),json(utf8s)]","protocol":"%HV","header":{"host":"%[capture.req.hdr(0),json(utf8s)]","xforwardfor":"%[capture.req.hdr(1),json(utf8s)]","referer":"%[capture.req.hdr(2),json(utf8s)]"}},"name":{"backend":"%b","frontend":"%ft","server":"%s"},"response":{"status_code":%ST,"header":{"xrequestid":"%[capture.res.hdr(0),json(utf8s)]"}},"bytes":{"uploaded":%U,"read":%B}}}'
   
    error-log-format '{"host":"%H","ident":"haproxy","pid":%pid,"time":"%tr","haproxy":{"conn":{"act":%ac,"fe":%fc},"network":{"client_ip":"%ci","client_port":%cp},"ssl":{"version":"%sslv","ciphers":"%sslc","sni":"%[ssl_fc_sni,json(utf8s)]","is_resumed":%[ssl_fc_is_resumed,json(utf8s)]},"name":{"frontend":"%ft"}},"err":{"id":%[fc_err],"ssl_fc":%[ssl_fc_err],"ssl_c":%[ssl_c_err],"ssl_c_ca":%[ssl_c_ca_err]}}'
    log global

    mode http
    # options
    # option httplog
    option                  dontlognull
    option http-server-close
    # not yet supported by nginx, so skip adding IETF RFC7239 header
    option forwarded host by by_port for
    # add non-standard X-Forwarded-For header
    option forwardfor except 127.0.0.0/8
    option                  redispatch
    retries                 3
    # per frontend
    maxconn 	 5000
    backlog 131072

frontend fe_kibana
    bind *:5601 ssl crt /opt alpn h2,http/1.1
    http-request auth unless { http_auth(statcredentials) }
    http-request del-header Authorization
    acl host_app hdr_beg(host) -i "${APP_HOSTNAME}"
    use_backend be_kibana if host_app

backend be_kibana
    server-template kibana- 1 efk_kibana:5601 check resolvers docker init-addr libc,none

userlist statcredentials
    user "${HAPROXY_STATS_USER}"   insecure-password "${HAPROXY_STATS_PASSWORD}"


### Output of `haproxy -vv`

```plain
HAProxy version 2.8.7-1a82cdf 2024/02/26 - https://haproxy.org/
Status: long-term supported branch - will stop receiving fixes around Q2 2028.
Known bugs: http://www.haproxy.org/bugs/bugs-2.8.7.html
Running on: Linux 5.15.0-94-generic #104-Ubuntu SMP Tue Jan 9 15:25:40 UTC 2024 x86_64
Build options :
  TARGET  = linux-glibc
  CPU     = generic
  CC      = cc
  CFLAGS  = -O2 -g -Wall -Wextra -Wundef -Wdeclaration-after-statement -Wfatal-errors -Wtype-limits -Wshift-negative-value -Wshift-overflow=2 -Wduplicated-cond -Wnull-dereference -fwrapv -Wno-address-of-packed-member -Wno-unused-label -Wno-sign-compare -Wno-unused-parameter -Wno-clobbered -Wno-missing-field-initializers -Wno-cast-function-type -Wno-string-plus-int -Wno-atomic-alignment
  OPTIONS = USE_PTHREAD_EMULATION=1 USE_LINUX_TPROXY=1 USE_GETADDRINFO=1 USE_OPENSSL=1 USE_LUA=1 USE_SLZ=1 USE_TFO=1 USE_QUIC=1 USE_PROMEX=1 USE_PCRE2=1 USE_PCRE2_JIT=1 USE_QUIC_OPENSSL_COMPAT=1
  DEBUG   = -DDEBUG_STRICT -DDEBUG_MEMORY_POOLS

Feature list : -51DEGREES +ACCEPT4 +BACKTRACE -CLOSEFROM +CPU_AFFINITY +CRYPT_H -DEVICEATLAS +DL -ENGINE +EPOLL -EVPORTS +GETADDRINFO -KQUEUE -LIBATOMIC +LIBCRYPT +LINUX_CAP +LINUX_SPLICE +LINUX_TPROXY +LUA +MATH -MEMORY_PROFILING +NETFILTER +NS -OBSOLETE_LINKER +OPENSSL -OPENSSL_WOLFSSL -OT -PCRE +PCRE2 +PCRE2_JIT -PCRE_JIT +POLL +PRCTL -PROCCTL +PROMEX +PTHREAD_EMULATION +QUIC +QUIC_OPENSSL_COMPAT +RT +SHM_OPEN +SLZ +SSL -STATIC_PCRE -STATIC_PCRE2 -SYSTEMD +TFO +THREAD +THREAD_DUMP +TPROXY -WURFL -ZLIB

Default settings :
  bufsize = 16384, maxrewrite = 1024, maxpollevents = 200

Built with multi-threading support (MAX_TGROUPS=16, MAX_THREADS=256, default=2).
Built with OpenSSL version : OpenSSL 3.0.11 19 Sep 2023
Running on OpenSSL version : OpenSSL 3.0.11 19 Sep 2023
OpenSSL library supports TLS extensions : yes
OpenSSL library supports SNI : yes
OpenSSL library supports : TLSv1.0 TLSv1.1 TLSv1.2 TLSv1.3
OpenSSL providers loaded : default
Built with Lua version : Lua 5.4.4
Built with the Prometheus exporter as a service
Built with network namespace support.
Built with libslz for stateless compression.
Compression algorithms supported : identity("identity"), deflate("deflate"), raw-deflate("deflate"), gzip("gzip")
Built with transparent proxy support using: IP_TRANSPARENT IPV6_TRANSPARENT IP_FREEBIND
Built with PCRE2 version : 10.42 2022-12-11
PCRE2 library supports JIT : yes
Encrypted password support via crypt(3): yes
Built with gcc compiler version 12.2.0

Available polling systems :
      epoll : pref=300,  test result OK
       poll : pref=200,  test result OK
     select : pref=150,  test result OK
Total: 3 (3 usable), will use epoll.

Available multiplexer protocols :
(protocols marked as <default> cannot be specified using 'proto' keyword)
       quic : mode=HTTP  side=FE     mux=QUIC  flags=HTX|NO_UPG|FRAMED
         h2 : mode=HTTP  side=FE|BE  mux=H2    flags=HTX|HOL_RISK|NO_UPG
       fcgi : mode=HTTP  side=BE     mux=FCGI  flags=HTX|HOL_RISK|NO_UPG
  <default> : mode=HTTP  side=FE|BE  mux=H1    flags=HTX
         h1 : mode=HTTP  side=FE|BE  mux=H1    flags=HTX|NO_UPG
  <default> : mode=TCP   side=FE|BE  mux=PASS  flags=
       none : mode=TCP   side=FE|BE  mux=PASS  flags=NO_UPG

Available services : prometheus-exporter
Available filters :
	[BWLIM] bwlim-in
	[BWLIM] bwlim-out
	[CACHE] cache
	[COMP] compression
	[FCGI] fcgi-app
	[SPOE] spoe
	[TRACE] trace


### Last Outputs and Backtraces

```plain
[NOTICE]   (1) : haproxy version is 2.8.7-1a82cdf
[NOTICE]   (1) : path to executable is /usr/local/sbin/haproxy
[WARNING]  (1) : config : log format ignored for frontend 'fe_stats' since it has no log address.
[NOTICE]   (1) : config : [/etc/haproxy/vhost_devops.cfg:72] : 'server be_kibana/kibana-1' : could not resolve address 'efk_kibana', disabling server.
[NOTICE]   (1) : New worker (8) forked
[NOTICE]   (1) : Loading success.


### Additional Information

_No response_

Metadata

Metadata

Assignees

No one assigned

    Labels

    status: invalid — This issue is not within HAProxy itself.
    type: bug — This issue describes a bug.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions