-
Notifications
You must be signed in to change notification settings - Fork 632
Description
Is there an existing issue already for this bug?
- I have searched for an existing issue, and could not find anything. I believe this is a new bug.
I have read the troubleshooting guide
- I have read the troubleshooting guide and I think this is a new bug.
I am running a supported version of CloudNativePG
- I have read the troubleshooting guide and I think this is a new bug.
Contact Details
Version
1.28 (latest patch)
What version of Kubernetes are you using?
1.34
What is your Kubernetes environment?
Cloud: Amazon EKS
How did you install the operator?
Helm
What happened?
apiVersion: postgresql.cnpg.io/v1
kind: Pooler
metadata:
generation: 4
name: pg-cluster-rw-pool
spec:
cluster:
name: pg-cluster
instances: 3
pgbouncer:
parameters:
default_pool_size: "100"
max_client_conn: "1000"
paused: false
poolMode: transaction
template:
spec:
containers:
- name: pgbouncer
resources:
limits:
cpu: 100m
memory: 200Mi
requests:
cpu: 10m
memory: 10Mi
initContainers:
- name: bootstrap-controller
resources:
limits:
cpu: 100m
memory: 200Mi
requests:
cpu: 10m
memory: 10Mi
type: rw
I applied the update to 1.28.0 with argo and began getting connection refused errors in the logs
Cluster resource
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
generation: 16
name: pg-cluster
namespace: cluster-dev
spec:
affinity:
enablePodAntiAffinity: true
podAntiAffinityType: preferred
topologyKey: topology.ebs.csi.aws.com/zone
bootstrap:
recovery:
database: app
owner: app
source: pg-cluster
enablePDB: true
enableSuperuserAccess: true
externalClusters:
- barmanObjectStore:
destinationPath: s3://pulse-fm-prod-postgres/pg-cluster/
s3Credentials:
inheritFromIAMRole: true
region:
key: region
name: fm-timescale-aws-region
wal:
compression: bzip2
maxParallel: 8
name: pg-cluster
failoverDelay: 0
imageName: redacted.dkr.ecr.us-east-1.amazonaws.com/cnpg:18-0-ts2-23-0 # cnpg/timescaledb image
instances: 2
logLevel: info
managed:
services:
additional:
- selectorType: rw
serviceTemplate:
metadata:
annotations:
ad.datadoghq.com/service.checks: |
{
"postgres": {
"init_config": {},
"instances": [
{
"host": "%%host%%",
"reported_hostname": "fm-dev",
"port":"5432",
"username":"datadog",
"password":"ENC[k8s_secret@default/dd-tools-pass/dd-tools-pass]",
"dbname": "fm",
"dbm": "true",
"collect_schemas": {
"enabled": true
},
"relations": [
{"relation_regex": ".*"}
],
"tags": ["team:tools", "env:dev", "postgres_cluster:tools"]
}
]
}
}
external-dns.alpha.kubernetes.io/hostname: tools.default.db.tfm-dev.internal
service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags: Usage=dev,Product=fm
service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
service.beta.kubernetes.io/aws-load-balancer-internal: "true"
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
service.beta.kubernetes.io/aws-load-balancer-type: nlb
name: cluster-external-rw
spec:
type: LoadBalancer
updateStrategy: patch
- selectorType: ro
serviceTemplate:
metadata:
annotations:
external-dns.alpha.kubernetes.io/hostname: tools-repl.default.db.tfm-dev.internal
service.beta.kubernetes.io/aws-load-balancer-additional-resource-tags: Usage=dev,Product=fm
service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
service.beta.kubernetes.io/aws-load-balancer-internal: "true"
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
service.beta.kubernetes.io/aws-load-balancer-type: nlb
name: cluster-external-ro
spec:
type: LoadBalancer
updateStrategy: patch
maxSyncReplicas: 0
minSyncReplicas: 0
monitoring:
customQueriesConfigMap:
- key: queries
name: cnpg-default-monitoring
disableDefaultQueries: false
enablePodMonitor: true
plugins:
- enabled: true
isWALArchiver: true
name: barman-cloud.cloudnative-pg.io
parameters:
barmanObjectName: pg-cluster
serverName: pg-cluster
postgresGID: 26
postgresUID: 26
postgresql:
parameters:
archive_mode: "on"
archive_timeout: 5min
dynamic_shared_memory_type: posix
full_page_writes: "on"
log_destination: csvlog
log_directory: /controller/log
log_filename: postgres
log_rotation_age: "0"
log_rotation_size: "0"
log_truncate_on_rotation: "false"
logging_collector: "on"
max_connections: "300"
max_locks_per_transaction: "128"
max_parallel_workers: "32"
max_replication_slots: "32"
max_standby_archive_delay: 900s
max_standby_streaming_delay: 900s
max_worker_processes: "32"
password_encryption: scram-sha-256
pg_stat_statements.track: ALL
pg_stat_statements.track_utility: "off"
shared_memory_type: mmap
shared_preload_libraries: ""
ssl_max_protocol_version: TLSv1.3
ssl_min_protocol_version: TLSv1.3
timescaledb.license_key: CommunityLicense
track_activity_query_size: "4096"
wal_keep_size: 512MB
wal_level: logical
wal_log_hints: "on"
wal_receiver_timeout: 5s
wal_sender_timeout: 5s
shared_preload_libraries:
- timescaledb
- pg_stat_statements
- set_user
- pg_stat_kcache
syncReplicaElectionConstraint:
enabled: false
primaryUpdateMethod: restart
primaryUpdateStrategy: unsupervised
probes:
liveness:
isolationCheck:
connectionTimeout: 1000
enabled: true
requestTimeout: 1000
replicationSlots:
highAvailability:
enabled: true
slotPrefix: _cnpg_
synchronizeReplicas:
enabled: true
updateInterval: 30
resources:
limits:
cpu: "4"
memory: 4Gi
requests:
cpu: 500m
memory: 500Mi
smartShutdownTimeout: 180
startDelay: 3600
stopDelay: 1800
storage:
resizeInUseVolumes: true
size: 100Gi
storageClass: gp3
switchoverDelay: 3600
topologySpreadConstraints:
- maxSkew: 1
topologyKey: topology.ebs.csi.aws.com/zone
whenUnsatisfiable: DoNotSchedule
status:
availableArchitectures:
- goArch: amd64
hash: 527e2e3b680dfba7f7578b98530f755f36156e7be00acda3318ef36fe4f4418f
- goArch: arm64
hash: aaa74c6061f3fe30f230a664b4f6076bb886c9a89d4eba3293483525c5cff533
certificates:
clientCASecret: pg-cluster-ca
expirations:
pg-cluster-ca: 2026-02-02 01:17:35 +0000 UTC
pg-cluster-replication: 2026-02-02 01:17:35 +0000 UTC
pg-cluster-server: 2026-02-18 00:18:39 +0000 UTC
replicationTLSSecret: pg-cluster-replication
serverAltDNSNames:
- pg-cluster-rw
- pg-cluster-rw.cluster-dev
- pg-cluster-rw.cluster-dev.svc
- pg-cluster-rw.cluster-dev.svc.cluster.local
- pg-cluster-r
- pg-cluster-r.cluster-dev
- pg-cluster-r.cluster-dev.svc
- pg-cluster-r.cluster-dev.svc.cluster.local
- pg-cluster-ro
- pg-cluster-ro.cluster-dev
- pg-cluster-ro.cluster-dev.svc
- pg-cluster-ro.cluster-dev.svc.cluster.local
- cluster-external-rw
- cluster-external-rw.cluster-dev
- cluster-external-rw.cluster-dev.svc
- cluster-external-rw.cluster-dev.svc.cluster.local
- cluster-external-ro
- cluster-external-ro.cluster-dev
- cluster-external-ro.cluster-dev.svc
- cluster-external-ro.cluster-dev.svc.cluster.local
serverCASecret: pg-cluster-ca
serverTLSSecret: pg-cluster-server
cloudNativePGCommitHash: a9696201f
cloudNativePGOperatorHash: 527e2e3b680dfba7f7578b98530f755f36156e7be00acda3318ef36fe4f4418f
conditions:
- lastTransitionTime: "2025-12-02T17:17:43Z"
message: A single, unique system ID was found across reporting instances.
reason: Unique
status: "True"
type: ConsistentSystemID
- lastTransitionTime: "2025-12-09T18:26:16Z"
message: Cluster is Ready
reason: ClusterIsReady
status: "True"
type: Ready
- lastTransitionTime: "2025-12-09T18:24:43Z"
message: Continuous archiving is working
reason: ContinuousArchivingSuccess
status: "True"
type: ContinuousArchiving
- lastTransitionTime: "2025-12-09T00:02:01Z"
message: Backup was successful
reason: LastBackupSucceeded
status: "True"
type: LastBackupSucceeded
configMapResourceVersion:
metrics:
cnpg-default-monitoring: "359312045"
currentPrimary: pg-cluster-4
currentPrimaryTimestamp: "2025-12-09T18:24:38.717762Z"
healthyPVC:
- pg-cluster-4
- pg-cluster-6
image: redacted.dkr.ecr.us-east-1.amazonaws.com/cnpg:18-0-ts2-23-0
instanceNames:
- pg-cluster-4
- pg-cluster-6
instances: 2
instancesReportedState:
pg-cluster-4:
ip: 10.4.6.97
isPrimary: true
timeLineID: 66
pg-cluster-6:
ip: 10.4.0.121
isPrimary: false
timeLineID: 66
instancesStatus:
healthy:
- pg-cluster-4
- pg-cluster-6
latestGeneratedNode: 6
managedRolesStatus: {}
pgDataImageInfo:
image: redacted.dkr.ecr.us-east-1.amazonaws.com/cnpg:18-0-ts2-23-0
majorVersion: 18
phase: Cluster in healthy state
pluginStatus:
- capabilities:
- TYPE_RECONCILER_HOOKS
- TYPE_LIFECYCLE_SERVICE
name: barman-cloud.cloudnative-pg.io
version: 0.9.0
poolerIntegrations:
pgBouncerIntegration:
secrets:
- pg-cluster-pooler
pvcCount: 2
readService: pg-cluster-r
readyInstances: 2
secretsResourceVersion:
applicationSecretVersion: "386344441"
clientCaSecretVersion: "359312014"
replicationSecretVersion: "359312016"
serverCaSecretVersion: "359312014"
serverSecretVersion: "370780410"
superuserSecretVersion: "386344440"
switchReplicaClusterStatus: {}
systemID: "7569348818666958871"
targetPrimary: pg-cluster-4
targetPrimaryTimestamp: "2025-12-09T18:24:22.985685Z"
timelineID: 66
topology:
instances:
pg-cluster-4: {}
pg-cluster-6: {}
nodesUsed: 2
successfullyExtracted: true
writeService: pg-cluster-rw
Relevant log output
{"level":"info","ts":"2025-12-09T18:58:25.245511759Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:25.245 UTC","pid":"12","level":"LOG","msg":"kernel file descriptor limit: 65536 (hard: 1048576); max_client_conn: 1000, max expected fd use: 1012"}}
{"level":"info","ts":"2025-12-09T18:58:25.245962782Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:25.245 UTC","pid":"12","level":"LOG","msg":"listening on 0.0.0.0:5432"}}
{"level":"info","ts":"2025-12-09T18:58:25.246037299Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:25.245 UTC","pid":"12","level":"LOG","msg":"listening on [::]:5432"}}
{"level":"info","ts":"2025-12-09T18:58:25.246104012Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:25.246 UTC","pid":"12","level":"LOG","msg":"listening on unix:/controller/run/.s.PGSQL.5432"}}
{"level":"info","ts":"2025-12-09T18:58:25.246147311Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:25.246 UTC","pid":"12","level":"LOG","msg":"process up: PgBouncer 1.25.1, libevent 2.1.12-stable (epoll), adns: c-ares 1.34.5, tls: OpenSSL 3.5.4 30 Sep 2025"}}
{"level":"info","ts":"2025-12-09T18:58:45.460643275Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:45.460 UTC","pid":"12","level":"LOG","msg":"C-0x557233534560: (nodb)/(nouser)@10.4.0.55:35668 registered new auto-database: fm"}}
{"level":"info","ts":"2025-12-09T18:58:45.460751086Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:45.460 UTC","pid":"12","level":"LOG","msg":"C-0x557233534560: (nodb)/(nouser)@10.4.0.55:35668 registered new auto-database: postgres"}}
{"level":"info","ts":"2025-12-09T18:58:45.461816124Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:45.461 UTC","pid":"12","level":"LOG","msg":"S-0x5572335633e0: postgres/pg-cluster-rw@172.20.129.144:5432 new connection to server (from 10.4.6.47:38722)"}}
{"level":"info","ts":"2025-12-09T18:58:45.466677629Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:45.466 UTC","pid":"12","level":"LOG","msg":"S-0x5572335633e0: postgres/pg-cluster-rw@172.20.129.144:5432 SSL established: TLSv1.3/TLS_AES_256_GCM_SHA384"}}
{"level":"info","ts":"2025-12-09T18:58:45.46784964Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:45.467 UTC","pid":"12","level":"WARNING","msg":"tls_sbufio_recv: read failed: error:0A000413:SSL routines::ssl/tls alert unsupported certificate"}}
{"level":"info","ts":"2025-12-09T18:58:45.467884738Z","msg":"record","logger":"pgbouncer-manager","pipe":"stderr","record":{"timestamp":"2025-12-09 18:58:45.467 UTC","pid":"12","level":"LOG","msg":"S-0x5572335633e0: postgres/pg-cluster-rw@172.20.129.144:5432 closing because: server conn crashed? (age=0s)"}}
Code of Conduct
- I agree to follow this project's Code of Conduct
Reactions are currently unavailable
Metadata
Assignees
Labels
bug 🐛 Something isn't working
Type
Projects
Status
Done