Skip to content

Commit 2114678

Browse files
a-robinsonirfansharif
authored andcommitted
gossip: remove frequent gossiping of gossip client connections
These gossip-clients keys make up two thirds or more of the gossip traffic in various large clusters I've inspected, consuming almost an entire CPU core in the worst case I've seen. They don't provide enough value to justify that sort of ongoing cost, so this commit removes them entirely as well as the periodic logging of the gossip network and the crdb_internal.gossip_network table, both of which relied on them. Release note (backward-incompatible change): We've stopped supporting/populating the crdb_internal.gossip_network table. It was an internal table with no API guarantees (so perhaps no meriting a release note?). Release note (performance improvement): Significantly reduced CPU usage of the underlying gossip network in large clusters.
1 parent 58739fe commit 2114678

17 files changed

Lines changed: 40 additions & 224 deletions

pkg/cli/debug.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,8 +1069,6 @@ func parseGossipValues(gossipInfo *gossip.InfoStatus) (string, error) {
10691069
return "", errors.Wrapf(err, "failed to parse value for key %q", key)
10701070
}
10711071
output = append(output, fmt.Sprintf("%q: %+v", key, drainingInfo))
1072-
} else if strings.HasPrefix(key, gossip.KeyGossipClientsPrefix) {
1073-
output = append(output, fmt.Sprintf("%q: %v", key, string(bytes)))
10741072
}
10751073
}
10761074

pkg/cli/testdata/zip/partial1

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
7878
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
7979
[node 1] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/1/crdb_internal.gossip_alerts.txt... done
8080
[node 1] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt... done
81-
[node 1] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/1/crdb_internal.gossip_network.txt... done
8281
[node 1] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt... done
8382
[node 1] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/1/crdb_internal.leases.txt... done
8483
[node 1] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/1/crdb_internal.node_build_info.txt... done
@@ -179,9 +178,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
179178
[node 2] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/2/crdb_internal.gossip_liveness.txt...
180179
[node 2] retrieving SQL data for crdb_internal.gossip_liveness: last request failed: dial tcp ...
181180
[node 2] retrieving SQL data for crdb_internal.gossip_liveness: creating error output: debug/nodes/2/crdb_internal.gossip_liveness.txt.err.txt... done
182-
[node 2] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/2/crdb_internal.gossip_network.txt...
183-
[node 2] retrieving SQL data for crdb_internal.gossip_network: last request failed: dial tcp ...
184-
[node 2] retrieving SQL data for crdb_internal.gossip_network: creating error output: debug/nodes/2/crdb_internal.gossip_network.txt.err.txt... done
185181
[node 2] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/2/crdb_internal.gossip_nodes.txt...
186182
[node 2] retrieving SQL data for crdb_internal.gossip_nodes: last request failed: dial tcp ...
187183
[node 2] retrieving SQL data for crdb_internal.gossip_nodes: creating error output: debug/nodes/2/crdb_internal.gossip_nodes.txt.err.txt... done
@@ -263,7 +259,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0s /dev/null
263259
[node 3] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/3/crdb_internal.feature_usage.txt... done
264260
[node 3] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/3/crdb_internal.gossip_alerts.txt... done
265261
[node 3] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/3/crdb_internal.gossip_liveness.txt... done
266-
[node 3] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/3/crdb_internal.gossip_network.txt... done
267262
[node 3] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/3/crdb_internal.gossip_nodes.txt... done
268263
[node 3] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/3/crdb_internal.leases.txt... done
269264
[node 3] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/3/crdb_internal.node_build_info.txt... done

pkg/cli/testdata/zip/partial1_excluded

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0
7878
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
7979
[node 1] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/1/crdb_internal.gossip_alerts.txt... done
8080
[node 1] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt... done
81-
[node 1] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/1/crdb_internal.gossip_network.txt... done
8281
[node 1] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt... done
8382
[node 1] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/1/crdb_internal.leases.txt... done
8483
[node 1] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/1/crdb_internal.node_build_info.txt... done
@@ -172,7 +171,6 @@ debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0
172171
[node 3] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/3/crdb_internal.feature_usage.txt... done
173172
[node 3] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/3/crdb_internal.gossip_alerts.txt... done
174173
[node 3] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/3/crdb_internal.gossip_liveness.txt... done
175-
[node 3] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/3/crdb_internal.gossip_network.txt... done
176174
[node 3] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/3/crdb_internal.gossip_nodes.txt... done
177175
[node 3] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/3/crdb_internal.leases.txt... done
178176
[node 3] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/3/crdb_internal.node_build_info.txt... done

pkg/cli/testdata/zip/partial2

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
7878
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
7979
[node 1] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/1/crdb_internal.gossip_alerts.txt... done
8080
[node 1] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt... done
81-
[node 1] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/1/crdb_internal.gossip_network.txt... done
8281
[node 1] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt... done
8382
[node 1] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/1/crdb_internal.leases.txt... done
8483
[node 1] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/1/crdb_internal.node_build_info.txt... done
@@ -171,7 +170,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null
171170
[node 3] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/3/crdb_internal.feature_usage.txt... done
172171
[node 3] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/3/crdb_internal.gossip_alerts.txt... done
173172
[node 3] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/3/crdb_internal.gossip_liveness.txt... done
174-
[node 3] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/3/crdb_internal.gossip_network.txt... done
175173
[node 3] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/3/crdb_internal.gossip_nodes.txt... done
176174
[node 3] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/3/crdb_internal.leases.txt... done
177175
[node 3] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/3/crdb_internal.node_build_info.txt... done

pkg/cli/testdata/zip/testzip

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
8181
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
8282
[node 1] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/1/crdb_internal.gossip_alerts.txt... done
8383
[node 1] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt... done
84-
[node 1] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/1/crdb_internal.gossip_network.txt... done
8584
[node 1] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt... done
8685
[node 1] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/1/crdb_internal.leases.txt... done
8786
[node 1] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/1/crdb_internal.node_build_info.txt... done

pkg/cli/testdata/zip/testzip_concurrent

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -292,9 +292,6 @@ zip
292292
[node 1] retrieving SQL data for crdb_internal.gossip_liveness...
293293
[node 1] retrieving SQL data for crdb_internal.gossip_liveness: done
294294
[node 1] retrieving SQL data for crdb_internal.gossip_liveness: writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt...
295-
[node 1] retrieving SQL data for crdb_internal.gossip_network...
296-
[node 1] retrieving SQL data for crdb_internal.gossip_network: done
297-
[node 1] retrieving SQL data for crdb_internal.gossip_network: writing output: debug/nodes/1/crdb_internal.gossip_network.txt...
298295
[node 1] retrieving SQL data for crdb_internal.gossip_nodes...
299296
[node 1] retrieving SQL data for crdb_internal.gossip_nodes: done
300297
[node 1] retrieving SQL data for crdb_internal.gossip_nodes: writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt...
@@ -622,9 +619,6 @@ zip
622619
[node 2] retrieving SQL data for crdb_internal.gossip_liveness...
623620
[node 2] retrieving SQL data for crdb_internal.gossip_liveness: done
624621
[node 2] retrieving SQL data for crdb_internal.gossip_liveness: writing output: debug/nodes/2/crdb_internal.gossip_liveness.txt...
625-
[node 2] retrieving SQL data for crdb_internal.gossip_network...
626-
[node 2] retrieving SQL data for crdb_internal.gossip_network: done
627-
[node 2] retrieving SQL data for crdb_internal.gossip_network: writing output: debug/nodes/2/crdb_internal.gossip_network.txt...
628622
[node 2] retrieving SQL data for crdb_internal.gossip_nodes...
629623
[node 2] retrieving SQL data for crdb_internal.gossip_nodes: done
630624
[node 2] retrieving SQL data for crdb_internal.gossip_nodes: writing output: debug/nodes/2/crdb_internal.gossip_nodes.txt...
@@ -952,9 +946,6 @@ zip
952946
[node 3] retrieving SQL data for crdb_internal.gossip_liveness...
953947
[node 3] retrieving SQL data for crdb_internal.gossip_liveness: done
954948
[node 3] retrieving SQL data for crdb_internal.gossip_liveness: writing output: debug/nodes/3/crdb_internal.gossip_liveness.txt...
955-
[node 3] retrieving SQL data for crdb_internal.gossip_network...
956-
[node 3] retrieving SQL data for crdb_internal.gossip_network: done
957-
[node 3] retrieving SQL data for crdb_internal.gossip_network: writing output: debug/nodes/3/crdb_internal.gossip_network.txt...
958949
[node 3] retrieving SQL data for crdb_internal.gossip_nodes...
959950
[node 3] retrieving SQL data for crdb_internal.gossip_nodes: done
960951
[node 3] retrieving SQL data for crdb_internal.gossip_nodes: writing output: debug/nodes/3/crdb_internal.gossip_nodes.txt...

pkg/cli/testdata/zip/testzip_tenant

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,9 +110,6 @@ debug zip --concurrency=1 --cpu-profile-duration=1s /dev/null
110110
[node 1] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt...
111111
[node 1] retrieving SQL data for crdb_internal.gossip_liveness: last request failed: ERROR: unimplemented: operation is unsupported in multi-tenancy mode (SQLSTATE 0A000)
112112
[node 1] retrieving SQL data for crdb_internal.gossip_liveness: creating error output: debug/nodes/1/crdb_internal.gossip_liveness.txt.err.txt... done
113-
[node 1] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/1/crdb_internal.gossip_network.txt...
114-
[node 1] retrieving SQL data for crdb_internal.gossip_network: last request failed: ERROR: unimplemented: operation is unsupported in multi-tenancy mode (SQLSTATE 0A000)
115-
[node 1] retrieving SQL data for crdb_internal.gossip_network: creating error output: debug/nodes/1/crdb_internal.gossip_network.txt.err.txt... done
116113
[node 1] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt...
117114
[node 1] retrieving SQL data for crdb_internal.gossip_nodes: last request failed: ERROR: unimplemented: operation is unsupported in multi-tenancy mode (SQLSTATE 0A000)
118115
[node 1] retrieving SQL data for crdb_internal.gossip_nodes: creating error output: debug/nodes/1/crdb_internal.gossip_nodes.txt.err.txt... done

pkg/cli/testdata/zip/unavailable

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ debug zip --concurrency=1 --cpu-profile-duration=0 /dev/null --timeout=.5s
9090
[node 1] retrieving SQL data for crdb_internal.feature_usage... writing output: debug/nodes/1/crdb_internal.feature_usage.txt... done
9191
[node 1] retrieving SQL data for crdb_internal.gossip_alerts... writing output: debug/nodes/1/crdb_internal.gossip_alerts.txt... done
9292
[node 1] retrieving SQL data for crdb_internal.gossip_liveness... writing output: debug/nodes/1/crdb_internal.gossip_liveness.txt... done
93-
[node 1] retrieving SQL data for crdb_internal.gossip_network... writing output: debug/nodes/1/crdb_internal.gossip_network.txt... done
9493
[node 1] retrieving SQL data for crdb_internal.gossip_nodes... writing output: debug/nodes/1/crdb_internal.gossip_nodes.txt... done
9594
[node 1] retrieving SQL data for crdb_internal.leases... writing output: debug/nodes/1/crdb_internal.leases.txt... done
9695
[node 1] retrieving SQL data for crdb_internal.node_build_info... writing output: debug/nodes/1/crdb_internal.node_build_info.txt... done

pkg/cli/zip_table_registry.go

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -533,12 +533,6 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{
533533
"updated_at",
534534
},
535535
},
536-
"crdb_internal.gossip_network": {
537-
nonSensitiveCols: NonSensitiveColumns{
538-
"source_id",
539-
"target_id",
540-
},
541-
},
542536
"crdb_internal.gossip_nodes": {
543537
// `cluster_name` is hashed as we only care to see whether values are
544538
// identical across nodes.

pkg/cli/zip_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ table_name NOT IN (
7676
'cross_db_references',
7777
'databases',
7878
'forward_dependencies',
79+
'gossip_network',
7980
'index_columns',
8081
'kv_catalog_comments',
8182
'kv_catalog_descriptor',

0 commit comments

Comments
 (0)