Skip to content

Commit fd39cd3

Browse files
author
Thomas Hardy
committed
sql: add troubleshooting mode session variable
Resolves: #84429. This change introduces a `troubleshooting_mode` session variable. When enabled, this session variable is intended to be used as a way to avoid performing additional work on queries, particularly when the cluster is experiencing issues, unavailability, or failures. By default, this session variable is disabled. Currently, this session variable is only used to avoid collecting and emitting telemetry data. Release note (sql change): Introduce a new `troubleshooting_mode` session variable, to avoid doing additional work on queries when possible (e.g. collecting telemetry data). By default, this session variable is disabled.
1 parent 6ea03b9 commit fd39cd3

10 files changed

Lines changed: 159 additions & 1 deletion

File tree

pkg/cli/clisqlcfg/context.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,9 @@ func (c *Context) Run(ctx context.Context, conn clisqlclient.Conn) error {
210210
if err := c.maybeSetReadOnly(conn); err != nil {
211211
return err
212212
}
213+
if err := c.maybeSetTroubleshootingMode(conn); err != nil {
214+
return err
215+
}
213216

214217
if c.ConnCtx.DebugMode {
215218
fmt.Fprintln(c.CmdOut,
@@ -250,3 +253,13 @@ func (c *Context) maybeSetReadOnly(conn clisqlclient.Conn) error {
250253
return conn.Exec(context.Background(),
251254
"SET default_transaction_read_only = TRUE")
252255
}
256+
257+
func (c *Context) maybeSetTroubleshootingMode(conn clisqlclient.Conn) error {
258+
if !c.ConnCtx.DebugMode {
259+
return nil
260+
}
261+
// If we are in debug mode, enable "troubleshooting mode".
262+
return conn.Exec(
263+
context.Background(),
264+
"SET troubleshooting_mode = on")
265+
}

pkg/cli/interactive_tests/test_client_side_checking.tcl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ eexpect "\"\".crdb_internal.node_build_info"
8787
eexpect "you might want to set the current database"
8888
eexpect "to the empty string"
8989

90+
# Check that troubleshooting mode is enabled in debug mode.
91+
eexpect "root@"
92+
send "show troubleshooting_mode;\r"
93+
eexpect "on"
94+
9095
eexpect "root@"
9196
send "\\set display_format csv\r\\set\r"
9297
eexpect "check_syntax,false"

pkg/sql/exec_log.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ func (p *planner) maybeLogStatementInternal(
375375
p.logEventsOnlyExternally(ctx, eventLogEntry{event: &eventpb.AdminQuery{CommonSQLExecDetails: execDetails}})
376376
}
377377

378-
if telemetryLoggingEnabled {
378+
if telemetryLoggingEnabled && !p.SessionData().TroubleshootingMode {
379379
// We only log to the telemetry channel if enough time has elapsed from
380380
// the last event emission.
381381
requiredTimeElapsed := 1.0 / float64(maxEventFrequency)

pkg/sql/exec_util.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3251,6 +3251,10 @@ func (m *sessionDataMutator) SetUnconstrainedNonCoveringIndexScanEnabled(val boo
32513251
m.data.UnconstrainedNonCoveringIndexScanEnabled = val
32523252
}
32533253

3254+
func (m *sessionDataMutator) SetTroubleshootingModeEnabled(val bool) {
3255+
m.data.TroubleshootingMode = val
3256+
}
3257+
32543258
// Utility functions related to scrubbing sensitive information on SQL Stats.
32553259

32563260
// quantizeCounts ensures that the Count field in the

pkg/sql/logictest/testdata/logic_test/information_schema

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4718,6 +4718,7 @@ transaction_rows_read_log 0
47184718
transaction_rows_written_err 0
47194719
transaction_rows_written_log 0
47204720
transaction_status NoTxn
4721+
troubleshooting_mode off
47214722
unconstrained_non_covering_index_scan_enabled off
47224723
xmloption content
47234724

pkg/sql/logictest/testdata/logic_test/pg_catalog

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4243,6 +4243,7 @@ transaction_rows_read_log 0 NULL
42434243
transaction_rows_written_err 0 NULL NULL NULL string
42444244
transaction_rows_written_log 0 NULL NULL NULL string
42454245
transaction_status NoTxn NULL NULL NULL string
4246+
troubleshooting_mode off NULL NULL NULL string
42464247
unconstrained_non_covering_index_scan_enabled off NULL NULL NULL string
42474248
use_declarative_schema_changer on NULL NULL NULL string
42484249
vectorize on NULL NULL NULL string
@@ -4367,6 +4368,7 @@ transaction_rows_read_log 0 NULL
43674368
transaction_rows_written_err 0 NULL user NULL 0 0
43684369
transaction_rows_written_log 0 NULL user NULL 0 0
43694370
transaction_status NoTxn NULL user NULL NoTxn NoTxn
4371+
troubleshooting_mode off NULL user NULL off off
43704372
unconstrained_non_covering_index_scan_enabled off NULL user NULL off off
43714373
use_declarative_schema_changer on NULL user NULL on on
43724374
vectorize on NULL user NULL on on
@@ -4489,6 +4491,7 @@ transaction_rows_read_log NULL NULL NULL
44894491
transaction_rows_written_err NULL NULL NULL NULL NULL
44904492
transaction_rows_written_log NULL NULL NULL NULL NULL
44914493
transaction_status NULL NULL NULL NULL NULL
4494+
troubleshooting_mode NULL NULL NULL NULL NULL
44924495
unconstrained_non_covering_index_scan_enabled NULL NULL NULL NULL NULL
44934496
use_declarative_schema_changer NULL NULL NULL NULL NULL
44944497
vectorize NULL NULL NULL NULL NULL

pkg/sql/logictest/testdata/logic_test/show_source

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ transaction_rows_read_log 0
135135
transaction_rows_written_err 0
136136
transaction_rows_written_log 0
137137
transaction_status NoTxn
138+
troubleshooting_mode off
138139
unconstrained_non_covering_index_scan_enabled off
139140
use_declarative_schema_changer on
140141
vectorize on

pkg/sql/sessiondatapb/session_data.proto

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@ message SessionData {
8989
// TrigramSimilarityThreshold configures the value that's used to compare
9090
// trigram similarities to in order to evaluate the string % string overload.
9191
double trigram_similarity_threshold = 20;
92+
93+
// Troubleshooting mode determines whether we refuse to do additional work with
94+
// the query (i.e. collect & emit telemetry data). Troubleshooting mode is
95+
// disabled by default.
96+
bool troubleshooting_mode = 21;
9297
}
9398

9499
// DataConversionConfig contains the parameters that influence the output

pkg/sql/telemetry_logging_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,3 +284,112 @@ func TestTelemetryLogging(t *testing.T) {
284284
}
285285
}
286286
}
287+
288+
func TestNoTelemetryLogOnTroubleshootMode(t *testing.T) {
289+
defer leaktest.AfterTest(t)()
290+
sc := log.ScopeWithoutShowLogs(t)
291+
defer sc.Close(t)
292+
293+
cleanup := installTelemetryLogFileSink(sc, t)
294+
defer cleanup()
295+
296+
st := stubTime{}
297+
298+
s, sqlDB, _ := serverutils.StartServer(t, base.TestServerArgs{
299+
Knobs: base.TestingKnobs{
300+
TelemetryLoggingKnobs: &TelemetryLoggingTestingKnobs{
301+
getTimeNow: st.TimeNow,
302+
},
303+
},
304+
})
305+
db := sqlutils.MakeSQLRunner(sqlDB)
306+
defer s.Stopper().Stop(context.Background())
307+
308+
db.Exec(t, `SET CLUSTER SETTING sql.telemetry.query_sampling.enabled = true;`)
309+
db.Exec(t, "CREATE TABLE t();")
310+
311+
stubMaxEventFrequency := int64(1)
312+
telemetryMaxEventFrequency.Override(context.Background(), &s.ClusterSettings().SV, stubMaxEventFrequency)
313+
314+
/*
315+
Testing Cases:
316+
- run query when troubleshoot mode is enabled
317+
- ensure no log appears
318+
- run another query when troubleshoot mode is disabled
319+
- ensure log appears
320+
*/
321+
testData := []struct {
322+
name string
323+
query string
324+
expectedLogStatement string
325+
enableTroubleshootingMode bool
326+
expectedNumLogs int
327+
}{
328+
{
329+
"select-troubleshooting-enabled",
330+
"SELECT * FROM t LIMIT 1;",
331+
`SELECT * FROM \"\".\"\".t LIMIT ‹1›`,
332+
true,
333+
0,
334+
},
335+
{
336+
"select-troubleshooting-disabled",
337+
"SELECT * FROM t LIMIT 2;",
338+
`SELECT * FROM \"\".\"\".t LIMIT ‹2›`,
339+
false,
340+
1,
341+
},
342+
}
343+
344+
for idx, tc := range testData {
345+
// Set the time for when we issue a query to enable/disable
346+
// troubleshooting mode.
347+
setTroubleshootModeTime := timeutil.FromUnixMicros(int64(idx * 1e6))
348+
st.setTime(setTroubleshootModeTime)
349+
if tc.enableTroubleshootingMode {
350+
db.Exec(t, `SET troubleshooting_mode = true;`)
351+
} else {
352+
db.Exec(t, `SET troubleshooting_mode = false;`)
353+
}
354+
// Advance time 1 second from previous query. Ensure enough time has passed
355+
// from when we set troubleshooting mode for this query to be sampled.
356+
setQueryTime := timeutil.FromUnixMicros(int64((idx + 1) * 1e6))
357+
st.setTime(setQueryTime)
358+
db.Exec(t, tc.query)
359+
}
360+
361+
log.Flush()
362+
363+
entries, err := log.FetchEntriesFromFiles(
364+
0,
365+
math.MaxInt64,
366+
10000,
367+
regexp.MustCompile(`"EventType":"sampled_query"`),
368+
log.WithMarkedSensitiveData,
369+
)
370+
371+
if err != nil {
372+
t.Fatal(err)
373+
}
374+
375+
if len(entries) == 0 {
376+
t.Fatal(errors.Newf("no entries found"))
377+
}
378+
379+
for _, tc := range testData {
380+
numLogsFound := 0
381+
for i := len(entries) - 1; i >= 0; i-- {
382+
e := entries[i]
383+
if strings.Contains(e.Message, tc.expectedLogStatement) {
384+
if tc.enableTroubleshootingMode {
385+
t.Errorf("%s: unexpected log entry when troubleshooting mode enabled:\n%s", tc.name, entries[0].Message)
386+
} else {
387+
numLogsFound++
388+
}
389+
}
390+
}
391+
if numLogsFound != tc.expectedNumLogs {
392+
t.Errorf("%s: expected %d log entries, found %d", tc.name, tc.expectedNumLogs, numLogsFound)
393+
}
394+
}
395+
}

pkg/sql/vars.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,6 +1160,23 @@ var varGen = map[string]sessionVar{
11601160
},
11611161
},
11621162

1163+
// CockroachDB extension.
1164+
`troubleshooting_mode`: {
1165+
GetStringVal: makePostgresBoolGetStringValFn(`troubleshooting_mode`),
1166+
Set: func(_ context.Context, m sessionDataMutator, s string) error {
1167+
b, err := paramparse.ParseBoolVar("troubleshooting_mode", s)
1168+
if err != nil {
1169+
return err
1170+
}
1171+
m.SetTroubleshootingModeEnabled(b)
1172+
return nil
1173+
},
1174+
Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) {
1175+
return formatBoolAsPostgresSetting(evalCtx.SessionData().TroubleshootingMode), nil
1176+
},
1177+
GlobalDefault: globalFalse,
1178+
},
1179+
11631180
// This is read-only in Postgres also.
11641181
// See https://www.postgresql.org/docs/14/sql-show.html and
11651182
// https://www.postgresql.org/docs/14/locale.html

0 commit comments

Comments
 (0)