Skip to content

Commit edf15b4

Browse files
Fix flaky test_overcommit_tracker/test_user_overcommit
The test is probabilistic: it relies on memory pressure to kill queries with low `memory_overcommit_ratio_denominator` while sparing those with high ratio. Under MSan (which has ~3x memory overhead), a single attempt could kill all queries, causing the assertion to fail. Two changes: - Reduce `numbers(2500000)` to `numbers(1000000)` to lower per-query memory usage, making it more likely that B queries survive. - Add a retry loop (up to 5 attempts) since the test is inherently probabilistic. https://s3.amazonaws.com/clickhouse-test-reports/json.html?PR=100404&sha=4e239671fdc3523725369f5ad6028b4a52c43b45&name_0=PR&name_1=Integration%20tests%20%28amd_msan%2C%204%2F6%29 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6e7e0e1 commit edf15b4

1 file changed

Lines changed: 28 additions & 19 deletions

File tree

  • tests/integration/test_overcommit_tracker

tests/integration/test_overcommit_tracker/test.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,32 +22,41 @@ def start_cluster():
2222
cluster.shutdown()
2323

2424

25-
USER_TEST_QUERY_A = "SELECT groupArray(number) FROM numbers(2500000) SETTINGS max_memory_usage_for_user=2000000000,memory_overcommit_ratio_denominator=1"
26-
USER_TEST_QUERY_B = "SELECT groupArray(number) FROM numbers(2500000) SETTINGS max_memory_usage_for_user=2000000000,memory_overcommit_ratio_denominator=80000000"
25+
USER_TEST_QUERY_A = "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user=2000000000,memory_overcommit_ratio_denominator=1"
26+
USER_TEST_QUERY_B = "SELECT groupArray(number) FROM numbers(1000000) SETTINGS max_memory_usage_for_user=2000000000,memory_overcommit_ratio_denominator=80000000"
2727

2828

2929
def test_user_overcommit():
3030
node.query("CREATE USER IF NOT EXISTS A")
3131
node.query("GRANT ALL ON *.* TO A")
3232

33-
responses_A = list()
34-
responses_B = list()
35-
for i in range(100):
36-
if i % 2 == 0:
37-
responses_A.append(node.get_query_request(USER_TEST_QUERY_A, user="A"))
38-
else:
39-
responses_B.append(node.get_query_request(USER_TEST_QUERY_B, user="A"))
40-
41-
overcommited_killed = False
42-
for response in responses_A:
43-
_, err = response.get_answer_and_error()
44-
if "MEMORY_LIMIT_EXCEEDED" in err:
45-
overcommited_killed = True
33+
# The test is probabilistic: it relies on memory pressure to kill queries
34+
# with low overcommit ratio while sparing those with high ratio.
35+
# Under sanitizers with higher memory overhead, a single attempt may fail,
36+
# so we retry a few times.
4637
finished = False
47-
for response in responses_B:
48-
_, err = response.get_answer_and_error()
49-
if err == "":
50-
finished = True
38+
for attempt in range(5):
39+
responses_A = list()
40+
responses_B = list()
41+
for i in range(100):
42+
if i % 2 == 0:
43+
responses_A.append(
44+
node.get_query_request(USER_TEST_QUERY_A, user="A")
45+
)
46+
else:
47+
responses_B.append(
48+
node.get_query_request(USER_TEST_QUERY_B, user="A")
49+
)
50+
51+
for response in responses_A:
52+
response.get_answer_and_error()
53+
for response in responses_B:
54+
_, err = response.get_answer_and_error()
55+
if err == "":
56+
finished = True
57+
58+
if finished:
59+
break
5160

5261
assert finished, "all tasks are killed"
5362

0 commit comments

Comments
 (0)