Skip to content

Commit 58eb559

Browse files
committed
Merge branch 'master'
2 parents a4734e4 + ceeeba1 commit 58eb559

1,057 files changed

Lines changed: 12319 additions & 6252 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.claude/CLAUDE.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,7 @@ grep "test_name" ci/tmp/pytest_parallel.jsonl | python3 -c "import sys,json; [pr
3636
You can build multiple versions of ClickHouse inside `build_*` directories, such as `build`, `build_debug`, `build_asan`, etc.
3737

3838
You can run integration tests as in `tests/integration/README.md` using: `python -m ci.praktika run "integration" --test <selectors>` invoked from the repository root.
39+
40+
When writing tests, do not add "no-*" tags (like "no-parallel") unless strictly necessary.
41+
42+
When writing tests in tests/queries, prefer adding a new test instead of extending existing ones.

.github/workflows/master.yml

Lines changed: 91 additions & 11 deletions
Large diffs are not rendered by default.

.github/workflows/pull_request.yml

Lines changed: 186 additions & 106 deletions
Large diffs are not rendered by default.

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,9 @@
366366
[submodule "contrib/StringZilla"]
367367
path = contrib/StringZilla
368368
url = https://github.com/ClickHouse/StringZilla.git
369+
[submodule "contrib/simdcomp"]
370+
path = contrib/simdcomp
371+
url = https://github.com/fast-pack/simdcomp.git
369372
[submodule "contrib/antlr4-cpp-runtime"]
370373
path = contrib/antlr4-cpp-runtime
371374
url = https://github.com/antlr/antlr4.git

ci/defs/job_configs.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -514,11 +514,15 @@ class JobConfigs:
514514
runs_on=RunnerLabels.AMD_SMALL,
515515
requires=[ArtifactNames.CH_AMD_DEBUG],
516516
),
517-
Job.ParamSet(
518-
parameter=f"amd_tsan, parallel",
519-
runs_on=RunnerLabels.AMD_LARGE,
520-
requires=[ArtifactNames.CH_AMD_TSAN],
521-
),
517+
*[
518+
Job.ParamSet(
519+
parameter=f"amd_tsan, parallel, {batch}/{total_batches}",
520+
runs_on=RunnerLabels.AMD_LARGE,
521+
requires=[ArtifactNames.CH_AMD_TSAN],
522+
)
523+
for total_batches in (2,)
524+
for batch in range(1, total_batches + 1)
525+
],
522526
*[
523527
Job.ParamSet(
524528
parameter=f"amd_tsan, sequential, {batch}/{total_batches}",
@@ -566,11 +570,15 @@ class JobConfigs:
566570
runs_on=RunnerLabels.AMD_SMALL,
567571
requires=[ArtifactNames.CH_AMD_DEBUG],
568572
),
569-
Job.ParamSet(
570-
parameter="amd_tsan, s3 storage, parallel",
571-
runs_on=RunnerLabels.AMD_MEDIUM,
572-
requires=[ArtifactNames.CH_AMD_TSAN],
573-
),
573+
*[
574+
Job.ParamSet(
575+
parameter=f"amd_tsan, s3 storage, parallel, {batch}/{total_batches}",
576+
runs_on=RunnerLabels.AMD_MEDIUM,
577+
requires=[ArtifactNames.CH_AMD_TSAN],
578+
)
579+
for total_batches in (2,)
580+
for batch in range(1, total_batches + 1)
581+
],
574582
*[
575583
Job.ParamSet(
576584
parameter=f"amd_tsan, s3 storage, sequential, {batch}/{total_batches}",

ci/docker/integration/clickhouse_with_unity_catalog/Dockerfile

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
ARG FROM_TAG=latest
33
FROM clickhouse/integration-test:$FROM_TAG
44

5+
# Fix missing GPG key for kitware repository to prevent apt-get update failure
6+
RUN rm -f /etc/apt/sources.list.d/kitware.list || true
7+
58
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get -y install openjdk-17-jdk-headless && update-alternatives --config java && update-alternatives --config javac
69

710
# Clone unity catalog and hack their sample data because it contains inconsistent DeltaLake definition
@@ -17,6 +20,11 @@ RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-
1720

1821
# download spark and packages
1922
# if you change packages, don't forget to update them in tests/integration/helpers/cluster.py
20-
RUN packages="org.apache.hadoop:hadoop-aws:3.3.4,io.delta:delta-spark_2.12:3.2.1,io.unitycatalog:unitycatalog-spark_2.12:0.2.0" \
21-
/spark-3.5.4-bin-hadoop3/bin/spark-shell --packages "$packages" > /dev/null \
22-
&& find /root/.ivy2/ -name '*.jar' -exec ln -sf {} /spark-3.5.4-bin-hadoop3/jars/ \;
23+
# Using spark-sql with -e flag (instead of spark-shell) to ensure reliable automatic exit
24+
RUN packages="org.apache.hadoop:hadoop-aws:3.3.4,io.delta:delta-spark_2.12:3.2.1,io.unitycatalog:unitycatalog-spark_2.12:0.2.0" && \
25+
cd /spark-3.5.4-bin-hadoop3 && \
26+
for i in 1 2 3; do \
27+
bin/spark-sql --packages "$packages" -e "SELECT 1;" > /dev/null 2>&1 && \
28+
break || sleep 5; \
29+
done && \
30+
find /root/.ivy2/ -name '*.jar' -exec ln -sf {} /spark-3.5.4-bin-hadoop3/jars/ \;

ci/jobs/buzzhouse_job.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ def main():
221221
"allow_transactions": allow_transactions,
222222
# Run query oracles sometimes
223223
"allow_query_oracles": random.randint(1, 4) == 1,
224+
# Run for 30 minutes max
225+
"time_to_run": 30,
224226
"remote_servers": ["localhost:9000"],
225227
"remote_secure_servers": ["localhost:9440"],
226228
"http_servers": ["localhost:8123"],

ci/jobs/fast_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def clone_submodules():
5050
"contrib/morton-nd",
5151
"contrib/xxHash",
5252
"contrib/simdjson",
53+
"contrib/simdcomp",
5354
"contrib/liburing",
5455
"contrib/libfiu",
5556
"contrib/yaml-cpp",

ci/jobs/functional_tests.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import argparse
22
import os
33
import random
4-
import re
54
import subprocess
65
from pathlib import Path
76

@@ -521,13 +520,19 @@ def start():
521520
collected_test_results.append(test_case_result)
522521
seen_test_names.add(test_case_result.name)
523522

523+
# Control elapsed time for targeted checks: exit if >30 minutes
524+
stop_by_elapsed_time = False
525+
if is_targeted_check and cnt > 0:
526+
stop_by_elapsed_time = stop_watch_.duration / 60 > 30
527+
524528
# On final run, replace results with collected ones
525-
if is_final_run:
529+
if is_final_run or stop_by_elapsed_time:
526530
test_result.results = collected_test_results
527531
# Set overall status to failed if any collected test cases failed
528532
has_failures = any(not t.is_ok() for t in collected_test_results)
529533
if has_failures and test_result.is_ok():
530534
test_result.set_failed()
535+
break
531536

532537
if not info.is_local_run:
533538
CH.stop_log_exports()

ci/jobs/integration_test_job.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,13 @@ def main():
386386
failed_tests_files = []
387387

388388
has_error = False
389+
if not is_targeted_check:
390+
session_timeout = 5400
391+
else:
392+
# For targeted jobs, use a shorter session timeout to keep feedback fast.
393+
# If this timeout is exceeded but all completed tests have passed, the
394+
# targeted check will not fail solely because the session timed out.
395+
session_timeout = 1200
389396
error_info = []
390397

391398
module_repeat_cnt = 1
@@ -398,7 +405,7 @@ def main():
398405
for attempt in range(module_repeat_cnt):
399406
log_file = f"{temp_path}/pytest_parallel.log"
400407
test_result_parallel = Result.from_pytest_run(
401-
command=f"{' '.join(parallel_test_modules)} --report-log-exclude-logs-on-passed-tests -n {workers} --dist=loadfile --tb=short {repeat_option} --session-timeout=5400",
408+
command=f"{' '.join(parallel_test_modules)} --report-log-exclude-logs-on-passed-tests -n {workers} --dist=loadfile --tb=short {repeat_option} --session-timeout={session_timeout}",
402409
cwd="./tests/integration/",
403410
env=test_env,
404411
pytest_report_file=f"{temp_path}/pytest_parallel.jsonl",
@@ -416,15 +423,20 @@ def main():
416423
if test_result_parallel.files:
417424
failed_tests_files.extend(test_result_parallel.files)
418425
if test_result_parallel.is_error():
419-
has_error = True
420-
error_info.append(test_result_parallel.info)
426+
if not is_targeted_check:
427+
# In targeted checks we may overload the run with many or heavy tests
428+
# (--count N is used). In this mode, a session-timeout is an expected risk
429+
# rather than an infrastructure problem, so we do not treat such errors as job-level
430+
# failures and avoid setting the error flag for targeted runs.
431+
has_error = True
432+
error_info.append(test_result_parallel.info)
421433

422434
fail_num = len([r for r in test_results if not r.is_ok()])
423435
if sequential_test_modules and fail_num < MAX_FAILS_BEFORE_DROP and not has_error:
424436
for attempt in range(module_repeat_cnt):
425437
log_file = f"{temp_path}/pytest_sequential.log"
426438
test_result_sequential = Result.from_pytest_run(
427-
command=f"{' '.join(sequential_test_modules)} --report-log-exclude-logs-on-passed-tests --tb=short {repeat_option} -n 1 --dist=loadfile --session-timeout=5400",
439+
command=f"{' '.join(sequential_test_modules)} --report-log-exclude-logs-on-passed-tests --tb=short {repeat_option} -n 1 --dist=loadfile --session-timeout={session_timeout}",
428440
env=test_env,
429441
cwd="./tests/integration/",
430442
pytest_report_file=f"{temp_path}/pytest_sequential.jsonl",
@@ -442,8 +454,13 @@ def main():
442454
if test_result_sequential.files:
443455
failed_tests_files.extend(test_result_sequential.files)
444456
if test_result_sequential.is_error():
445-
has_error = True
446-
error_info.append(test_result_sequential.info)
457+
if not is_targeted_check:
458+
# In targeted checks we may overload the run with many or heavy tests
459+
# (--count N is used). In this mode, a session-timeout is an expected risk
460+
# rather than an infrastructure problem, so we do not treat such errors as job-level
461+
# failures and avoid setting the error flag for targeted runs.
462+
has_error = True
463+
error_info.append(test_result_sequential.info)
447464

448465
# Collect logs before re-run
449466
attached_files = []

0 commit comments

Comments
 (0)