Skip to content

Commit 3d5ece6

Browse files
Add Hermetic C++ Toolchains for XLA project.
Hermetic C++/CUDA toolchains are now enabled by default for the Linux x86_64 platform. The list of supported OSs will be extended over the next few months. Developers can still use non-hermetic toolchains by passing the --config=clang_local flag.

Example 1: Run CPU tests

Previously, the following command was used, with the CC and CXX environment variables set:

bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd \
    --repo_env=CC=/usr/lib/llvm-18/bin/clang \
    --repo_env=CXX=/usr/lib/llvm-18/bin/clang++ \
    -- //xla/... //build_tools/... @tsl//tsl/...

Now, for a hermetic build, use the same command without the CC and CXX environment variables:

bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd \
    -- //xla/... //build_tools/... @tsl//tsl/...

For a non-hermetic build, add the "--config=clang_local" flag and keep the CC and CXX environment variables:

bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd \
    --config=clang_local \
    --repo_env=CC=/usr/lib/llvm-18/bin/clang \
    --repo_env=CXX=/usr/lib/llvm-18/bin/clang++ \
    -- //xla/... //build_tools/... @tsl//tsl/...

Example 2: Run CPU tests for x86_64 architecture with remote execution

Previously, the following command was used:

bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd \
    --config=rbe_linux_cpu \
    -- //xla/... //build_tools/... @tsl//tsl/...

Now, for a hermetic build, the same command is used.

For non-hermetic remote builds, we have temporarily added the rbe_linux_cpu_clang_local config, which carries the deprecated parameters:

bazel test --build_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd --test_tag_filters=-no_oss,-gpu,-requires-gpu-nvidia,-requires-gpu-amd \
    --config=rbe_linux_cpu_clang_local \
    -- //xla/... //build_tools/... @tsl//tsl/...

We don't provide other "rbe_*_clang_local" configurations, but you can construct your own combinations.

PiperOrigin-RevId: 772530381
1 parent f0939ff commit 3d5ece6

4 files changed

Lines changed: 51 additions & 15 deletions

File tree

.bazelrc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Disable Bzlmod for now
22
common --noenable_bzlmod
33

4+
build --incompatible_enable_cc_toolchain_resolution
5+
build --repo_env USE_HERMETIC_CC_TOOLCHAIN=1
6+
47
# TODO: Migrate for https://github.com/bazelbuild/bazel/issues/7260
5-
common --noincompatible_enable_cc_toolchain_resolution
8+
build:clang_local --noincompatible_enable_cc_toolchain_resolution
9+
build:clang_local --repo_env USE_HERMETIC_CC_TOOLCHAIN=0
610

711
# Load the TensorFlow bazelrc
812
import %workspace%/tensorflow.bazelrc

WORKSPACE

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ load(":workspace0.bzl", "xla_workspace0")
5454
xla_workspace0()
5555

5656
load(
57-
"//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
57+
"@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_json_init_repository.bzl",
5858
"cuda_json_init_repository",
5959
)
6060

@@ -66,7 +66,7 @@ load(
6666
"CUDNN_REDISTRIBUTIONS",
6767
)
6868
load(
69-
"//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
69+
"@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_redist_init_repositories.bzl",
7070
"cuda_redist_init_repositories",
7171
"cudnn_redist_init_repository",
7272
)
@@ -80,28 +80,28 @@ cudnn_redist_init_repository(
8080
)
8181

8282
load(
83-
"//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
83+
"@rules_ml_toolchain//third_party/gpus/cuda/hermetic:cuda_configure.bzl",
8484
"cuda_configure",
8585
)
8686

8787
cuda_configure(name = "local_config_cuda")
8888

8989
load(
90-
"//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
90+
"@rules_ml_toolchain//third_party/nccl/hermetic:nccl_redist_init_repository.bzl",
9191
"nccl_redist_init_repository",
9292
)
9393

9494
nccl_redist_init_repository()
9595

9696
load(
97-
"//third_party/nccl/hermetic:nccl_configure.bzl",
97+
"@rules_ml_toolchain//third_party/nccl/hermetic:nccl_configure.bzl",
9898
"nccl_configure",
9999
)
100100

101101
nccl_configure(name = "local_config_nccl")
102102

103103
load(
104-
"//third_party/nvshmem/hermetic:nvshmem_json_init_repository.bzl",
104+
"@rules_ml_toolchain//third_party/nvshmem/hermetic:nvshmem_json_init_repository.bzl",
105105
"nvshmem_json_init_repository",
106106
)
107107

@@ -112,7 +112,7 @@ load(
112112
"NVSHMEM_REDISTRIBUTIONS",
113113
)
114114
load(
115-
"//third_party/nvshmem/hermetic:nvshmem_redist_init_repository.bzl",
115+
"@rules_ml_toolchain//third_party/nvshmem/hermetic:nvshmem_redist_init_repository.bzl",
116116
"nvshmem_redist_init_repository",
117117
)
118118

@@ -121,8 +121,19 @@ nvshmem_redist_init_repository(
121121
)
122122

123123
load(
124-
"@xla//third_party/nvshmem/hermetic:nvshmem_configure.bzl",
124+
"@rules_ml_toolchain//third_party/nvshmem/hermetic:nvshmem_configure.bzl",
125125
"nvshmem_configure",
126126
)
127127

128128
nvshmem_configure(name = "local_config_nvshmem")
129+
130+
load(
131+
"@rules_ml_toolchain//cc_toolchain/deps:cc_toolchain_deps.bzl",
132+
"cc_toolchain_deps",
133+
)
134+
135+
cc_toolchain_deps()
136+
137+
register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64")
138+
139+
register_toolchains("@rules_ml_toolchain//cc_toolchain:lx64_lx64_cuda")

tensorflow.bazelrc

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,6 @@ common --experimental_repo_remote_exec
434434

435435
# Make Bazel not try to probe the host system for a C++ toolchain.
436436
build:rbe_base --config=resultstore
437-
build:rbe_base --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
438437
build:rbe_base --define=EXECUTOR=remote
439438
build:rbe_base --jobs=800
440439
build:rbe_base --remote_executor=grpcs://remotebuildexecution.googleapis.com
@@ -458,11 +457,6 @@ build:rbe_linux --host_linkopt=-lm
458457

459458
build:rbe_linux_cpu --config=rbe_linux
460459
# Linux cpu and cuda builds share the same toolchain now.
461-
build:rbe_linux_cpu --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
462-
build:rbe_linux_cpu --crosstool_top="@local_config_cuda//crosstool:toolchain"
463-
build:rbe_linux_cpu --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
464-
build:rbe_linux_cpu --repo_env=CC="/usr/lib/llvm-18/bin/clang"
465-
build:rbe_linux_cpu --repo_env=TF_SYSROOT="/dt9"
466460
build:rbe_linux_cpu --extra_execution_platforms="@ml_build_config_platform//:platform"
467461
build:rbe_linux_cpu --host_platform="@ml_build_config_platform//:platform"
468462
build:rbe_linux_cpu --platforms="@ml_build_config_platform//:platform"
@@ -478,6 +472,16 @@ build:rbe_linux_cpu --python_path="/usr/bin/python3"
478472
# These you may need to change for your own GCP project.
479473
common:rbe_linux_cpu --remote_instance_name=projects/tensorflow-testing/instances/default_instance
480474

475+
# Deprecated RBE config with non-hermetic toolchains.
476+
build:rbe_linux_cpu_clang_local --config=clang_local
477+
build:rbe_linux_cpu_clang_local --config=rbe_linux_cpu
478+
build:rbe_linux_cpu_clang_local --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
479+
build:rbe_linux_cpu_clang_local --host_crosstool_top="@local_config_cuda//crosstool:toolchain"
480+
build:rbe_linux_cpu_clang_local --crosstool_top="@local_config_cuda//crosstool:toolchain"
481+
build:rbe_linux_cpu_clang_local --extra_toolchains="@local_config_cuda//crosstool:toolchain-linux-x86_64"
482+
build:rbe_linux_cpu_clang_local --repo_env=CC="/usr/lib/llvm-18/bin/clang"
483+
build:rbe_linux_cpu_clang_local --repo_env=TF_SYSROOT="/dt9"
484+
481485
# Download CUDA/CUDNN redistributions to preserve the repositories cache between
482486
# CPU and GPU builds.
483487
# TODO(ybaturina): Uncomment when RBE is ready to support this.
@@ -498,7 +502,9 @@ build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
498502
build:rbe_linux_cuda_nvcc --config=cuda_nvcc
499503
build:rbe_linux_cuda_nvcc --repo_env TF_NCCL_USE_STUB=1
500504

505+
build:rbe_win_base --config=clang_local
501506
build:rbe_win_base --config=rbe_base
507+
build:rbe_win_base --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
502508
build:rbe_win_base --shell_executable=C:\\tools\\msys64\\usr\\bin\\bash.exe
503509
build:rbe_win_base --remote_instance_name=projects/tensorflow-testing/instances/windows
504510
# Don't build the python zip archive in the RBE build.
@@ -514,6 +520,7 @@ build:rbe_windows_x86_cpu_2022 --config=rbe_win_base --config=windows_x86_cpu_20
514520
# END TF REMOTE BUILD EXECUTION OPTIONS
515521

516522
# TFLite build configs for generic embedded Linux
523+
build:elinux --config=clang_local
517524
build:elinux --crosstool_top=@local_config_embedded_arm//:toolchain
518525
build:elinux --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
519526
build:elinux_aarch64 --config=elinux
@@ -539,6 +546,7 @@ build:release_macos_base --define=no_nccl_support=true --output_filter=^$
539546

540547
# Ensure release_base is set on mac
541548
build:release_macos_base --config=cpu_cross
549+
build:release_macos_base --config=clang_local
542550

543551
# Build configs for macOS x86
544552
build:release_macos_x86 --config=release_macos_base
@@ -591,11 +599,13 @@ build:tf_public_macos_cache_push --config=tf_public_macos_cache --remote_upload_
591599
# flags seem to be actually used to specify the execution platform details. It
592600
# seems it is this way because these flags are old and predate the distinction
593601
# between host and execution platform.
602+
build:cross_compile_base --config=clang_local
594603
build:cross_compile_base --host_cpu=k8
595604
build:cross_compile_base --host_crosstool_top=//tools/toolchains/cross_compile/cc:cross_compile_toolchain_suite
596605
build:cross_compile_base --extra_execution_platforms=//tools/toolchains/cross_compile/config:linux_x86_64
597606

598607
build:rbe_cross_compile_base --config=rbe_base
608+
build:rbe_cross_compile_base --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1
599609
build:rbe_cross_compile_base --remote_instance_name=projects/tensorflow-testing/instances/default_instance
600610

601611
# Test-related settings below this point

workspace0.bzl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,17 @@ def workspace():
129129
# We only need `benchmark_deps` to be able to have bazel query to work and not complain about missing `@libpfm`.
130130
benchmark_deps()
131131

132+
# Toolchains for hermetic builds of ML projects.
133+
# Details: https://github.com/google-ml-infra/rules_ml_toolchain
134+
http_archive(
135+
name = "rules_ml_toolchain",
136+
sha256 = "368dbe2aecf6872c9e05bbee0e47b56f5b0d65827b76ed2219dd2bac2f170f93",
137+
strip_prefix = "rules_ml_toolchain-25a2bd8b442e82543f223d507d3391d46ee99284",
138+
urls = [
139+
"https://github.com/google-ml-infra/rules_ml_toolchain/archive/25a2bd8b442e82543f223d507d3391d46ee99284.tar.gz",
140+
],
141+
)
142+
132143
# If a target is bound twice, the later one wins, so we have to do tf bindings
133144
# at the end of the WORKSPACE file.
134145
_tf_bind()

0 commit comments

Comments
 (0)