Skip to content

Commit f2f8e1f

Browse files
author
Yifu Wang
committed
Update
[ghstack-poisoned]
2 parents 998c524 + 991aa6e commit f2f8e1f

891 files changed

Lines changed: 15631 additions & 8132 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.ci/docker/build.sh

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -414,9 +414,6 @@ case "$image" in
414414
DB=yes
415415
VISION=yes
416416
CONDA_CMAKE=yes
417-
# snadampal: skipping sccache due to the following issue
418-
# https://github.com/pytorch/pytorch/issues/121559
419-
SKIP_SCCACHE_INSTALL=yes
420417
# snadampal: skipping llvm src build install because the current version
421418
# from pytorch/llvm:9.0.1 is x86 specific
422419
SKIP_LLVM_SRC_BUILD_INSTALL=yes
@@ -429,9 +426,6 @@ case "$image" in
429426
DB=yes
430427
VISION=yes
431428
CONDA_CMAKE=yes
432-
# snadampal: skipping sccache due to the following issue
433-
# https://github.com/pytorch/pytorch/issues/121559
434-
SKIP_SCCACHE_INSTALL=yes
435429
# snadampal: skipping llvm src build install because the current version
436430
# from pytorch/llvm:9.0.1 is x86 specific
437431
SKIP_LLVM_SRC_BUILD_INSTALL=yes
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ca4783992ed7602a39528ba304d61f00396b2a5a
1+
16b633b4daa7f3d3442be62a3589bd60b2f7fdc7

.ci/docker/common/install_cache.sh

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@ install_ubuntu() {
99
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
1010
apt-get install -y cargo
1111
echo "Checking out sccache repo"
12-
git clone https://github.com/pytorch/sccache
12+
if [ -n "$CUDA_VERSION" ]; then
13+
# TODO: Remove this
14+
git clone https://github.com/pytorch/sccache
15+
else
16+
git clone https://github.com/mozilla/sccache -b v0.8.2
17+
fi
1318
cd sccache
1419
echo "Building sccache"
1520
cargo build --release
@@ -19,6 +24,10 @@ install_ubuntu() {
1924
rm -rf sccache
2025
apt-get remove -y cargo rustc
2126
apt-get autoclean && apt-get clean
27+
28+
echo "Downloading old sccache binary from S3 repo for PCH builds"
29+
curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache-0.2.14a
30+
chmod 755 /opt/cache/bin/sccache-0.2.14a
2231
}
2332

2433
install_binary() {
@@ -36,18 +45,46 @@ if [ -n "$ROCM_VERSION" ]; then
3645
curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
3746
else
3847
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
39-
# TODO: Install the pre-built binary from S3 as building from source
40-
# https://github.com/pytorch/sccache has started failing mysteriously
41-
# in which sccache server couldn't start with the following error:
42-
# sccache: error: Invalid argument (os error 22)
43-
install_binary
48+
if [ -n "$CUDA_VERSION" ]; then
49+
# TODO: Install the pre-built binary from S3 as building from source
50+
# https://github.com/pytorch/sccache has started failing mysteriously
51+
# in which sccache server couldn't start with the following error:
52+
# sccache: error: Invalid argument (os error 22)
53+
install_binary
54+
else
55+
install_ubuntu
56+
fi
4457
fi
4558
chmod a+x /opt/cache/bin/sccache
4659

4760
function write_sccache_stub() {
4861
# Unset LD_PRELOAD for ps because of asan + ps issues
4962
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
50-
printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n exec sccache $(which $1) \"\$@\"\nelse\n exec $(which $1) \"\$@\"\nfi" > "/opt/cache/bin/$1"
63+
if [ $1 == "gcc" ]; then
64+
# Do not call sccache recursively when dumping preprocessor argument
65+
# For some reason it's very important for the first cached nvcc invocation
66+
cat > "/opt/cache/bin/$1" <<EOF
67+
#!/bin/sh
68+
69+
if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
70+
exec $(which $1) "\$@"
71+
elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
72+
exec sccache $(which $1) "\$@"
73+
else
74+
exec $(which $1) "\$@"
75+
fi
76+
EOF
77+
else
78+
cat > "/opt/cache/bin/$1" <<EOF
79+
#!/bin/sh
80+
81+
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
82+
exec sccache $(which $1) "\$@"
83+
else
84+
exec $(which $1) "\$@"
85+
fi
86+
EOF
87+
fi
5188
chmod a+x "/opt/cache/bin/$1"
5289
}
5390

.ci/docker/libtorch/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ RUN bash ./install_cuda.sh 12.4
6666
RUN bash ./install_magma.sh 12.4
6767
RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda
6868

69+
FROM cuda as cuda12.6
70+
RUN bash ./install_cuda.sh 12.6
71+
RUN bash ./install_magma.sh 12.6
72+
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda
73+
6974
FROM cpu as rocm
7075
ARG PYTORCH_ROCM_ARCH
7176
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}

.ci/docker/requirements-ci.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ tb-nightly==2.13.0a20230426
257257
#test that import:
258258

259259
# needed by torchgen utils
260-
typing-extensions
260+
typing-extensions>=4.10.0
261261
#Description: type hints for python
262262
#Pinned versions:
263263
#test that import:
@@ -331,7 +331,7 @@ sympy==1.13.1 ; python_version >= "3.9"
331331
#Pinned versions:
332332
#test that import:
333333

334-
onnx==1.16.1
334+
onnx==1.17.0
335335
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
336336
#Pinned versions:
337337
#test that import:

.ci/pytorch/build.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,6 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
398398
python tools/stats/export_test_times.py
399399
fi
400400

401-
# snadampal: skipping it till sccache support added for aarch64
402-
# https://github.com/pytorch/pytorch/issues/121559
403-
if [[ "$BUILD_ENVIRONMENT" != *aarch64* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
401+
if [[ "$BUILD_ENVIRONMENT" != *s390x* ]]; then
404402
print_sccache_stats
405403
fi

.ci/pytorch/common-build.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
66
# Save the absolute path in case later we chdir (as occurs in the gpu perf test)
77
script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"
88

9+
if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
10+
# This is really weird, but newer sccache somehow produces broken binary
11+
# see https://github.com/pytorch/pytorch/issues/139188
12+
sudo mv /opt/cache/bin/sccache-0.2.14a /opt/cache/bin/sccache
13+
fi
14+
915
if which sccache > /dev/null; then
1016
# Save sccache logs to file
1117
sccache --stop-server > /dev/null 2>&1 || true

.ci/pytorch/test.sh

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ test_python() {
296296
}
297297

298298

299-
test_dynamo_shard() {
299+
test_dynamo_wrapped_shard() {
300300
if [[ -z "$NUM_TEST_SHARDS" ]]; then
301301
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
302302
exit 1
@@ -380,6 +380,10 @@ test_inductor_cpp_wrapper() {
380380
TEST_REPORTS_DIR=$(pwd)/test/test-reports
381381
mkdir -p "$TEST_REPORTS_DIR"
382382

383+
# Run certain inductor unit tests with cpp wrapper. In the end state, we should be able to run all the inductor
384+
# unit tests with cpp wrapper.
385+
python test/run_test.py --include inductor/test_torchinductor.py --verbose
386+
383387
python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
384388
--training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
385389
--output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
@@ -802,7 +806,7 @@ test_without_numpy() {
802806
# Regression test for https://github.com/pytorch/pytorch/issues/66353
803807
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;print(torch.tensor([torch.tensor(0.), torch.tensor(1.)]))"
804808
# Regression test for https://github.com/pytorch/pytorch/issues/109387
805-
if [[ "${TEST_CONFIG}" == *dynamo* ]]; then
809+
if [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
806810
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')"
807811
fi
808812
popd
@@ -1475,9 +1479,9 @@ elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
14751479
test_inductor_distributed
14761480
fi
14771481
fi
1478-
elif [[ "${TEST_CONFIG}" == *dynamo* ]]; then
1482+
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
14791483
install_torchvision
1480-
test_dynamo_shard "${SHARD_NUMBER}"
1484+
test_dynamo_wrapped_shard "${SHARD_NUMBER}"
14811485
if [[ "${SHARD_NUMBER}" == 1 ]]; then
14821486
test_aten
14831487
fi

.ci/pytorch/win-test-helpers/build_pytorch.bat

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ if not errorlevel 0 goto fail
5252

5353
if "%USE_XPU%"=="1" (
5454
:: Activate xpu environment - VS env is required for xpu
55-
call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
55+
call "C:\Program Files (x86)\Intel\oneAPI\compiler\latest\env\vars.bat"
56+
call "C:\Program Files (x86)\Intel\oneAPI\ocloc\latest\env\vars.bat"
5657
if errorlevel 1 exit /b 1
5758
:: Reduce build time. Only have MTL self-hosted runner now
5859
SET TORCH_XPU_ARCH_LIST=xe-lpg

.github/actions/build-android/action.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,14 @@ runs:
4242
PR_NUMBER: ${{ github.event.pull_request.number }}
4343
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
4444
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
45+
SCCACHE_REGION: us-east-1
4546
DOCKER_IMAGE: ${{ inputs.docker-image }}
4647
MATRIX_ARCH: ${{ inputs.arch }}
4748
run: |
4849
# detached container should get cleaned up by teardown_ec2_linux
4950
set -exo pipefail
51+
# Fetch aws credential from IMDs
52+
eval "$(python3 .github/scripts/get_aws_session_tokens.py)"
5053
export container_name
5154
container_name=$(docker run \
5255
-e BUILD_ENVIRONMENT \
@@ -56,6 +59,7 @@ runs:
5659
-e SHA1 \
5760
-e BRANCH \
5861
-e SCCACHE_BUCKET \
62+
-e SCCACHE_REGION \
5963
-e SKIP_SCCACHE_INITIALIZATION=1 \
6064
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
6165
--security-opt seccomp=unconfined \

0 commit comments

Comments
 (0)