pytorch
diff --git a/.ci/docker/build_docker.sh b/.ci/docker/build_docker.sh
Lines changed: 8 additions & 13 deletions
diff --git a/.ci/docker/ci_commit_pins/triton-rocm.txt b/.ci/docker/ci_commit_pins/triton-rocm.txt
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/docker/common/install_onnx.sh b/.ci/docker/common/install_onnx.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/pytorch/build-asan.sh b/.ci/pytorch/build-asan.sh
Lines changed: 0 additions & 39 deletions
diff --git a/.ci/pytorch/build.sh b/.ci/pytorch/build.sh
Lines changed: 9 additions & 4 deletions
diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/pytorch/multigpu-test.sh b/.ci/pytorch/multigpu-test.sh
Lines changed: 1 addition & 0 deletions
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
Lines changed: 44 additions & 44 deletions
diff --git a/.clang-tidy b/.clang-tidy
Lines changed: 0 additions & 1 deletion
diff --git a/.github/actions/calculate-docker-image/action.yml b/.github/actions/calculate-docker-image/action.yml
Lines changed: 1 addition & 1 deletion
@@ -6,17 +6,17 @@ retry () {
     $*  || (sleep 1 && $*) || (sleep 2 && $*)
 }
 
-# If UPSTREAM_BUILD_ID is set (see trigger job), then we can
-# use it to tag this build with the same ID used to tag all other
-# base image builds. Also, we can try and pull the previous
-# image first, to avoid rebuilding layers that haven't changed.
-
-#until we find a way to reliably reuse previous build, this last_tag is not in use
-# last_tag="$(( CIRCLE_BUILD_NUM - 1 ))"
 tag="${DOCKER_TAG}"
+registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
 
+# NB: The image name can now be either the short form, like pytorch-linux-bionic-py3.11-clang9, or the
+# full name, like 308535385114.dkr.ecr.us-east-1.amazonaws.com/pytorch/pytorch-linux-bionic-py3.11-clang9
+if [[ "${IMAGE_NAME}" == *"${registry}/pytorch/"* ]]; then
+  # Extract the image name from the long name
+  EXTRACTED_IMAGE_NAME=$(echo ${IMAGE_NAME#"${registry}/pytorch/"} | awk -F '[:,]' '{print $1}')
+  IMAGE_NAME="${EXTRACTED_IMAGE_NAME}"
+fi
 
-registry="308535385114.dkr.ecr.us-east-1.amazonaws.com"
 image="${registry}/pytorch/${IMAGE_NAME}"
 
 login() {
@@ -35,11 +35,6 @@ if [[ -z "${GITHUB_ACTIONS}" ]]; then
   trap "docker logout ${registry}" EXIT
 fi
 
-# Try to pull the previous image (perhaps we can reuse some layers)
-# if [ -n "${last_tag}" ]; then
-#   docker pull "${image}:${last_tag}" || true
-# fi
-
 # Build new image
 ./build.sh ${IMAGE_NAME} -t "${image}:${tag}"
 
 
@@ -1 +1 @@
-cd9e4c5db76b2bb0b47d0680d3d4c24523047e7c
+9dc100afb538d39da17621e0f8ad233f2078e6ff
@@ -24,7 +24,7 @@ pip_install \
   transformers==4.25.1
 
 # TODO: change this when onnx-script is on testPypi
-pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@a3caa39b14e8ee187573f6cb607e4fe4b9fe1f2f"
+pip_install "onnxscript@git+https://github.com/microsoft/onnxscript@7e131c578f290ffad1f26bacda11a83daf5476ba"
 
 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
 
@@ -11,10 +11,6 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
 # shellcheck source=./common-build.sh
 source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
 
-if [[ "$BUILD_ENVIRONMENT" == *-clang7-asan* ]]; then
-  exec "$(dirname "${BASH_SOURCE[0]}")/build-asan.sh" "$@"
-fi
-
 if [[ "$BUILD_ENVIRONMENT" == *-mobile-*build* ]]; then
   exec "$(dirname "${BASH_SOURCE[0]}")/build-mobile.sh" "$@"
 fi
@@ -168,6 +164,15 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
   export CXX=clang++
 fi
 
+if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
+  export LDSHARED="clang --shared"
+  export USE_CUDA=0
+  export USE_ASAN=1
+  export USE_MKLDNN=0
+  export UBSAN_FLAGS="-fno-sanitize-recover=all"
+  unset USE_LLVM
+fi
+
 if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
   export USE_PER_OPERATOR_HEADERS=0
 fi
 
@@ -175,7 +175,7 @@ function checkout_install_torchdeploy() {
   pushd multipy
   git checkout "${commit}"
   python multipy/runtime/example/generate_examples.py
-  pip install -e . --install-option="--cudatests"
+  BUILD_CUDA_TESTS=1 pip install -e .
   popd
   popd
 }
 
@@ -46,4 +46,5 @@ time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_ex
 # Other tests
 time python test/run_test.py --verbose -i test_cuda_primary_ctx
 time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
+time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
 assert_git_not_dirty
@@ -126,7 +126,7 @@ fi
 # if you're not careful.  Check this if you made some changes and the
 # ASAN test is not working
 if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
-    export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=1:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0
+    export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=true:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0:check_initialization_order=true:debug=true
     export UBSAN_OPTIONS=print_stacktrace=1
     export PYTORCH_TEST_WITH_ASAN=1
     export PYTORCH_TEST_WITH_UBSAN=1
@@ -166,6 +166,8 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
 
     # TODO: get rid of the hardcoded path
     export LD_PRELOAD=/usr/lib/llvm-7/lib/clang/7.0.1/lib/linux/libclang_rt.asan-x86_64.so
+    # Disable valgrind for asan
+    export VALGRIND=OFF
     # Increase stack size, because ASAN red zones use more stack
     ulimit -s 81920
 
@@ -312,7 +314,6 @@ test_perf_for_dashboard() {
   local suite="$1"
   shift
 
-  local dtype=amp
   local backend=inductor
   local modes=()
   if [[ "$DASHBOARD_TAG" == *training-true* ]]; then
@@ -325,6 +326,11 @@ test_perf_for_dashboard() {
   local targets=(accuracy performance)
 
   for mode in "${modes[@]}"; do
+    if [[ "$mode" == "inference" ]]; then
+      dtype=bfloat16
+    elif [[ "$mode" == "training" ]]; then
+      dtype=amp
+    fi
     for target in "${targets[@]}"; do
       local target_flag=("--${target}")
       if [[ "$target" == "performance" ]]; then
@@ -430,7 +436,7 @@ test_dynamo_benchmark() {
     if [[ "${TEST_CONFIG}" == *cpu_accuracy* ]]; then
       test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --float32 "$@"
     else
-      test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --amp "$@"
+      test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
       test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
     fi
   fi
@@ -469,7 +475,7 @@ test_aten() {
   # Test ATen
   # The following test(s) of ATen have already been skipped by caffe2 in rocm environment:
   # scalar_tensor_test, basic, native_test
-  if [[ "$BUILD_ENVIRONMENT" != *asan* ]] && [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
+  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     echo "Running ATen tests with pytorch lib"
 
     if [[ -n "$IN_WHEEL_TEST" ]]; then
@@ -669,51 +675,45 @@ test_rpc() {
 }
 
 test_custom_backend() {
-  if [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
-    echo "Testing custom backends"
-    CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
-    pushd test/custom_backend
-    cp -a "$CUSTOM_BACKEND_BUILD" build
-    # Run tests Python-side and export a lowered module.
-    python test_custom_backend.py -v
-    python backend.py --export-module-to=model.pt
-    # Run tests C++-side and load the exported lowered module.
-    build/test_custom_backend ./model.pt
-    rm -f ./model.pt
-    popd
-    assert_git_not_dirty
-  fi
+  echo "Testing custom backends"
+  CUSTOM_BACKEND_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-backend-build"
+  pushd test/custom_backend
+  cp -a "$CUSTOM_BACKEND_BUILD" build
+  # Run tests Python-side and export a lowered module.
+  python test_custom_backend.py -v
+  python backend.py --export-module-to=model.pt
+  # Run tests C++-side and load the exported lowered module.
+  build/test_custom_backend ./model.pt
+  rm -f ./model.pt
+  popd
+  assert_git_not_dirty
 }
 
 test_custom_script_ops() {
-  if [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
-    echo "Testing custom script operators"
-    CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
-    pushd test/custom_operator
-    cp -a "$CUSTOM_OP_BUILD" build
-    # Run tests Python-side and export a script module.
-    python test_custom_ops.py -v
-    python model.py --export-script-module=model.pt
-    # Run tests C++-side and load the exported script module.
-    build/test_custom_ops ./model.pt
-    popd
-    assert_git_not_dirty
-  fi
+  echo "Testing custom script operators"
+  CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build"
+  pushd test/custom_operator
+  cp -a "$CUSTOM_OP_BUILD" build
+  # Run tests Python-side and export a script module.
+  python test_custom_ops.py -v
+  python model.py --export-script-module=model.pt
+  # Run tests C++-side and load the exported script module.
+  build/test_custom_ops ./model.pt
+  popd
+  assert_git_not_dirty
 }
 
 test_jit_hooks() {
-  if [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
-    echo "Testing jit hooks in cpp"
-    HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
-    pushd test/jit_hooks
-    cp -a "$HOOK_BUILD" build
-    # Run tests Python-side and export the script modules with hooks
-    python model.py --export-script-module=model
-    # Run tests C++-side and load the exported script modules
-    build/test_jit_hooks ./model
-    popd
-    assert_git_not_dirty
-  fi
+  echo "Testing jit hooks in cpp"
+  HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build"
+  pushd test/jit_hooks
+  cp -a "$HOOK_BUILD" build
+  # Run tests Python-side and export the script modules with hooks
+  python model.py --export-script-module=model
+  # Run tests C++-side and load the exported script modules
+  build/test_jit_hooks ./model
+  popd
+  assert_git_not_dirty
 }
 
 test_torch_function_benchmark() {
@@ -923,7 +923,7 @@ test_cpp_extensions() {
 
 test_vec256() {
   # This is to test vec256 instructions DEFAULT/AVX/AVX2 (platform dependent, some platforms might not support AVX/AVX2)
-  if [[ "$BUILD_ENVIRONMENT" != *asan* ]] && [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
+  if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     echo "Testing vec256 instructions"
     mkdir -p test/test-reports/vec256
     pushd build/bin
 
@@ -42,7 +42,6 @@ modernize-*,
 -modernize-use-trailing-return-type,
 -modernize-use-nodiscard,
 performance-*,
--performance-noexcept-move-constructor,
 -performance-unnecessary-value-param,
 readability-container-size-empty,
 '
 
@@ -111,7 +111,7 @@ runs:
     - name: Build and push docker image
       if: inputs.always-rebuild || steps.check.outputs.rebuild
       env:
-        IMAGE_NAME: ${{inputs.docker-image-name}}
+        IMAGE_NAME: ${{ inputs.docker-image-name }}
         DOCKER_SKIP_S3_UPLOAD: "1"
         # Skip push if we don't need it, or if specified in the inputs
         DOCKER_SKIP_PUSH: ${{ steps.check.outputs.skip_push || inputs.skip_push }}
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-cd9e4c5db76b2bb0b47d0680d3d4c24523047e7c`
	`1`	`+9dc100afb538d39da17621e0f8ad233f2078e6ff`
Original file line number	Diff line number	Diff line change
`@@ -175,7 +175,7 @@ function checkout_install_torchdeploy() {`
`175`	`175`	`pushd multipy`
`176`	`176`	`git checkout "${commit}"`
`177`	`177`	`python multipy/runtime/example/generate_examples.py`
`178`		`- pip install -e . --install-option="--cudatests"`
	`178`	`+ BUILD_CUDA_TESTS=1 pip install -e .`
`179`	`179`	`popd`
`180`	`180`	`popd`
`181`	`181`	`}`
Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,6 @@ modernize-*,`
`42`	`42`	`-modernize-use-trailing-return-type,`
`43`	`43`	`-modernize-use-nodiscard,`
`44`	`44`	`performance-*,`
`45`		`--performance-noexcept-move-constructor,`
`46`	`45`	`-performance-unnecessary-value-param,`
`47`	`46`	`readability-container-size-empty,`
`48`	`47`	`'`