pytorch
diff --git a/‎aten/src/ATen/native/ForeachOpsKernels.cpp‎
Lines changed: 4 additions & 4 deletions b/‎aten/src/ATen/native/ForeachOpsKernels.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎aten/src/ATen/native/ForeachUtils.h‎
Lines changed: 25 additions & 24 deletions b/‎aten/src/ATen/native/ForeachUtils.h‎
Lines changed: 25 additions & 24 deletions
diff --git a/‎aten/src/ATen/native/cuda/ForeachBinaryOpScalarList.cu‎
Lines changed: 33 additions & 34 deletions b/‎aten/src/ATen/native/cuda/ForeachBinaryOpScalarList.cu‎
Lines changed: 33 additions & 34 deletions
@@ -25,15 +25,15 @@ std::vector<Tensor> foreach_tensor_##OP##_scalar_kernel_slow(TensorList tensors,
 }
 
 #define FOREACH_BINARY_OP_SCALARLIST(OP)                                                                                \
-void foreach_tensor_##OP##_scalarlist_kernel_slow_(TensorList tensors, at::ArrayRef<double> scalars) {                  \
+void foreach_tensor_##OP##_scalarlist_kernel_slow_(TensorList tensors, at::ArrayRef<Scalar> scalars) {                  \
   check_foreach_api_restrictions(tensors, scalars);                                                                     \
                                                                                                                         \
   for (size_t i = 0; i < tensors.size(); i++) {                                                                            \
       tensors[i].OP##_(scalars[i]);                                                                                     \
     }                                                                                                                   \
 }                                                                                                                       \
                                                                                                                         \
-std::vector<Tensor> foreach_tensor_##OP##_scalarlist_kernel_slow(TensorList tensors, at::ArrayRef<double> scalars) {    \
+std::vector<Tensor> foreach_tensor_##OP##_scalarlist_kernel_slow(TensorList tensors, at::ArrayRef<Scalar> scalars) {    \
   check_foreach_api_restrictions(tensors, scalars);                                                                     \
   std::vector<Tensor> result;                                                                                           \
   result.reserve(tensors.size());                                                                                       \
@@ -128,7 +128,7 @@ void foreach_tensor_##OP##_scalar_slow_(TensorList input, TensorList tensors1, T
 }                                                                                                                                    \
 
 #define FOREACH_POINTWISE_OP_SCALARLIST(OP)                                                                                                             \
-std::vector<Tensor> foreach_tensor_##OP##_scalarlist_slow(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<double> scalars) {   \
+std::vector<Tensor> foreach_tensor_##OP##_scalarlist_slow(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<Scalar> scalars) {   \
   check_foreach_api_restrictions(input, tensors1, tensors2, scalars);                                                                                   \
                                                                                                                                                         \
   std::vector<Tensor> result;                                                                                                                           \
@@ -139,7 +139,7 @@ std::vector<Tensor> foreach_tensor_##OP##_scalarlist_slow(TensorList input, Tens
   return result;                                                                                                                                        \
 }                                                                                                                                                       \
                                                                                                                                                         \
-void foreach_tensor_##OP##_scalarlist_slow_(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<double> scalars) {                 \
+void foreach_tensor_##OP##_scalarlist_slow_(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<Scalar> scalars) {                 \
   check_foreach_api_restrictions(input, tensors1, tensors2, scalars);                                                                                   \
                                                                                                                                                         \
   for (size_t i = 0; i < input.size(); i++) {                                                                                                              \
 
@@ -4,7 +4,11 @@
 namespace at {
 namespace native {
 namespace {
-
+// Check foreach API restrictions 
+// - Tensor lists must be non-empty.
+// - All tensors in all lists must have the same dtype.
+// - All TensorLists and ScalarLists must have the same number of elements.
+// - Corresponding tensors must have the same size.
 void check_foreach_api_restrictions(TensorList tensors) {
   TORCH_CHECK(tensors.size() > 0, "Tensor list must have at least one tensor.");
   auto expected_dtype = tensors[0].dtype();
@@ -13,7 +17,7 @@ void check_foreach_api_restrictions(TensorList tensors) {
   }
 }
 
-void check_foreach_api_restrictions(TensorList tensors, ArrayRef<double> scalars) {
+void check_foreach_api_restrictions(TensorList tensors, ArrayRef<Scalar> scalars) {  // runs the tensor-only checks, then requires scalars.size() == tensors.size()
   check_foreach_api_restrictions(tensors);
   TORCH_CHECK(tensors.size() == scalars.size(), "Tensor list must have same number of elements as scalar list.");
 }
@@ -49,7 +53,7 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, Te
   }
 }
 
-void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<double> scalars) {
+void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<Scalar> scalars) {  // runs the three-list checks, then requires scalars.size() == tensors1.size()
   check_foreach_api_restrictions(tensors1, tensors2, tensors3);
   TORCH_CHECK(tensors1.size() == scalars.size(), "Tensor list must have same number of elements as scalar list, got ", tensors1.size(), " and ", scalars.size());
 }
@@ -85,21 +89,8 @@ bool has_same_attributes(Device expected_device, TensorList tensors) {
 }
 
 bool will_promote_tensor(const Tensor& tensor, Scalar scalar) {
-  // complex scalar + integral or boolean tensor will result in complex tensor
-  if (scalar.isComplex() && at::isIntegralType(tensor.scalar_type(), /*includeBool*/ true)) {
-    return false;
-  }
-
-  // float scalar + integral or boolean tensor will result in float tensor
-  if (scalar.isFloatingPoint() && at::isIntegralType(tensor.scalar_type(), /*includeBool*/ true)) {
-    return false;
-  }
-
-  // integral scalar + boolean tensor will result in integral tensor
-  if (scalar.isIntegral(/*includeBool*/ false) && tensor.dtype() == at::kBool) {
-    return false;
-  }
-  return true;
+  auto result_dtype = at::result_type(tensor, scalar);  // dtype that at::result_type reports for this (tensor, scalar) pair
+  return result_dtype != tensor.scalar_type();  // true only when that dtype differs from the tensor's current one
 }
 
 bool can_use_fast_route(TensorList tensors) {
@@ -128,7 +119,7 @@ bool can_use_fast_route(TensorList tensors, Scalar scalar) {
       return false;
     }
 
-    if (!will_promote_tensor(t, scalar)) {
+    if (will_promote_tensor(t, scalar)) {
       return false;
     }
   }
@@ -137,8 +128,18 @@ bool can_use_fast_route(TensorList tensors, Scalar scalar) {
 #endif
 }
 
-bool can_use_fast_route(TensorList tensors, ArrayRef<double> scalars) {
-  return can_use_fast_route(tensors);
+bool can_use_fast_route(TensorList tensors, ArrayRef<Scalar> scalars) {
+#ifdef __HIP_PLATFORM_HCC__
+  return false;
+#else
+  for (size_t i = 0; i < tensors.size(); i++) {  // size_t matches the unsigned tensors.size()
+    if (will_promote_tensor(tensors[i], scalars[i])) {  // a dtype change for any pair rules out the fast route
+      return false;
+    }
+  }
+  // Still apply the tensor-only checks that the replaced implementation delegated to.
+  return can_use_fast_route(tensors);
+#endif
 }
 
 bool can_use_fast_route(TensorList tensors1, TensorList tensors2) {
@@ -166,7 +167,7 @@ bool can_use_fast_route(TensorList tensors1, TensorList tensors2, Scalar scalar)
       return false;
     }
 
-    if (!will_promote_tensor(tensors1[i], scalar)) {
+    if (will_promote_tensor(tensors1[i], scalar)) {
       return false;
     }
   }
@@ -200,7 +201,7 @@ bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList ten
       return false;
     }
 
-    if (!will_promote_tensor(tensors1[i], scalar)) {
+    if (will_promote_tensor(tensors1[i], scalar)) {
       return false;
     }
   }
@@ -209,7 +210,7 @@ bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList ten
 #endif
 }
 
-bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<double> scalars) {
+bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<Scalar> scalars) {  // NOTE(review): scalars is never inspected here, unlike the (tensors, scalars) overload - confirm that is intended
   return can_use_fast_route(tensors1, tensors2, tensors3);
 }
 
 
@@ -5,7 +5,7 @@
 namespace at { namespace native {
 
 template<template<class> class Op>
-std::vector<Tensor> foreach_binary_op(TensorList tensors, at::ArrayRef<double> scalars) {
+std::vector<Tensor> foreach_binary_op(TensorList tensors, at::ArrayRef<Scalar> scalars) {
     std::vector<std::vector<at::Tensor>> tensor_lists;
     std::vector<at::Tensor> vec_res;
     vec_res.reserve(tensors.size());
@@ -18,52 +18,51 @@ std::vector<Tensor> foreach_binary_op(TensorList tensors, at::ArrayRef<double> s
 
     AT_DISPATCH_ALL_TYPES_AND2(kBFloat16, kHalf, tensors[0].scalar_type(), "foreach_binary_op_scalarlist_cuda", [&]() {
         using opmath_t = get_opmath_t<scalar_t>::opmath_t;
-        multi_tensor_apply<2>(tensor_lists,
-                              scalars,
-                              BinaryOpScalarListFunctor<scalar_t, 
-                                                        /* depth */ 2,
-                                                        /* r_args_depth */ 1, 
-                                                        /* res_arg_index */ 1>(),
-                                                       
-                              Op<opmath_t>());
+        multi_tensor_apply<2, opmath_t>(tensor_lists,
+                                        scalars,
+                                        BinaryOpScalarListFunctor<scalar_t,
+                                                                  /* depth */ 2,
+                                                                  /* r_args_depth */ 1,
+                                                                  /* res_arg_index */ 1>(),
+                                        Op<opmath_t>());
     });
     return tensor_lists[1];
 }
 
 template<template<class> class Op>
-void foreach_binary_op_(TensorList tensors, at::ArrayRef<double> scalars) {
+void foreach_binary_op_(TensorList tensors, at::ArrayRef<Scalar> scalars) {  // void variant: only the input list is handed to multi_tensor_apply (depth 1, res_arg_index 0)
     std::vector<std::vector<at::Tensor>> tensor_lists;
     tensor_lists.emplace_back(tensors.vec());
 
     AT_DISPATCH_ALL_TYPES_AND2(kBFloat16, kHalf, tensors[0].scalar_type(), "foreach_binary_op_scalarlist_cuda_", [&]() {
         using opmath_t = get_opmath_t<scalar_t>::opmath_t;
-        multi_tensor_apply<1>(tensor_lists,
-                              scalars,
-                              BinaryOpScalarListFunctor<scalar_t, 
-                                                        /* depth */ 1,
-                                                        /* r_args_depth */ 1, 
-                                                        /* res_arg_index */ 0>(),
-                              Op<opmath_t>());
+        multi_tensor_apply<1, opmath_t>(tensor_lists,  // opmath_t is now supplied as an explicit template argument
+                                        scalars,
+                                        BinaryOpScalarListFunctor<scalar_t,
+                                                                    /* depth */ 1,
+                                                                    /* r_args_depth */ 1,
+                                                                    /* res_arg_index */ 0>(),
+                                        Op<opmath_t>());
     });
 }
 
-#define FOREACH_BINARY_OP_SCALARLIST(NAME, OP)                                                                           \
-void foreach_tensor_##NAME##_scalarlist_kernel_cuda_(TensorList tensors, at::ArrayRef<double> scalars) {                 \
-    check_foreach_api_restrictions(tensors, scalars);                                                                    \
-    if (!can_use_fast_route(tensors, scalars)) {                                                                         \
-        return at::native::foreach_tensor_##NAME##_scalarlist_kernel_slow_(tensors, scalars);                            \
-    }                                                                                                                    \
-                                                                                                                         \
-    foreach_binary_op_<OP>(tensors, scalars);                                                                            \
-}                                                                                                                        \
-                                                                                                                         \
-std::vector<Tensor> foreach_tensor_##NAME##_scalarlist_kernel_cuda(TensorList tensors, at::ArrayRef<double> scalars) {   \
-    check_foreach_api_restrictions(tensors, scalars);                                                                    \
-    if (!can_use_fast_route(tensors, scalars)) {                                                                         \
-        return at::native::foreach_tensor_##NAME##_scalarlist_kernel_slow(tensors, scalars);                             \
-    }                                                                                                                    \
-                                                                                                                         \
-    return foreach_binary_op<OP>(tensors, scalars);                                                                      \
+#define FOREACH_BINARY_OP_SCALARLIST(NAME, OP)                                                                          \
+void foreach_tensor_##NAME##_scalarlist_kernel_cuda_(TensorList tensors, at::ArrayRef<Scalar> scalars) {                \
+    check_foreach_api_restrictions(tensors, scalars);                                                                   \
+    if (!can_use_fast_route(tensors, scalars)) {                                                                        \
+        return at::native::foreach_tensor_##NAME##_scalarlist_kernel_slow_(tensors, scalars);                           \
+    }                                                                                                                   \
+    /* Reached only when can_use_fast_route() returned true. */                                                         \
+    foreach_binary_op_<OP>(tensors, scalars);                                                                           \
+}                                                                                                                       \
+                                                                                                                        \
+std::vector<Tensor> foreach_tensor_##NAME##_scalarlist_kernel_cuda(TensorList tensors, at::ArrayRef<Scalar> scalars) {  \
+    check_foreach_api_restrictions(tensors, scalars);                                                                   \
+    if (!can_use_fast_route(tensors, scalars)) {                                                                        \
+        return at::native::foreach_tensor_##NAME##_scalarlist_kernel_slow(tensors, scalars);                            \
+    }                                                                                                                   \
+    /* Reached only when can_use_fast_route() returned true. */                                                         \
+    return foreach_binary_op<OP>(tensors, scalars);                                                                     \
 }
 
 FOREACH_BINARY_OP_SCALARLIST(add, std::plus);
Original file line number	Diff line number	Diff line change
`@@ -25,15 +25,15 @@ std::vector<Tensor> foreach_tensor_##OP##_scalar_kernel_slow(TensorList tensors,`
`25`	`25`	`}`
`26`	`26`
`27`	`27`	`#define FOREACH_BINARY_OP_SCALARLIST(OP) \`
`28`		`-void foreach_tensor_##OP##_scalarlist_kernel_slow_(TensorList tensors, at::ArrayRef<double> scalars) { \`
	`28`	`+void foreach_tensor_##OP##_scalarlist_kernel_slow_(TensorList tensors, at::ArrayRef<Scalar> scalars) { \`
`29`	`29`	`check_foreach_api_restrictions(tensors, scalars); \`
`30`	`30`	`\`
`31`	`31`	`for (size_t i = 0; i < tensors.size(); i++) { \`
`32`	`32`	`tensors[i].OP##_(scalars[i]); \`
`33`	`33`	`} \`
`34`	`34`	`} \`
`35`	`35`	`\`
`36`		`-std::vector<Tensor> foreach_tensor_##OP##_scalarlist_kernel_slow(TensorList tensors, at::ArrayRef<double> scalars) { \`
	`36`	`+std::vector<Tensor> foreach_tensor_##OP##_scalarlist_kernel_slow(TensorList tensors, at::ArrayRef<Scalar> scalars) { \`
`37`	`37`	`check_foreach_api_restrictions(tensors, scalars); \`
`38`	`38`	`std::vector<Tensor> result; \`
`39`	`39`	`result.reserve(tensors.size()); \`
`@@ -128,7 +128,7 @@ void foreach_tensor_##OP##_scalar_slow_(TensorList input, TensorList tensors1, T`
`128`	`128`	`} \`
`129`	`129`
`130`	`130`	`#define FOREACH_POINTWISE_OP_SCALARLIST(OP) \`
`131`		`-std::vector<Tensor> foreach_tensor_##OP##_scalarlist_slow(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<double> scalars) { \`
	`131`	`+std::vector<Tensor> foreach_tensor_##OP##_scalarlist_slow(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<Scalar> scalars) { \`
`132`	`132`	`check_foreach_api_restrictions(input, tensors1, tensors2, scalars); \`
`133`	`133`	`\`
`134`	`134`	`std::vector<Tensor> result; \`
`@@ -139,7 +139,7 @@ std::vector<Tensor> foreach_tensor_##OP##_scalarlist_slow(TensorList input, Tens`
`139`	`139`	`return result; \`
`140`	`140`	`} \`
`141`	`141`	`\`
`142`		`-void foreach_tensor_##OP##_scalarlist_slow_(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<double> scalars) { \`
	`142`	`+void foreach_tensor_##OP##_scalarlist_slow_(TensorList input, TensorList tensors1, TensorList tensors2, at::ArrayRef<Scalar> scalars) { \`
`143`	`143`	`check_foreach_api_restrictions(input, tensors1, tensors2, scalars); \`
`144`	`144`	`\`
`145`	`145`	`for (size_t i = 0; i < input.size(); i++) { \`
Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,11 @@`
`4`	`4`	`namespace at {`
`5`	`5`	`namespace native {`
`6`	`6`	`namespace {`
`7`		`-`
	`7`	`+// Check foreach API restrictions`
	`8`	`+// - Tensor lists must be non-empty.`
	`9`	`+// - All tensors in all lists must have the same dtype.`
	`10`	`+// - All TensorLists and ScalarLists must have the same number of elements.`
	`11`	`+// - Corresponding tensors must have the same size.`
`8`	`12`	`void check_foreach_api_restrictions(TensorList tensors) {`
`9`	`13`	`TORCH_CHECK(tensors.size() > 0, "Tensor list must have at least one tensor.");`
`10`	`14`	`auto expected_dtype = tensors[0].dtype();`
`@@ -13,7 +17,7 @@ void check_foreach_api_restrictions(TensorList tensors) {`
`13`	`17`	`}`
`14`	`18`	`}`
`15`	`19`
`16`		`-void check_foreach_api_restrictions(TensorList tensors, ArrayRef<double> scalars) {`
	`20`	`+void check_foreach_api_restrictions(TensorList tensors, ArrayRef<Scalar> scalars) {`
`17`	`21`	`check_foreach_api_restrictions(tensors);`
`18`	`22`	`TORCH_CHECK(tensors.size() == scalars.size(), "Tensor list must have same number of elements as scalar list.");`
`19`	`23`	`}`
`@@ -49,7 +53,7 @@ void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, Te`
`49`	`53`	`}`
`50`	`54`	`}`
`51`	`55`
`52`		`-void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<double> scalars) {`
	`56`	`+void check_foreach_api_restrictions(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<Scalar> scalars) {`
`53`	`57`	`check_foreach_api_restrictions(tensors1, tensors2, tensors3);`
`54`	`58`	`TORCH_CHECK(tensors1.size() == scalars.size(), "Tensor list must have same number of elements as scalar list, got ", tensors1.size(), " and ", scalars.size());`
`55`	`59`	`}`
`@@ -85,21 +89,8 @@ bool has_same_attributes(Device expected_device, TensorList tensors) {`
`85`	`89`	`}`
`86`	`90`
`87`	`91`	`bool will_promote_tensor(const Tensor& tensor, Scalar scalar) {`
`88`		`- // complex scalar + integral or boolean tensor will result in complex tensor`
`89`		`- if (scalar.isComplex() && at::isIntegralType(tensor.scalar_type(), /includeBool/ true)) {`
`90`		`- return false;`
`91`		`- }`
`92`		`-`
`93`		`- // float scalar + integral or boolean tensor will result in float tensor`
`94`		`- if (scalar.isFloatingPoint() && at::isIntegralType(tensor.scalar_type(), /includeBool/ true)) {`
`95`		`- return false;`
`96`		`- }`
`97`		`-`
`98`		`- // integral scalar + boolean tensor will result in integral tensor`
`99`		`- if (scalar.isIntegral(/includeBool/ false) && tensor.dtype() == at::kBool) {`
`100`		`- return false;`
`101`		`- }`
`102`		`- return true;`
	`92`	`+ auto result_dtype = at::result_type(tensor, scalar);`
	`93`	`+ return result_dtype != tensor.scalar_type();`
`103`	`94`	`}`
`104`	`95`
`105`	`96`	`bool can_use_fast_route(TensorList tensors) {`
`@@ -128,7 +119,7 @@ bool can_use_fast_route(TensorList tensors, Scalar scalar) {`
`128`	`119`	`return false;`
`129`	`120`	`}`
`130`	`121`
`131`		`- if (!will_promote_tensor(t, scalar)) {`
	`122`	`+ if (will_promote_tensor(t, scalar)) {`
`132`	`123`	`return false;`
`133`	`124`	`}`
`134`	`125`	`}`
`@@ -137,8 +128,18 @@ bool can_use_fast_route(TensorList tensors, Scalar scalar) {`
`137`	`128`	`#endif`
`138`	`129`	`}`
`139`	`130`
`140`		`-bool can_use_fast_route(TensorList tensors, ArrayRef<double> scalars) {`
`141`		`- return can_use_fast_route(tensors);`
	`131`	`+bool can_use_fast_route(TensorList tensors, ArrayRef<Scalar> scalars) {`
	`132`	`+#ifdef __HIP_PLATFORM_HCC__`
	`133`	`+ return false;`
	`134`	`+#else`
	`135`	`+ for (size_t i = 0; i < tensors.size(); i++) {`
	`136`	`+ if (will_promote_tensor(tensors[i], scalars[i])) {`
	`137`	`+ return false;`
	`138`	`+ }`
	`139`	`+ }`
	`140`	`+ // Still apply the tensor-only checks that the replaced implementation delegated to.`
	`141`	`+ return can_use_fast_route(tensors);`
	`142`	`+#endif`
`142`	`143`	`}`
`143`	`144`
`144`	`145`	`bool can_use_fast_route(TensorList tensors1, TensorList tensors2) {`
`@@ -166,7 +167,7 @@ bool can_use_fast_route(TensorList tensors1, TensorList tensors2, Scalar scalar)`
`166`	`167`	`return false;`
`167`	`168`	`}`
`168`	`169`
`169`		`- if (!will_promote_tensor(tensors1[i], scalar)) {`
	`170`	`+ if (will_promote_tensor(tensors1[i], scalar)) {`
`170`	`171`	`return false;`
`171`	`172`	`}`
`172`	`173`	`}`
`@@ -200,7 +201,7 @@ bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList ten`
`200`	`201`	`return false;`
`201`	`202`	`}`
`202`	`203`
`203`		`- if (!will_promote_tensor(tensors1[i], scalar)) {`
	`204`	`+ if (will_promote_tensor(tensors1[i], scalar)) {`
`204`	`205`	`return false;`
`205`	`206`	`}`
`206`	`207`	`}`
`@@ -209,7 +210,7 @@ bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList ten`
`209`	`210`	`#endif`
`210`	`211`	`}`
`211`	`212`
`212`		`-bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<double> scalars) {`
	`213`	`+bool can_use_fast_route(TensorList tensors1, TensorList tensors2, TensorList tensors3, ArrayRef<Scalar> scalars) {`
`213`	`214`	`return can_use_fast_route(tensors1, tensors2, tensors3);`
`214`	`215`	`}`
`215`	`216`