Skip to content

Commit edf751c

Browse files
smessmer authored and facebook-github-bot committed
Make empty c10-full (#46092)
Summary: Pull Request resolved: #46092 Make empty c10-full without using hacky-wrapper, i.e. port the kernel to the new style signature. This PR also changes the signature of some helpers called by empty to the new style. ghstack-source-id: 116544203 (Note: this ignores all push blocking failures!) Test Plan: vs prev diff (outdated, before c10::optional fix): https://www.internalfb.com/intern/fblearner/details/224735103/ after c10::optional fix: https://www.internalfb.com/intern/fblearner/details/231391773/ Also, after the c10::optional fix, the instruction counting benchmark shows a 2% regression for calling empty from Python. We decided this is acceptable and decided against landing D24425836 which would fix the regression. Reviewed By: ezyang Differential Revision: D24219944 fbshipit-source-id: e554096e90ce438c75b679131c3151ff8e5c5d50
1 parent 3649a2c commit edf751c

36 files changed

Lines changed: 243 additions & 181 deletions

aten/src/ATen/BatchingRegistrations.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -869,10 +869,13 @@ Tensor new_zeros_batching_rule(
869869
Tensor new_empty_batching_rule(
870870
const Tensor& self,
871871
IntArrayRef size,
872-
const TensorOptions& options) {
872+
c10::optional<ScalarType> dtype,
873+
c10::optional<Layout> layout,
874+
c10::optional<Device> device,
875+
c10::optional<bool> pin_memory) {
873876
auto physical_view = MultiBatchVmapTransform::logicalToPhysical(self);
874877
auto physical_size = physical_view.getPhysicalShape(size);
875-
auto result = physical_view.tensor().new_empty(physical_size, options);
878+
auto result = physical_view.tensor().new_empty(physical_size, TensorOptions().dtype(dtype).layout(layout).device(device).pinned_memory(pin_memory));
876879
return physical_view.newLogicalFromPhysical(result);
877880
}
878881

aten/src/ATen/ScalarOps.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,11 @@ Tensor& scalar_fill(Tensor& self, Scalar value) {
2929
return self;
3030
}
3131

32-
Tensor scalar_tensor_static(Scalar s, const TensorOptions& options) {
32+
Tensor scalar_tensor_static(Scalar s, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
33+
c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt, c10::optional<c10::MemoryFormat> memory_format_opt) {
3334
at::tracer::impl::NoTracerDispatchMode tracer_guard;
3435
at::AutoNonVariableTypeMode non_var_type_mode(true);
35-
auto result = at::detail::empty_cpu({}, options);
36+
auto result = at::detail::empty_cpu({}, dtype_opt, layout_opt, device_opt, pin_memory_opt, memory_format_opt);
3637
scalar_fill(result, s);
3738
return result;
3839
}

aten/src/ATen/ScalarOps.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ namespace detail {
1212
// but we also want to skip compute_types which in not avoidable
1313
// in TensorIterator for now.
1414
Tensor& scalar_fill(Tensor& self, Scalar value);
15-
TORCH_API Tensor scalar_tensor_static(Scalar s, const TensorOptions& options);
15+
TORCH_API Tensor scalar_tensor_static(Scalar s, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
16+
c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt,
17+
c10::optional<c10::MemoryFormat> memory_format_opt);
1618
} // namespace detail
1719
} // namespace at
1820

@@ -25,12 +27,12 @@ inline at::Tensor scalar_to_tensor(Scalar s, const Device device = at::kCPU) {
2527
// This is the fast track we have for CPU scalar tensors.
2628
if (device == at::kCPU && !s.isComplex()) {
2729
if (s.isFloatingPoint()) {
28-
return at::detail::scalar_tensor_static(s, at::device(at::kCPU).dtype(at::kDouble));
30+
return at::detail::scalar_tensor_static(s, at::kDouble, c10::nullopt, at::kCPU, c10::nullopt, c10::nullopt);
2931
} else if (s.isBoolean()) {
30-
return at::detail::scalar_tensor_static(s, at::device(at::kCPU).dtype(at::kBool));
32+
return at::detail::scalar_tensor_static(s, at::kBool, c10::nullopt, at::kCPU, c10::nullopt, c10::nullopt);
3133
} else {
3234
AT_ASSERT(s.isIntegral(false));
33-
return at::detail::scalar_tensor_static(s, at::device(at::kCPU).dtype(at::kLong));
35+
return at::detail::scalar_tensor_static(s, at::kLong, c10::nullopt, at::kCPU, c10::nullopt, c10::nullopt);
3436
}
3537
}
3638
if (s.isFloatingPoint()) {

aten/src/ATen/Utils.cpp

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,32 +16,24 @@ int _crash_if_asan(int arg) {
1616

1717
namespace detail {
1818
// empty_cpu is used in ScalarOps.h, which can be referenced by other ATen files. Since we want to decouple direct referencing native symbols and only access native symbols through dispatching, we move its implementation here.
19-
Tensor empty_cpu(
20-
IntArrayRef size,
21-
const TensorOptions& options,
22-
c10::optional<c10::MemoryFormat> optional_memory_format) {
23-
TORCH_CHECK(
24-
!(options.has_memory_format() && optional_memory_format.has_value()),
25-
"Cannot set memory_format both in TensorOptions and explicit argument; please delete "
26-
"the redundant setter.");
27-
const MemoryFormat memory_format =
28-
optional_memory_format.value_or(
29-
options.memory_format_opt().value_or(
30-
MemoryFormat::Contiguous));
19+
Tensor empty_cpu(IntArrayRef size, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
20+
c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt, c10::optional<c10::MemoryFormat> memory_format_opt) {
21+
Device device = device_or_default(device_opt);
3122

32-
AT_ASSERT(options.device().type() == DeviceType::CPU);
23+
TORCH_CHECK(device.type() == DeviceType::CPU);
3324
check_size_nonnegative(size);
3425

26+
bool pin_memory = pinned_memory_or_default(pin_memory_opt);
3527
c10::Allocator* allocator;
36-
if (options.pinned_memory()) {
28+
if (pin_memory) {
3729
allocator = detail::getCUDAHooks().getPinnedMemoryAllocator();
3830
} else {
3931
allocator = at::getCPUAllocator();
4032
}
4133

4234
int64_t nelements = prod_intlist(size);
43-
const caffe2::TypeMeta dtype = options.dtype();
44-
const int64_t size_bytes = nelements * dtype.itemsize();
35+
caffe2::TypeMeta dtype = scalarTypeToTypeMeta(dtype_or_default(dtype_opt));
36+
int64_t size_bytes = nelements * dtype.itemsize();
4537
auto storage_impl = c10::make_intrusive<StorageImpl>(
4638
c10::StorageImpl::use_byte_size_t(),
4739
size_bytes,
@@ -56,6 +48,7 @@ Tensor empty_cpu(
5648
tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
5749
}
5850

51+
auto memory_format = memory_format_opt.value_or(MemoryFormat::Contiguous);
5952
tensor.unsafeGetTensorImpl()->empty_tensor_restride(memory_format);
6053

6154
return tensor;

aten/src/ATen/Utils.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,8 @@ inline void check_size_nonnegative(IntArrayRef size) {
136136

137137
namespace detail {
138138
CAFFE2_API
139-
Tensor empty_cpu(
140-
IntArrayRef size,
141-
const TensorOptions& options = {},
142-
c10::optional<MemoryFormat> memory_format = c10::nullopt);
139+
Tensor empty_cpu(IntArrayRef size, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
140+
c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt, c10::optional<c10::MemoryFormat> memory_format_opt);
143141
} // namespace detail
144142

145143
} // at

aten/src/ATen/native/MetaTensor.cpp

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,34 +7,29 @@ namespace native {
77
// Will be promoted to a public API later, but not now
88
Tensor empty_meta(
99
IntArrayRef size,
10-
const TensorOptions& options_,
11-
c10::optional<c10::MemoryFormat> optional_memory_format
10+
c10::optional<ScalarType> dtype,
11+
c10::optional<Layout> layout,
12+
c10::optional<Device> device,
13+
c10::optional<bool> pin_memory,
14+
c10::optional<c10::MemoryFormat> memory_format
1215
) {
13-
TORCH_CHECK(
14-
!(options_.has_memory_format() && optional_memory_format.has_value()),
15-
"Cannot set memory_format both in TensorOptions and explicit argument; please delete "
16-
"the redundant setter.");
17-
TensorOptions options = options_.merge_memory_format(optional_memory_format);
18-
1916
// TODO: deduplicate this logic with empty_cpu
2017

21-
auto dtype = options.dtype();
22-
auto device = options.device();
2318
auto tensor = detail::make_tensor<TensorImpl>(
2419
// NB: We include the computed dispatch key, not because it will actually
2520
// participate in dispatch, but so that tests like is_sparse/is_cuda
2621
// give the correct result (a CUDA meta tensor "is cuda"). If we don't
2722
// like this, remove the computeDispatchKey line
28-
DispatchKeySet{DispatchKey::Meta, options.computeDispatchKey()},
29-
dtype,
23+
DispatchKeySet{DispatchKey::Meta, computeDispatchKey(dtype, layout, device)},
24+
scalarTypeToTypeMeta(dtype_or_default(dtype)),
3025
device
3126
);
3227
if (size.size() != 1 || size[0] != 0) {
3328
tensor.unsafeGetTensorImpl()->set_sizes_contiguous(size);
3429
}
3530

36-
auto memory_format = options.memory_format_opt().value_or(MemoryFormat::Contiguous);
37-
tensor.unsafeGetTensorImpl()->empty_tensor_restride(memory_format);
31+
auto memory_format_ = memory_format.value_or(MemoryFormat::Contiguous);
32+
tensor.unsafeGetTensorImpl()->empty_tensor_restride(memory_format_);
3833

3934
return tensor;
4035
}

aten/src/ATen/native/TensorFactories.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,9 @@ Tensor polar(const Tensor& abs, const Tensor& angle) {
165165
}
166166

167167
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
168-
Tensor empty_cpu(IntArrayRef size, const TensorOptions& options_, c10::optional<c10::MemoryFormat> optional_memory_format) {
169-
return at::detail::empty_cpu(size, options_, optional_memory_format);
168+
Tensor empty_cpu(IntArrayRef size, c10::optional<ScalarType> dtype_opt, c10::optional<Layout> layout_opt,
169+
c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt, c10::optional<c10::MemoryFormat> memory_format_opt) {
170+
return at::detail::empty_cpu(size, dtype_opt, layout_opt, device_opt, pin_memory_opt, memory_format_opt);
170171
}
171172

172173
Tensor empty(
@@ -186,9 +187,10 @@ Tensor empty(
186187
return result;
187188
}
188189

189-
Tensor empty_strided_cpu(IntArrayRef size, IntArrayRef stride, const TensorOptions& options) {
190+
Tensor empty_strided_cpu(IntArrayRef size, IntArrayRef stride, c10::optional<ScalarType> dtype_opt,
191+
c10::optional<Layout> layout_opt, c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt) {
190192
check_size_nonnegative(size);
191-
auto t = at::native::empty_cpu({0}, options);
193+
auto t = at::native::empty_cpu({0}, dtype_opt, layout_opt, device_opt, pin_memory_opt);
192194
at::native::resize_impl_cpu_(t.unsafeGetTensorImpl(), size, stride);
193195
return t;
194196
}
@@ -336,9 +338,16 @@ Tensor empty_like(
336338
Tensor new_empty(
337339
const Tensor& self,
338340
IntArrayRef size,
339-
const TensorOptions& options
341+
c10::optional<ScalarType> dtype_opt,
342+
c10::optional<Layout> layout_opt,
343+
c10::optional<Device> device_opt,
344+
c10::optional<bool> pin_memory_opt
340345
) {
341-
return at::empty(size, self.options().merge_in(options));
346+
auto dtype = dtype_opt.has_value() ? dtype_opt : optTypeMetaToScalarType(self.options().dtype_opt());
347+
auto layout = layout_opt.has_value() ? layout_opt : self.options().layout_opt();
348+
auto device = device_opt.has_value() ? device_opt : self.options().device_opt();
349+
auto pin_memory = pin_memory_opt.has_value() ? pin_memory_opt : self.options().pinned_memory_opt();
350+
return at::empty(size, dtype, layout, device, pin_memory, c10::nullopt);
342351
}
343352

344353
Tensor new_empty_strided(
@@ -507,7 +516,7 @@ Tensor scalar_tensor(Scalar s, const TensorOptions& options) {
507516
// auto result = at::empty({}, options);
508517
at::tracer::impl::NoTracerDispatchMode tracer_guard;
509518
at::AutoNonVariableTypeMode non_var_type_mode(true);
510-
auto result = empty_cpu({}, options);
519+
auto result = empty_cpu({}, optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt());
511520
at::native::fill_(result, s);
512521
return result;
513522
}
@@ -735,13 +744,14 @@ Tensor range(
735744
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ triangle ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
736745

737746
Tensor tril_indices_cpu(
738-
int64_t row, int64_t col, int64_t offset, const TensorOptions& options) {
739-
check_args(row, col, options);
747+
int64_t row, int64_t col, int64_t offset, c10::optional<ScalarType> dtype_opt,
748+
c10::optional<Layout> layout_opt, c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt) {
749+
check_args(row, col, layout_opt);
740750

741751
auto tril_size = get_tril_size(row, col, offset);
742752

743753
// create an empty Tensor with correct size
744-
auto result = at::empty({2, tril_size}, options);
754+
auto result = at::native::empty_cpu({2, tril_size}, dtype_opt, layout_opt, device_opt, pin_memory_opt);
745755

746756
// The following three approaches result in very little performance
747757
// differences. Hence, the 2nd option is taken for simpler code, and to return
@@ -780,13 +790,14 @@ Tensor tril_indices_cpu(
780790
}
781791

782792
Tensor triu_indices_cpu(
783-
int64_t row, int64_t col, int64_t offset, const TensorOptions& options) {
784-
check_args(row, col, options);
793+
int64_t row, int64_t col, int64_t offset, c10::optional<ScalarType> dtype_opt,
794+
c10::optional<Layout> layout_opt, c10::optional<Device> device_opt, c10::optional<bool> pin_memory_opt) {
795+
check_args(row, col, layout_opt);
785796

786797
auto triu_size = row * col - get_tril_size(row, col, offset - 1);
787798

788799
// create an empty Tensor with correct size
789-
auto result = at::empty({2, triu_size}, options);
800+
auto result = at::native::empty_cpu({2, triu_size}, dtype_opt, layout_opt, device_opt, pin_memory_opt);
790801

791802
AT_DISPATCH_ALL_TYPES(result.scalar_type(), "triu_indices", [&]() -> void {
792803
// fill the Tensor with correct values

aten/src/ATen/native/TensorFactories.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,14 @@ inline int64_t get_tril_size(int64_t row, int64_t col, int64_t offset) {
5050
}
5151

5252
inline void check_args(
53-
int64_t row, int64_t col, const TensorOptions& options) {
53+
int64_t row, int64_t col, c10::optional<Layout> layout_opt) {
5454
TORCH_CHECK(row >= 0, "row must be non-negative, got", row);
5555
TORCH_CHECK(col >= 0, "col must be non-negative, got", col);
56-
if (options.has_layout()) {
56+
if (layout_opt.has_value()) {
5757
TORCH_CHECK(
58-
options.layout() == at::kStrided,
58+
*layout_opt == at::kStrided,
5959
"only support layout=torch.strided, got",
60-
options.layout())
60+
*layout_opt)
6161
}
6262
}
6363

aten/src/ATen/native/cuda/Indexing.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,8 @@ void nonzero_cuda_out_impl(const Tensor& self, Tensor& out){
882882
//However, out with correct sizes and incorrect strides will have to be copied to from the intermediate we've produced.
883883
bool need_to_copy = out.dim() == 2 && out.sizes()[0] == num_nonzeros_h && out.sizes()[1] == self.dim() && !out.t().is_contiguous();
884884
at::Tensor out_temp = need_to_copy ?
885-
at::native::empty_cuda({self.dim(), num_nonzeros_h}, out.options()) :
885+
at::native::empty_cuda({self.dim(), num_nonzeros_h}, optTypeMetaToScalarType(out.options().dtype_opt()),
886+
out.options().layout_opt(), out.options().device_opt(), out.options().pinned_memory_opt()) :
886887
out.resize_({self.dim(), num_nonzeros_h});
887888
//Scalars are expected to produce output of size (1,0), so we can't write to it
888889
if (self.dim() > 0) {
@@ -931,7 +932,7 @@ Tensor& nonzero_out_cuda(Tensor& out, const Tensor& self){
931932
}
932933

933934
Tensor nonzero_cuda(const Tensor& self){
934-
Tensor out = at::native::empty_cuda({0}, self.options().dtype(kLong));
935+
Tensor out = at::native::empty_cuda({0}, kLong, self.options().layout_opt(), self.options().device_opt(), self.options().pinned_memory_opt());
935936
return nonzero_out_cuda(out, self);
936937
}
937938

aten/src/ATen/native/cuda/MultinomialKernel.cu

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,9 @@ void multinomial_kernel_impl(Tensor& result, const Tensor& self, const int64_t n
322322
// To exploit greater parallelism for the sampling, generate the
323323
// Uniform random samples in a separate kernel launch, into
324324
// temporarily allocated memory. The device RNG is thread-limited
325-
Tensor sampled = native::empty_cuda({numDist, n_sample}, self_v.options());
325+
Tensor sampled = native::empty_cuda({numDist, n_sample}, optTypeMetaToScalarType(self_v.options().dtype_opt()),
326+
self_v.options().layout_opt(), self_v.options().device_opt(),
327+
self_v.options().pinned_memory_opt());
326328
at::native::uniform_(sampled, 0.0, 1.0, generator);
327329
328330
dim3 block(numCategories < maxThreads ? numCategories : maxThreads);

0 commit comments

Comments (0)