pytorch · Baranowski · Apr 13, 2020
diff --git a/aten/src/ATen/Declarations.cwrap b/aten/src/ATen/Declarations.cwrap
@@ -146,19 +146,6 @@
       - THIndexTensor* index
       - real value
 ]]
-[[
-  name: _th_equal
-  cname: equal
-  backends:
-    - CUDA
-  cuda_bool: True
-  variants:
-    - function
-  return: bool
-  arguments:
-    - THTensor* self
-    - THTensor* other
-]]
 [[
   name: _th_mode
   variants: function

diff --git a/aten/src/ATen/native/cuda/ReduceLogicKernel.cu b/aten/src/ATen/native/cuda/ReduceLogicKernel.cu
@@ -24,4 +24,21 @@ void or_kernel_cuda(TensorIterator& iter) {
 REGISTER_DISPATCH(and_stub, &and_kernel_cuda);
 REGISTER_DISPATCH(or_stub, &or_kernel_cuda);
 
+bool cuda_equal(const Tensor& self, const Tensor &src) {  // True only if names, sizes and every element of self and src match.
+  if (!at::namedinference::are_names_equal(
+        self.unsafeGetTensorImpl(), src.unsafeGetTensorImpl())) {
+    return false;  // Names differ: reported as unequal before the device check below runs.
+  }
+  at::NoNamesGuard guard;  // NOTE(review): presumably disables name handling for the rest of this scope - confirm.
+  TORCH_CHECK(self.device() == src.device(), "Cannot compare two tensors on "
+      "different devices. Got: ", self.device(), " and ", src.device());  // Device mismatch goes through TORCH_CHECK instead of yielding false.
+  if (self.sizes() != src.sizes()) {
+    return false;  // Sizes differ, so the tensors cannot be equal.
+  }
+  if (self.numel() == 0) {
+    return true;  // Same sizes and zero elements: equal, and the comparison below is skipped.
+  }
+  return at::native::eq(self, src).all().item().to<bool>();  // Elementwise eq, reduced with all(), then read back as a bool.
+}
+
 }} // namespace at::native
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
@@ -5014,7 +5014,7 @@
   variants: method, function
   dispatch:
     CPU: cpu_equal
-    CUDA: legacy::cuda::_th_equal
+    CUDA: cuda_equal
     QuantizedCPU: quantized_equal_cpu
 
 - func: pow.Tensor_Tensor_out(Tensor self, Tensor exponent, *, Tensor(a!) out) -> Tensor(a!)

diff --git a/aten/src/THC/generic/THCTensorMathPairwise.cu b/aten/src/THC/generic/THCTensorMathPairwise.cu
@@ -4,41 +4,6 @@
 
 #include <ATen/NamedTensorUtils.h>
 
-static int THCTensor_(equalImpl)(THCState *state, THCTensor *self_, THCTensor *src_)
-{
-  THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, self_, src_));
-  if (!THCTensor_(isSameSizeAs(state, self_, src_))) {
-    return 0;
-  }
-
-  // This is not as efficient as TH, but the basic idea: create a buffer that stores
-  // 1 if the two tensors are equal at a position, otherwise 0. If the minimum value
-  // in this buffer is 1, the two tensors are equal, otherwise they are not
-
-  // Both tensors are empty
-  if(THTensor_(nElement)(self_) == 0) return true;
-
-  THCudaByteTensor *buf = at::empty_like(THTensor_wrap(self_), at::kByte).unsafeReleaseTensorImpl();
-
-  if (!THC_pointwiseApply3<uint8_t, scalar_t, scalar_t>(state, buf, self_, src_, TensorEQOp<scalar_t, unsigned char>())) {
-    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
-  }
-
-  unsigned char min = THTensor_wrap(buf).min().item<unsigned char>();
-
-  THCudaByteTensor_free(state, buf);
-
-  return min != 0;
-}
-
-int THCTensor_(equal)(THCState *state, THCTensor *self_, THCTensor *src_) {
-  if (!at::namedinference::are_names_equal(self_, src_)) {
-    return 0;
-  }
-  at::NoNamesGuard guard;
-  return THCTensor_(equalImpl)(state, self_, src_);
-}
-
 #if !defined(THC_REAL_IS_BOOL)
 
 void THCTensor_(mul)(THCState *state, THCTensor *self_, THCTensor *src_, scalar_t value)