
Commit cc7a28d

Iurii Zdebskyi authored and facebook-github-bot committed
Refactor Unary Ops tests (#49712)
Summary: Pull Request resolved: #49712
Test Plan: Imported from OSS
Reviewed By: zou3519
Differential Revision: D25673712
Pulled By: izdeby
fbshipit-source-id: 4420d5d129026195097d914e410b75b144bea795
1 parent 645a3e9 commit cc7a28d

3 files changed

Lines changed: 159 additions & 118 deletions


aten/src/ATen/native/cuda/ForeachUnaryOp.cu

Lines changed: 8 additions & 0 deletions
@@ -211,6 +211,10 @@ OP_CUSTOM_FUNCTOR(floating_complex_half_bfloat16, reciprocal, Reciprocal)
 
 std::vector<Tensor> foreach_tensor_neg_cuda(TensorList tensors) {
   check_foreach_api_restrictions(tensors);
+  TORCH_CHECK(tensors[0].scalar_type() != kBool,
+              "_foreach_neg: There is a bool tensor in the passed-in TensorList. "
+              "Negation on a bool tensor is not supported. If you are trying to invert a mask, please use the `~` "
+              "or `logical_not()` operator on the individual tensors instead.");
 
   if (!can_use_fast_route(tensors)) {
     return at::native::foreach_tensor_neg_slow(tensors);

@@ -221,6 +225,10 @@ std::vector<Tensor> foreach_tensor_neg_cuda(TensorList tensors) {
 
 void foreach_tensor_neg_cuda_(TensorList tensors) {
   check_foreach_api_restrictions(tensors);
+  TORCH_CHECK(tensors[0].scalar_type() != kBool,
+              "_foreach_neg: There is a bool tensor in the passed-in TensorList. "
+              "Negation on a bool tensor is not supported. If you are trying to invert a mask, please use the `~` "
+              "or `logical_not()` operator on the individual tensors instead.");
 
   if (!can_use_fast_route(tensors)) {
     return at::native::foreach_tensor_neg_slow_(tensors);
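
Both CUDA entry points now reject bool TensorLists up front, matching what eager torch.neg already does for bool tensors. A minimal sketch of the failure and the workaround the message suggests (the mask values and the CPU fallback are illustrative only, so the snippet runs without a GPU):

    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    masks = [torch.tensor([True, False], device=device) for _ in range(3)]

    try:
        torch._foreach_neg(masks)  # bool TensorLists are rejected with the message above
    except RuntimeError as err:
        print(err)

    # Workaround named in the error message: invert each mask individually.
    inverted = [~m for m in masks]  # same result as [m.logical_not() for m in masks]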

test/test_foreach.py

Lines changed: 22 additions & 117 deletions
@@ -1,8 +1,10 @@
 import torch
 import unittest
 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_ROCM, TEST_WITH_SLOW
-from torch.testing._internal.common_device_type import instantiate_device_type_tests, dtypes, skipCUDAIfRocm
+from torch.testing._internal.common_device_type import \
+    (instantiate_device_type_tests, dtypes, skipCUDAIfRocm, ops)
 from torch._six import inf, nan
+from torch.testing._internal.common_methods_invocations import foreach_unary_op_db
 
 # Includes some values such that N * N won't be a multiple of 4,
 # which should ensure we test the vectorized and non-vectorized
@@ -17,39 +19,6 @@ class TestForeach(TestCase):
         (torch._foreach_div, torch._foreach_div_, torch.div),
     ]
 
-    unary_ops = [
-        # foreach_op, foreach_op_, torch_op, bf16, complex64/128
-        (torch._foreach_sqrt, torch._foreach_sqrt_, torch.sqrt, True, True),
-        (torch._foreach_exp, torch._foreach_exp_, torch.exp, True, True),
-        (torch._foreach_acos, torch._foreach_acos_, torch.acos, False, True),
-        (torch._foreach_asin, torch._foreach_asin_, torch.asin, False, True),
-        (torch._foreach_atan, torch._foreach_atan_, torch.atan, False, True),
-        (torch._foreach_cos, torch._foreach_cos_, torch.cos, True, True),
-        (torch._foreach_cosh, torch._foreach_cosh_, torch.cosh, False, True),
-        (torch._foreach_log, torch._foreach_log_, torch.log, True, True),
-        (torch._foreach_log10, torch._foreach_log10_, torch.log10, True, True),
-        (torch._foreach_log2, torch._foreach_log2_, torch.log2, True, True),
-        (torch._foreach_neg, torch._foreach_neg_, torch.neg, True, True),
-        (torch._foreach_tan, torch._foreach_tan_, torch.tan, False, True),
-        (torch._foreach_tanh, torch._foreach_tanh_, torch.tanh, True, True),
-        (torch._foreach_sin, torch._foreach_sin_, torch.sin, False, True),
-        (torch._foreach_sinh, torch._foreach_sinh_, torch.sinh, False, True),
-        (torch._foreach_ceil, torch._foreach_ceil_, torch.ceil, False, False),
-        (torch._foreach_erf, torch._foreach_erf_, torch.erf, True, False),
-        (torch._foreach_erfc, torch._foreach_erfc_, torch.erfc, False, False),
-        (torch._foreach_expm1, torch._foreach_expm1_, torch.expm1, False, False),
-        (torch._foreach_floor, torch._foreach_floor_, torch.floor, False, False),
-        (torch._foreach_log1p, torch._foreach_log1p_, torch.log1p, True, False),
-        (torch._foreach_round, torch._foreach_round_, torch.round, False, False),
-        (torch._foreach_frac, torch._foreach_frac_, torch.frac, False, False),
-        (torch._foreach_reciprocal, torch._foreach_reciprocal_, torch.reciprocal, True, True),
-        (torch._foreach_sigmoid, torch._foreach_sigmoid_, torch.sigmoid, True, False),
-        (torch._foreach_trunc, torch._foreach_trunc_, torch.trunc, False, False),
-
-        # See test_abs
-        # (torch._foreach_abs, torch._foreach_abs_, torch.abs, True, True),
-    ]
-
     def _get_test_data(self, device, dtype, N):
         if dtype in [torch.bfloat16, torch.bool, torch.float16]:
             tensors = [torch.randn(N, N, device=device).to(dtype) for _ in range(N)]
@@ -157,90 +126,27 @@ def _test_bin_op_list_alpha(self, device, dtype, foreach_op, foreach_op_, torch_
         else:
             self.assertEqual(tensors1, expected)
 
-    #
-    # Unary ops
-    #
-    @dtypes(*(torch.testing.floating_and_complex_types_and(torch.bfloat16, torch.half)))
-    def test_unary_ops(self, device, dtype):
-        for fe_op, fe_op_, torch_op, support_bfloat16, support_complex in self.unary_ops:
-            for N in N_values:
-                tensors1 = self._get_test_data(device, dtype, N)
-                # Mimics cuda kernel dtype flow. With fp16/bf16 input, runs in fp32 and casts output back to fp16/bf16.
-                control_dtype = torch.float32 if (self.device_type == 'cuda' and
-                                                  (dtype is torch.float16 or dtype is torch.bfloat16)) else dtype
-
-                if self.device_type == 'cpu' and dtype == torch.half and torch_op not in [torch.neg, torch.frac, torch.reciprocal]:
-                    with self.assertRaisesRegex(RuntimeError, r"not implemented for \'Half\'"):
-                        expected = [torch_op(tensors1[i]) for i in range(N)]
-
-                    with self.assertRaisesRegex(RuntimeError, r"not implemented for \'Half\'"):
-                        res = fe_op(tensors1)
-                    break
-
-                if dtype == torch.bfloat16 and not support_bfloat16:
-                    if self.device_type == 'cuda' or torch_op in [torch.sinh, torch.cosh]:
-                        with self.assertRaisesRegex(RuntimeError, r"not implemented for \'BFloat16\'"):
-                            expected = [torch_op(tensors1[i]) for i in range(N)]
-
-                        with self.assertRaisesRegex(RuntimeError, r"not implemented for \'BFloat16\'"):
-                            res = fe_op(tensors1)
-                        break
-
-                if dtype in [torch.complex64, torch.complex128] and not support_complex:
-                    if not (self.device_type == 'cpu' and torch_op in [torch.sigmoid]):
-                        # not using assertRaisesRegex due to different error messages
-                        with self.assertRaises(RuntimeError):
-                            expected = [torch_op(tensors1[i]) for i in range(N)]
-
-                        with self.assertRaises(RuntimeError):
-                            res = fe_op(tensors1)
-                        break
-
-                expected = [torch_op(tensors1[i].to(dtype=control_dtype)).to(dtype=dtype) for i in range(N)]
-                res = fe_op(tensors1)
-                if (dtype is torch.float16 or dtype is torch.bfloat16) and TEST_WITH_ROCM:
-                    self.assertEqual(res, expected, atol=1.e-3, rtol=self.dtype_precisions[dtype][0])
-
-                    fe_op_(tensors1)
-                    self.assertEqual(res, tensors1)
-                else:
-                    self.assertEqual(res, expected)
-
-                    fe_op_(tensors1)
-                    self.assertEqual(res, tensors1)
-
-    # Separate test for abs due to a lot of special cases
-    # Absolute value of a complex number a + bj is defined as sqrt(a^2 + b^2), i.e. a floating point
-    @dtypes(*torch.testing.get_all_dtypes())
-    def test_abs(self, device, dtype):
+    @ops(foreach_unary_op_db)
+    def test_unary(self, device, dtype, op):
         for N in N_values:
-            tensors1 = self._get_test_data(device, dtype, N)
-            # Mimics cuda kernel dtype flow. With fp16/bf16 input, runs in fp32 and casts output back to fp16/bf16.
-            control_dtype = torch.float32 if (self.device_type == 'cuda' and
-                                              (dtype is torch.float16 or dtype is torch.bfloat16)) else dtype
-
-            if dtype == torch.bool and self.device_type == 'cpu':
-                with self.assertRaisesRegex(RuntimeError, r"not implemented for"):
-                    expected = [torch.abs(tensors1[i].to(dtype=control_dtype)).to(dtype=dtype) for i in range(N)]
-                continue
-
-            expected = [torch.abs(tensors1[i].to(dtype=control_dtype)).to(dtype=dtype) for i in range(N)]
-            res = torch._foreach_abs(tensors1)
-            if (dtype is torch.float16 or dtype is torch.bfloat16) and TEST_WITH_ROCM:
-                self.assertEqual(res, expected, atol=1.e-3, rtol=self.dtype_precisions[dtype][0])
-
-                torch._foreach_abs_(tensors1)
-                self.assertEqual(res, tensors1)
+            tensors = op.sample_inputs(device, dtype, N)
+            expected = [op.ref(t) for t in tensors]
+
+            method = op.get_method()
+            inplace = op.get_inplace()
+            actual = method(tensors)
+            self.assertEqual(expected, actual)
+
+            if op.safe_casts_outputs and dtype in torch.testing.integral_types_and(torch.bool):
+                with self.assertRaisesRegex(RuntimeError, "can't be cast to the desired output type"):
+                    inplace(tensors)
+            elif dtype in [torch.complex64, torch.complex128] and inplace == torch._foreach_abs_:
+                # Special case for abs
+                with self.assertRaisesRegex(RuntimeError, r"In-place abs is not supported for complex tensors."):
+                    inplace(tensors)
             else:
-                expected = [torch.abs(tensors1[i]) for i in range(N)]
-                self.assertEqual(res, expected)
-
-                if dtype in [torch.complex64, torch.complex128]:
-                    with self.assertRaisesRegex(RuntimeError, r"In-place abs is not supported for complex tensors."):
-                        torch._foreach_abs_(tensors1)
-                else:
-                    torch._foreach_abs_(tensors1)
-                    self.assertEqual(res, tensors1)
+                inplace(tensors)
+                self.assertEqual(tensors, actual)
 
     #
     # Pointwise ops
@@ -294,7 +200,6 @@ def test_min_max(self, device, dtype):
         res_min = torch._foreach_minimum(tensors1, tensors2)
         self.assertEqual(res_min, expected_min)
 
-
     @dtypes(*(torch.testing.get_all_fp_dtypes(include_half=True, include_bfloat16=False)))
     def test_max_min_float_inf_nan(self, device, dtype):
        a = [
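
The hand-maintained unary_ops table is replaced by the @ops decorator, which instantiates test_unary once per (operator, device, dtype) combination drawn from foreach_unary_op_db. A rough sketch of what a single instantiation boils down to, assuming a CPU float32 run; the lookup by name and the allclose check are illustrative, since the test harness does this wiring itself:

    import torch
    from torch.testing._internal.common_methods_invocations import foreach_unary_op_db

    # ForeachUnaryFuncInfo prefixes names with "_foreach_", so look up the full name.
    op = next(o for o in foreach_unary_op_db if o.name == "_foreach_exp")

    tensors = op.sample_inputs("cpu", torch.float32, 5)  # five 5x5 tensors
    expected = [op.ref(t) for t in tensors]              # per-tensor torch.Tensor.exp
    actual = op.get_method()(tensors)                    # torch._foreach_exp
    assert all(torch.allclose(e, a) for e, a in zip(expected, actual))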

torch/testing/_internal/common_methods_invocations.py

Lines changed: 129 additions & 1 deletion
@@ -987,6 +987,47 @@ def __init__(self,
                                              **kwargs)
         self.ref = ref
 
+def sample_inputs_foreach(self, device, dtype, N):
+    tensors = [make_tensor((N, N), device, dtype) for _ in range(N)]
+    return tensors
+
+
+def get_foreach_method_names(name):
+    # get torch inplace reference function
+    method_name = "_foreach_" + name
+    method_name_inplace = "_foreach_" + name + "_"
+
+    method = getattr(torch, method_name, None)
+    method_inplace = getattr(torch, method_name_inplace, None)
+
+    ref = getattr(torch.Tensor, name, None)
+
+    return method, method_inplace, ref
+
+class ForeachUnaryFuncInfo(OpInfo):
+    """Early version of a specialized OpInfo for foreach unary functions"""
+    def __init__(self,
+                 name,
+                 dtypes=floating_and_complex_types(),
+                 dtypesIfCPU=all_types_and_complex(),
+                 dtypesIfCUDA=floating_and_complex_types_and(torch.half),
+                 dtypesIfROCM=None,
+                 safe_casts_outputs=True,
+                 sample_inputs_func=sample_inputs_foreach,
+                 **kwargs):
+        super(ForeachUnaryFuncInfo, self).__init__("_foreach_" + name,
+                                                   dtypes=dtypes,
+                                                   dtypesIfCPU=dtypesIfCPU,
+                                                   dtypesIfCUDA=dtypesIfCUDA,
+                                                   dtypesIfROCM=dtypesIfROCM,
+                                                   safe_casts_outputs=safe_casts_outputs,
+                                                   sample_inputs_func=sample_inputs_func,
+                                                   **kwargs)
+
+        foreach_method, foreach_method_inplace, torch_ref_method = get_foreach_method_names(name)
+        self.method_variant = foreach_method
+        self.inplace_variant = foreach_method_inplace
+        self.ref = torch_ref_method
 
 class HermitianOpInfo(OpInfo):
     """Operator information for Hermitian functions
@@ -1561,7 +1602,6 @@ def _make_tensor_helper(shape, low=None, high=None):
 
     return samples
 
-
 def sample_inputs_lerp(op_info, device, dtype, requires_grad):
     def _make_tensor_helper(shape, low=None, high=None):
         return make_tensor(shape, device, dtype, low=low, high=high, requires_grad=requires_grad)
@@ -1606,6 +1646,94 @@ def _make_tensor_helper(shape, low=None, high=None):
 
     return samples
 
+foreach_unary_op_db: List[OpInfo] = [
+    ForeachUnaryFuncInfo('exp'),
+    ForeachUnaryFuncInfo('acos'),
+    ForeachUnaryFuncInfo('asin'),
+    ForeachUnaryFuncInfo('atan'),
+    ForeachUnaryFuncInfo('cos'),
+    ForeachUnaryFuncInfo('cosh'),
+    ForeachUnaryFuncInfo('log'),
+    ForeachUnaryFuncInfo('log10'),
+    ForeachUnaryFuncInfo('log2'),
+    ForeachUnaryFuncInfo('tan'),
+    ForeachUnaryFuncInfo('tanh'),
+    ForeachUnaryFuncInfo('sin'),
+    ForeachUnaryFuncInfo('sinh'),
+
+    ForeachUnaryFuncInfo('neg',
+                         dtypes=all_types_and_complex(),
+                         dtypesIfCPU=all_types_and_complex(),
+                         dtypesIfCUDA=all_types_and_complex(),
+                         sample_inputs_func=sample_inputs_foreach,
+                         safe_casts_outputs=False),
+
+    ForeachUnaryFuncInfo('sqrt',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_and_complex_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_and_complex_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('ceil',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('erf',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('erfc',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('expm1',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('floor',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('log1p',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('round',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('frac',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('reciprocal',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('sigmoid',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('trunc',
+                         dtypes=floating_types(),
+                         dtypesIfCPU=floating_types_and(torch.bfloat16),
+                         dtypesIfCUDA=floating_types_and(torch.half)),
+
+    ForeachUnaryFuncInfo('abs',
+                         dtypes=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),
+                         dtypesIfCPU=all_types_and_complex_and(torch.bfloat16, torch.half),
+                         dtypesIfCUDA=all_types_and_complex_and(torch.bfloat16, torch.half, torch.bool),
+                         safe_casts_outputs=False)
+]
 
 # Operator database (sorted alphabetically)
 op_db: List[OpInfo] = [
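
These entries replace the bf16 and complex boolean columns of the deleted unary_ops table with per-backend dtype sets. A small illustration, assuming the constructor arguments above are stored on the OpInfo under the same names (dtypesIfCPU, dtypesIfCUDA):

    import torch
    from torch.testing._internal.common_methods_invocations import foreach_unary_op_db

    sqrt_info = next(op for op in foreach_unary_op_db if op.name == "_foreach_sqrt")
    print(torch.bfloat16 in sqrt_info.dtypesIfCPU)  # bf16 sqrt is exercised on CPU
    print(torch.half in sqrt_info.dtypesIfCUDA)     # fp16 sqrt is exercised on CUDA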
