Skip to content

Commit f9a0d0c

Browse files
hameerabbasi authored and facebook-github-bot committed
Allow Tensor-likes in torch.autograd.gradcheck (#43877)
Summary: Fixes #42942

Pull Request resolved: #43877

Reviewed By: zou3519

Differential Revision: D23493257

Pulled By: ezyang

fbshipit-source-id: 6cdaabe17157b484e9491189706ccc15420ac239
1 parent c8914af commit f9a0d0c

4 files changed

Lines changed: 174 additions & 24 deletions

File tree

test/test_overrides.py

Lines changed: 142 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -644,27 +644,107 @@ def test(self):
644644

645645
generate_tensor_like_override_tests(TestTorchFunctionOverride)
646646

647-
class TestEinsumOverride(TestCase):
648-
"Regression test for gh-38479"
649-
def test_wrapper(self):
650-
class Wrapper():
651-
"Basic data container that knows how to unwrap itself"
652-
def __init__(self, data):
653-
self.data = data
647+
class Wrapper:
648+
"Basic data container that knows how to unwrap itself"
649+
def __init__(self, data):
650+
self.__dict__["_data"] = data
651+
self.__dict__["used_attrs"] = set()
652+
self.__dict__["used_calls"] = set()
653+
654+
def __getattr__(self, name):
655+
if name in self.__dict__:
656+
return self.__dict__[name]
657+
self.used_attrs.add(name)
658+
659+
val = getattr(self._data, name)
660+
661+
# If it's a method
662+
if callable(val):
663+
c = getattr(type(self._data), name)
664+
# Don't append self to args if classmethod/staticmethod
665+
if c is val:
666+
return lambda *a, **kw: wrap(self.__torch_function__(c, (Wrapper,), args=a, kwargs=kw))
667+
# Otherwise append self to args
668+
return lambda *a, **kw: wrap(self.__torch_function__(c, (Wrapper,), args=(self,) + a, kwargs=kw))
669+
670+
return wrap(val)
671+
672+
def __setattr__(self, name, value):
673+
if name in self.__dict__:
674+
self.__dict__[name] = value
675+
676+
self.used_attrs.add(name)
677+
setattr(self._data, name, unwrap(value))
678+
679+
def __setitem__(self, key, value):
680+
self._data[unwrap(key)] = unwrap(value)
681+
682+
def __getitem__(self, key):
683+
return wrap(self._data[unwrap(key)])
684+
685+
def __torch_function__(self, func, types, args=(), kwargs=None):
686+
if kwargs is None:
687+
kwargs = {}
688+
self.used_calls.add(func)
689+
args = unwrap(tuple(args))
690+
kwargs = {k: unwrap(v) for k, v in kwargs.items()}
691+
692+
return wrap(func(*args, **kwargs))
693+
694+
def __add__(self, other):
695+
return self.__torch_function__(torch.add, (Wrapper,), (self, other))
696+
697+
def __sub__(self, other):
698+
return self.__torch_function__(torch.sub, (Wrapper,), (self, other))
654699

655-
def __torch_function__(self, func, types, args=(), kwargs=None):
656-
if kwargs is None:
657-
kwargs = {}
700+
def __truediv__(self, other):
701+
return self.__torch_function__(torch.true_divide, (Wrapper,), (self, other))
658702

659-
# unwrap inputs if necessary
660-
def unwrap(v):
661-
return v.data if isinstance(v, Wrapper) else v
703+
def __floordiv__(self, other):
704+
return self.__torch_function__(torch.floor_divide, (Wrapper,), (self, other))
662705

663-
args = map(unwrap, args)
664-
kwargs = {k: unwrap(v) for k, v in kwargs.items()}
706+
def __ge__(self, other):
707+
return self.__torch_function__(torch.ge, (Wrapper,), (self, other))
665708

666-
return func(*args, **kwargs)
709+
def __gt__(self, other):
710+
return self.__torch_function__(torch.gt, (Wrapper,), (self, other))
667711

712+
def __lt__(self, other):
713+
return self.__torch_function__(torch.lt, (Wrapper,), (self, other))
714+
715+
def __le__(self, other):
716+
return self.__torch_function__(torch.le, (Wrapper,), (self, other))
717+
718+
def __eq__(self, other):
719+
return self.__torch_function__(torch.eq, (Wrapper,), (self, other))
720+
721+
def __ne__(self, other):
722+
return self.__torch_function__(torch.ne, (Wrapper,), (self, other))
723+
724+
def __bool__(self):
725+
return self.__torch_function__(torch.Tensor.__bool__, (Wrapper,), (self,))
726+
727+
def __int__(self):
728+
return self.__torch_function__(torch.Tensor.__int__, (Wrapper,), (self,))
729+
730+
731+
# unwrap inputs if necessary
732+
def unwrap(v):
733+
if type(v) in {tuple, list}:
734+
return type(v)(unwrap(vi) for vi in v)
735+
736+
return v._data if isinstance(v, Wrapper) else v
737+
738+
# wrap inputs if necessary
739+
def wrap(v):
740+
if type(v) in {tuple, list}:
741+
return type(v)(wrap(vi) for vi in v)
742+
743+
return Wrapper(v) if isinstance(v, torch.Tensor) else v
744+
745+
class TestEinsumOverride(TestCase):
746+
"Regression test for gh-38479"
747+
def test_wrapper(self):
668748
x = Wrapper(torch.randn(5))
669749
y = Wrapper(torch.randn(4))
670750
self.assertTrue(torch.allclose(torch.einsum('i,j->ij', x, y),
@@ -678,5 +758,51 @@ def unwrap(v):
678758
torch.nn.functional.bilinear(a, c, b)))
679759

680760

761+
class TestGradCheckOverride(TestCase):
762+
"Test that wrappers work with gradcheck."
763+
def test_gradcheck(self):
764+
from torch.autograd import gradcheck
765+
766+
a = wrap(torch.tensor(5.0, dtype=torch.double))
767+
b = wrap(torch.tensor(6.0, dtype=torch.double))
768+
769+
a.requires_grad = True
770+
b.requires_grad = True
771+
772+
gradcheck(torch.add, (a, b), raise_exception=False)
773+
774+
total_used_attrs = a.used_attrs.union(b.used_attrs)
775+
total_used_calls = a.used_calls.union(b.used_calls)
776+
777+
# These attributes (and the functions below) may change
778+
# if the gradcheck implementation changes. It's best to
779+
# aim for attributes that may be commonly present on other
780+
# Tensor-likes.
781+
self.assertEqual(total_used_attrs, {
782+
'data',
783+
'dtype',
784+
'is_floating_point',
785+
'is_sparse',
786+
'layout',
787+
'nelement',
788+
'new_zeros',
789+
'requires_grad',
790+
'retain_grad',
791+
'size',
792+
'stride',
793+
})
794+
795+
self.assertEqual(total_used_calls, {
796+
torch.Tensor.new_zeros,
797+
torch.Tensor.size,
798+
torch.Tensor.is_floating_point,
799+
torch.Tensor.nelement,
800+
torch.Tensor.retain_grad,
801+
torch.Tensor.stride,
802+
torch.autograd.grad,
803+
torch.add,
804+
})
805+
806+
681807
if __name__ == '__main__':
682808
unittest.main()

torch/autograd/__init__.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from .gradcheck import gradcheck, gradgradcheck
1717
from .grad_mode import no_grad, enable_grad, set_grad_enabled
1818
from .anomaly_mode import detect_anomaly, set_detect_anomaly
19+
from ..overrides import has_torch_function, handle_torch_function
1920
from . import profiler
2021
from . import functional
2122

@@ -167,14 +168,27 @@ def grad(
167168
used when computing outputs (and therefore their grad is always zero)
168169
is an error. Defaults to ``False``.
169170
"""
171+
outputs = (outputs,) if isinstance(outputs, torch.Tensor) else tuple(outputs)
172+
inputs = (inputs,) if isinstance(inputs, torch.Tensor) else tuple(inputs)
173+
overridable_args = outputs + inputs
174+
if has_torch_function(overridable_args):
175+
return handle_torch_function(
176+
grad,
177+
overridable_args,
178+
outputs,
179+
inputs,
180+
grad_outputs=grad_outputs,
181+
retain_graph=retain_graph,
182+
create_graph=create_graph,
183+
only_inputs=only_inputs,
184+
allow_unused=allow_unused,
185+
)
186+
170187
if not only_inputs:
171188
warnings.warn("only_inputs argument is deprecated and is ignored now "
172189
"(defaults to True). To accumulate gradient for other "
173190
"parts of the graph, please use torch.autograd.backward.")
174191

175-
outputs = (outputs,) if isinstance(outputs, torch.Tensor) else tuple(outputs)
176-
inputs = (inputs,) if isinstance(inputs, torch.Tensor) else tuple(inputs)
177-
178192
if grad_outputs is None:
179193
grad_outputs = [None] * len(outputs)
180194
elif isinstance(grad_outputs, torch.Tensor):

torch/autograd/gradcheck.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from torch.types import _TensorOrTensors
33
from torch._six import container_abcs, istuple
44
import torch.testing
5+
from torch.overrides import is_tensor_like
56
from itertools import product
67
import warnings
78
from typing import Callable, Union, Optional
@@ -17,12 +18,12 @@ def zero_gradients(x):
1718

1819

1920
def make_jacobian(input, num_out):
20-
if isinstance(input, torch.Tensor):
21+
if is_tensor_like(input):
2122
if not input.is_floating_point() and not input.is_complex():
2223
return None
2324
if not input.requires_grad:
2425
return None
25-
return torch.zeros(input.nelement(), num_out, dtype=input.dtype)
26+
return input.new_zeros((input.nelement(), num_out), dtype=input.dtype, layout=torch.strided)
2627
elif isinstance(input, container_abcs.Iterable) and not isinstance(input, str):
2728
jacobians = list(filter(
2829
lambda x: x is not None, (make_jacobian(elem, num_out) for elem in input)))
@@ -34,7 +35,7 @@ def make_jacobian(input, num_out):
3435

3536

3637
def iter_tensors(x, only_requiring_grad=False):
37-
if isinstance(x, torch.Tensor):
38+
if is_tensor_like(x):
3839
if x.requires_grad or not only_requiring_grad:
3940
yield x
4041
elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str):
@@ -253,13 +254,13 @@ def fail_test(msg):
253254
return False
254255

255256
tupled_inputs = _as_tuple(inputs)
256-
if any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)) and not check_sparse_nnz:
257+
if not check_sparse_nnz and any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)):
257258
return fail_test('gradcheck expects all tensor inputs are dense when check_sparse_nnz is set to False.')
258259

259260
# Make sure that gradients are saved for at least one input
260261
any_input_requiring_grad = False
261262
for idx, inp in enumerate(tupled_inputs):
262-
if isinstance(inp, torch.Tensor) and inp.requires_grad:
263+
if is_tensor_like(inp) and inp.requires_grad:
263264
if not (inp.dtype == torch.float64 or inp.dtype == torch.complex128):
264265
warnings.warn(
265266
'The {}th input requires gradient and '

torch/overrides.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,3 +1164,12 @@ def is_tensor_method_or_property(func: Callable) -> bool:
11641164
of ``torch.Tensor``.
11651165
"""
11661166
return func in get_tensor_methods() or func.__name__ == "__get__"
1167+
1168+
def is_tensor_like(inp):
1169+
"""
1170+
Returns ``True`` if the passed-in input is a tensor-like.
1171+
1172+
Currently, this occurs whenever there's a ``__torch_function__``
1173+
attribute on the input.
1174+
"""
1175+
return type(inp) is torch.Tensor or hasattr(inp, "__torch_function__")

0 commit comments

Comments (0)