
Commit fdf02ef

albanD authored and facebook-github-bot committed
Add base forward grad logic (#49097)
Summary: Pull Request resolved: #49097 RFC: pytorch/rfcs#11

This PR adds the basic logic to handle forward grads as dual Tensors. It contains the following:
- A mechanism to save dual state on a Tensor and clear it when the dual level ends
- C++ and Python user-facing APIs
- An updated view system that is able to track both forward and backward views

The current PR has the following limitations:
- Extensive tests are in the next PR in the stack, as formulas are needed to write full tests.
- Only the manual formulas have been audited; no other formula is actually implemented here (they are in the next PR in the stack).
- Only level 0 is allowed for now. This was discussed and it was agreed that nested levels are not needed for the first version of this PR.
- We could save one ViewInfo creation when both the forward and backward views have the same base, by adding a boolean flag to the DifferentiableViewMeta and extra logic in the `as_view` method. This is left out to keep this PR concise.
- We could skip tracking forward views if the base has a forward grad, by adding extra logic in the `as_view` method. This is left out to keep this PR concise.

Reading guide:
- Updated view handling in [gen_variable_type.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-f6553cec68caeaea36f6c8b14ff76a6d39dfd774e0ea9ef2f76e8d81fd9af5df), [VariableTypeUtils.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-ec71cfa45954dece1236c661d170e6341879c5be637f4abf52e826d61b40695a), [variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-60e3bfe444e89efc7149f25b38e472710525984789934ab83f1bd5671b8ff285) (skip the code below "[Forward Grad View]" for now), [variable.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-1604bcd0e4350ed99ec45e437cee7ac9ebe337392c9ea16a236247aeeb35b02bR266-R542) and [custom_function.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-dd85f452082b5bb6612bbc12adb496f8827defa228509f7b493de1d517522d5d). This introduces the new ViewInfo that holds the view information shared by forward and backward, updates the differentiable view meta to use it, and updates the `as_view` function to handle both forward and backward views.
- New forward grad class that handles storing gradients and tracking at each level: [forward_grad.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-c6c5b9ab2d7e5dde4102495faa1b6bbbfc23aa3e47deb7359c0bfe1eb004c0cb), [forward_grad.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-de2ab54ade7312701850d71a119a4f4ee4b9fc5a9c42a467cdd4e73c033531dd) and [build_variables.bzl](https://github.com/pytorch/pytorch/pull/49097/files#diff-dfdfa2efb17beddfd9094524f95351fd197db6c8857e96b436fb599870359325). EDIT: these files also contain the new flag to globally disable forward AD, which lets us limit performance impact while this is in development.
- Lowest-level API and binding between Tensor and AutogradMeta in [TensorBody.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-7554853205392fa743357bf845ecc350a974ec049383248c12daaf2f4de04911), [TensorImpl.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-052bd9150ef8e09289ddf644b5a6830ede49207201cd41728f6d7cc6d9cead94), [TensorImpl.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-a15aae4cf23da44970db7cece62ff981265575c798c62f7b52d87c8809dfe2e1) and the rest of [variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-60e3bfe444e89efc7149f25b38e472710525984789934ab83f1bd5671b8ff285R557-R677)
- API to access the forward primal, which needs to be a differentiable function (and so appears in native_functions.yaml): [native_functions.yaml](https://github.com/pytorch/pytorch/pull/49097/files#diff-2f3dbd85efb9b5172f2264eedd3be47dd765e6ab7cc8bf3ade5e62c28ae35991), [NamedRegistrations.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-69bd3bea510c9b64e1633fa18c3ea63d4b8348dbad3a78ad9de844ab3e43dc1d), [VariableMethodStubs.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-23f5fcb737a2b289811fe0f4b65aef775e7c824b2e629ecd343df51405cd434f), [derivatives.yaml](https://github.com/pytorch/pytorch/pull/49097/files#diff-e4c2f99a2404e98c3586e07425da73008f36b1bada790648a7297af141d37f8c), [gen_python_functions.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-e4c2f99a2404e98c3586e07425da73008f36b1bada790648a7297af141d37f8c), [gen_trace_type.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-54e0b976027bf8debefb959ff360b89ae93466970c843365b1b3a03806d868ce), [TraceTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-f34636741ad4a23d018e0c289bc750c3bad887b45660e1d6eaf440d234a78fbf) and [part of VariableTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-6e19a1bce8cbdba8714b6e2c794a76bc0864b64a49cfa757cb0b5afdc937d1a4R198-R243)
- C++ API: [autograd.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-349028fbe8291a965a7a263c323b208fe071c35c66179ee997ef84fa81aa4b1e), [autograd.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-a3fe908d67dfec16a1fcde300de68b0701bf68b88db7451f29f2bee255cf30c9)
- Python binding: [init.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-c58a67c85191c22c9b3bb439117d8053edfd9dea839fa010cf967d404c3c630d)
- Python API (see the usage sketch below): [forward_ad.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-a4efad4ba18fffdfb264c21e5475997a24a743089a899f8ec1a5ff962c6738d9), [autograd/__init__.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-743abcafd32ad0e69f39ac5a91df4197b7e1921c135cacee7ef6dc829a8a7af8)
- C++ and Python printing: [Formatting.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-881dba501e71662e2e4818b4b016f739b344c8aed2f5edc6b871eda47a2aced0), [_tensor_str.py](https://github.com/pytorch/pytorch/pull/49097/files#diff-a7911f8d5e73adbff914d99fd7818ace2a7030b6a3748abe06ec6fc6e3df9cc3)
- Utilities for formulas and updated manual functions that respect the new view system as well as forward grad: [FunctionsManual.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-6378bb6dc81a64dab676d61731341fa5d1088418f32a1473a33a0ccfc2357dc1), [FunctionsManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-4adbd88239afcd60e8198aab65d4f5e43b62314e34b80551e997a1ea503adea5), [rest of VariableTypeManual.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-6e19a1bce8cbdba8714b6e2c794a76bc0864b64a49cfa757cb0b5afdc937d1a4R264-R433)
- Ensure SavedVariable saves the forward grad properly: [saved_variable.h](https://github.com/pytorch/pytorch/pull/49097/files#diff-c1b8039d776241abe177d5aa99b79dd9489a9b3e529da8ab24c2e386c1238ae2), [saved_variable.cpp](https://github.com/pytorch/pytorch/pull/49097/files#diff-cc9fba479b5beae06b2eea2e390d17796e0341c5b037a20b5bcaccbb0c341030)

Test Plan: Imported from OSS

Reviewed By: mrshenli

Differential Revision: D25607503

Pulled By: albanD

fbshipit-source-id: f1396290de1d75760f3d380c43cdd56e86fa6099
1 parent befe337 commit fdf02ef

37 files changed

Lines changed: 1442 additions & 153 deletions
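To make the reading guide concrete, here is a minimal sketch of the user-facing Python API introduced by this PR, mirroring what the new TestAutogradForwardMode test further down exercises. It only shows the plumbing (creating and unpacking a dual Tensor inside a dual level); derivative formulas for most ops land in the next PR in the stack.

```python
import torch
import torch.autograd.forward_ad as fwAD

primal = torch.rand(2)
tangent = torch.rand(2)

# Forward-mode state is only tracked while a dual level is active.
with fwAD.dual_level():
    # Attach the tangent to (a view of) the primal to form a dual Tensor.
    dual = fwAD.make_dual(primal, tangent)

    # Recover both parts: the primal comes back as a view of the dual,
    # and the tangent is reused as-is (the new test checks identity).
    p, t = fwAD.unpack_dual(dual)
    assert t is tangent
# When the level exits, the dual state saved at that level is cleared.
```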

aten/src/ATen/core/Formatting.cpp

Lines changed: 5 additions & 0 deletions
@@ -292,6 +292,11 @@ std::ostream& print(std::ostream& stream, const Tensor & tensor_, int64_t linesize) {
       stream << ", axis: " << tensor_.q_per_channel_axis();
     }
   }
+
+  auto& fw_grad = tensor.fw_grad(/* level */ 0);
+  if (fw_grad.defined()) {
+    stream << ", tangent:" << std::endl << fw_grad;
+  }
   stream << " ]";
 }
 return stream;
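The hunk above makes the C++ printer emit the tangent of a dual Tensor; the reading guide notes that `_tensor_str.py` does the same on the Python side. A small sketch of how one might observe this, assuming the Python repr also appends a tangent entry (the exact formatting is defined in `_tensor_str.py`, not here):

```python
import torch
import torch.autograd.forward_ad as fwAD

with fwAD.dual_level():
    dual = fwAD.make_dual(torch.rand(2), torch.rand(2))
    # For a dual Tensor, the printed output is expected to include its
    # tangent in addition to the primal values.
    print(dual)
```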

aten/src/ATen/core/NamedRegistrations.cpp

Lines changed: 1 addition & 0 deletions
@@ -510,4 +510,5 @@ TORCH_LIBRARY_IMPL(aten, Named, m) {
   m.impl("_version", CppFunction::makeFallthrough());
   m.impl("requires_grad_", CppFunction::makeFallthrough());
   m.impl("retain_grad", CppFunction::makeFallthrough());
+  m.impl("_fw_primal", CppFunction::makeFallthrough());
 }
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+#include <ATen/ATen.h>
+
+namespace at {
+namespace native {
+
+/// This function can be used to create a dual Tensor that holds a tangent to compute forward mode gradients.
+/// Note that the dual Tensor's primal is a view of the given primal and the given tangent is used as-is.
+/// This function is backward differentiable.
+at::Tensor make_dual(const at::Tensor& primal, const at::Tensor& tangent, int64_t level) {
+  TORCH_CHECK(!primal.fw_grad(level).defined(), "Making a dual Tensor based on a Tensor that "
+              "already has a forward gradient at the same level ", level, " is not supported.");
+
+  auto dual_tensor = primal.view(primal.sizes());
+  dual_tensor.set_fw_grad(tangent, level, /* is_inplace_op */ false);
+  return dual_tensor;
+}
+
+/// This function can be used to unpack a given dual Tensor to get its primal and tangent. The returned primal
+/// is a view of the dual and the tangent is returned as is.
+/// This function is backward differentiable.
+std::tuple<at::Tensor, at::Tensor> unpack_dual(const at::Tensor& tensor, int64_t level) {
+  return std::tuple<at::Tensor, at::Tensor>(tensor._fw_primal(level), tensor.fw_grad(level));
+}
+
+} // namespace native
+
+} // namespace at
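The `TORCH_CHECK` in `make_dual` above means a Tensor can carry at most one tangent per level. A hedged sketch of that error path from Python, assuming the check surfaces as a `RuntimeError` through the binding:

```python
import torch
import torch.autograd.forward_ad as fwAD

with fwAD.dual_level():
    dual = fwAD.make_dual(torch.rand(2), torch.rand(2))
    try:
        # `dual` already has a forward gradient at level 0, so this is
        # rejected by the TORCH_CHECK in make_dual.
        fwAD.make_dual(dual, torch.rand(2))
    except RuntimeError as err:
        print(err)
```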

aten/src/ATen/native/VariableMethodStubs.cpp

Lines changed: 4 additions & 0 deletions
@@ -40,5 +40,9 @@ void retain_grad(Tensor& self) {
   AT_ERROR("retain_grad is not implemented for Tensor");
 }
 
+Tensor _fw_primal(const Tensor& self, int64_t level) {
+  AT_ERROR("_fw_primal is not implemented for Tensor");
+}
+
 } // namespace native
 } // namespace at

aten/src/ATen/native/native_functions.yaml

Lines changed: 14 additions & 0 deletions
@@ -105,6 +105,20 @@
   manual_kernel_registration: True
   variants: method
 
+- func: _fw_primal(Tensor(a) self, int level) -> Tensor(a)
+  use_c10_dispatcher: full
+  variants: method
+  dispatch:
+    DefaultBackend: _fw_primal
+
+- func: make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
+  use_c10_dispatcher: full
+  variants: function
+
+- func: unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)
+  use_c10_dispatcher: full
+  variants: function
+
 - func: rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!)
   use_c10_dispatcher: full
   variants: method

aten/src/ATen/templates/TensorBody.h

Lines changed: 17 additions & 0 deletions
@@ -599,6 +599,23 @@ class TORCH_API Tensor {
     return impl_->grad();
   }
 
+  // The Forward AD API functions below are low level and are not to be used by end
+  // users who should use the API provided in torch/csrc/autograd.h
+
+  /// This function returns the forward gradient for this Tensor at the given level.
+  const Tensor& fw_grad(uint64_t level) const {
+    return impl_->fw_grad(level, *this);
+  }
+
+  /// This function can be used to set the value of the forward grad.
+  /// Note that the given new_grad might not be used directly if it has different
+  /// metadata (size/stride/storage offset) compared to this Tensor. In that case,
+  /// new_grad content will be copied into a new Tensor
+  void set_fw_grad(const Tensor& new_grad, uint64_t level, bool is_inplace_op) {
+    impl_->set_fw_grad(new_grad, *this, level, is_inplace_op);
+  }
+
+
   // STOP. Thinking of adding a method here, which only makes use
   // of other ATen methods? Define it in native_functions.yaml.

c10/core/TensorImpl.cpp

Lines changed: 11 additions & 0 deletions
@@ -44,6 +44,17 @@ const at::Tensor& TensorImpl::grad() const {
   return autograd_meta_->grad();
 }
 
+const at::Tensor& TensorImpl::fw_grad(uint64_t level, const at::Tensor& self) const {
+  // See TensorImpl::grad() above for explanation about the line below
+  if (!autograd_meta_) return impl::GetAutogradMetaFactory()->undefined_tensor();
+  return autograd_meta_->fw_grad(level, self);
+}
+
+void TensorImpl::set_fw_grad(const at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op) {
+  if (!autograd_meta_) autograd_meta_ = impl::GetAutogradMetaFactory()->make();
+  autograd_meta_->set_fw_grad(new_grad, self, level, is_inplace_op);
+}
+
 TensorImpl::TensorImpl(
     Storage&& storage,
     DispatchKeySet key_set,
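Note that `set_fw_grad` above lazily creates the `AutogradMeta`, so a Tensor does not need to participate in backward AD to carry a tangent. The new test below relies on this by building a dual from a plain `torch.rand(2)`; a minimal sketch of the same idea:

```python
import torch
import torch.autograd.forward_ad as fwAD

primal = torch.rand(2)  # requires_grad is False, no backward graph involved
with fwAD.dual_level():
    dual = fwAD.make_dual(primal, torch.rand(2))
    # The tangent is stored even though the Tensor has no backward state.
    assert fwAD.unpack_dual(dual)[1] is not None
```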

c10/core/TensorImpl.h

Lines changed: 38 additions & 0 deletions
@@ -136,6 +136,8 @@ struct C10_API AutogradMetaInterface {
   virtual bool requires_grad() const = 0;
   virtual at::Tensor& mutable_grad() = 0;
   virtual const at::Tensor& grad() const = 0;
+  virtual const at::Tensor& fw_grad(uint64_t level, const at::Tensor& self) const = 0;
+  virtual void set_fw_grad(const at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op) = 0;
   virtual ~AutogradMetaInterface();
 };

@@ -598,6 +600,42 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   */
  const at::Tensor& grad() const;

+  /**
+   * Return the accumulated gradient of a tensor. This gradient is computed
+   * using forward mode AD.
+   *
+   * This is an internal API that should never be used by end users.
+   *
+   * The API is as follows:
+   * - "level" allows to specify the level of forward AD nesting for which the
+   *   gradient should be returned. Note that since levels are not fully
+   *   supported yet, this argument should be 0. See documentation for
+   *   torch::autograd::enter_dual_level for more details about forward AD nesting.
+   * - "self" should represent the Tensor whose forward grad is accessed. It is
+   *   required when dealing with view.
+   */
+  const at::Tensor& fw_grad(uint64_t level, const at::Tensor& self) const;
+
+  /**
+   * Sets the forward gradient for this Tensor.
+   * The given Tensor might not be used directly and its content will be copied.
+   *
+   * This is an internal API that should never be used by end users.
+   *
+   * The API is as follows:
+   * - "new_grad" is a Tensor containing the new value of the gradient that should
+   *   be set
+   * - "self" should reprensent the Tensor whose forward grad is accessed. It is
+   *   required when dealing with view.
+   * - "level" allows to specify the level of forward AD nesting for which the
+   *   gradient should be set. Note that since levels are not fully supported
+   *   yet, this argument should be 0. See documentation for torch::autograd::enter_dual_level
+   *   for more details about forward AD nesting.
+   * - "is_inplace_op" is a boolean flag that tells if this gradient was generated
+   *   by an inplace operation or an out of place one. This allows better error checking.
+   */
+  void set_fw_grad(const at::Tensor& new_grad, const at::Tensor& self, uint64_t level, bool is_inplace_op);
+
   /**
    * Return a typed data pointer to the actual data which this tensor refers to.
    * This checks that the requested type (from the template parameter) matches

test/test_autograd.py

Lines changed: 81 additions & 0 deletions
@@ -35,6 +35,7 @@
     IS_WINDOWS, IS_MACOS, CudaMemoryLeakCheck)
 from torch.autograd import Variable, Function, detect_anomaly, kineto_available
 from torch.autograd.function import InplaceFunction
+import torch.autograd.forward_ad as fwAD
 from torch.testing import randn_like
 from torch.testing._internal.common_methods_invocations import (method_tests,
                                                                 create_input, unpack_variables,

@@ -5326,6 +5327,26 @@ def fn(a, dim0_size=5):
 
         self.assertEqual(x.grad, y.grad)
 
+    def test_view_with_multi_output(self):
+        x = torch.randn(2, 2, 2, dtype=torch.double)
+
+        x1 = torch.view_as_complex(x)
+        # Taking an invalid view should always be allowed as long as it is not
+        # modified inplace
+        res = x1.unbind(0)
+
+        with self.assertRaisesRegex(RuntimeError, "output of a function that returns multiple views"):
+            res[0] += torch.rand(2, requires_grad=True)
+
+        x.requires_grad_(True)
+        x1 = torch.view_as_complex(x)
+        # Taking an invalid view should always be allowed as long as it is not
+        # modified inplace
+        res = x1.unbind(0)
+
+        with self.assertRaisesRegex(RuntimeError, "output of a function that returns multiple views"):
+            res[0] += torch.rand(2, requires_grad=True)
+
     def as_identity(self):
         # view_as_real and view_as_complex behavior should be like an identity
         def func(z):

@@ -6324,6 +6345,66 @@ def foo(a):
         self.assertEqual(hvp, torch.mm(hes, v.unsqueeze(1)).squeeze(1))
         self.assertEqual(vhp, torch.mm(v.unsqueeze(0), hes).squeeze(0))
 
+class TestAutogradForwardMode(TestCase):
+    def test_forward_level_cleanup(self):
+        import weakref
+
+        def get_tensor_and_weak_ref():
+            # Helper function to get a Tensor and a weak ref that tells us
+            # if the c++ version of this Tensor is still alive or not.
+            #
+            # Create the following reference chain to do so:
+            # - python Tensor t
+            # - c++ Tensor corresponding by t
+            # - c++ Node corresponding to t.grad_fn
+            # - python dict of metadata from this Node
+            # - an object in this dict that we can take a weakref of
+
+
+            # Create a new Tensor and Node
+            t = torch.rand(2, requires_grad=True).clone()
+            # Create the metadata dict
+            meta_dict = t.grad_fn.metadata
+            # Create the object in the dict
+
+            class Foo(object):
+                pass
+            my_obj = Foo()
+            meta_dict[0] = my_obj
+
+            # After exiting this function, the python Tensor t is the only
+            # thing keeping ref alive
+            ref = weakref.ref(my_obj)
+            return t, ref
+
+        # Sanity check that the helper function works as expected
+        t, t_ref = get_tensor_and_weak_ref()
+        self.assertIsNotNone(t_ref())
+
+        del t
+        self.assertIsNone(t_ref())
+
+        # Main test code
+        foo = torch.rand(2)
+
+        with fwAD.dual_level():
+            tangent, tangent_ref = get_tensor_and_weak_ref()
+            self.assertIsNotNone(tangent_ref())
+
+            dual = fwAD.make_dual(foo, tangent)
+            self.assertIsNotNone(tangent_ref())
+
+            # Make sure that the tangent we provided has been re-used as is
+            self.assertTrue(fwAD.unpack_dual(dual)[1] is tangent)
+
+            # Make sure that dual is keeping the tangent alive
+            del tangent
+            self.assertIsNotNone(tangent_ref())
+
+            # Make sure that the dual level does not keep the c++
+            # version of the tangent alive
+            del dual
+            self.assertIsNone(tangent_ref())
 
 # Generic device type autograd tests.
 class TestAutogradDeviceType(TestCase):

test/test_namedtuple_return_api.py

Lines changed: 5 additions & 2 deletions
@@ -12,7 +12,7 @@
 all_operators_with_namedtuple_return = {
     'max', 'min', 'median', 'nanmedian', 'mode', 'kthvalue', 'svd', 'symeig', 'eig',
     'qr', 'geqrf', 'solve', 'slogdet', 'sort', 'topk', 'lstsq',
-    'triangular_solve', 'cummax', 'cummin', 'linalg_eigh'
+    'triangular_solve', 'cummax', 'cummin', 'linalg_eigh', "unpack_dual"
 }
 
 
@@ -65,6 +65,7 @@ def test_namedtuple_return(self):
             op(operators=['triangular_solve'], input=(a,), names=('solution', 'cloned_coefficient'), hasout=True),
             op(operators=['lstsq'], input=(a,), names=('solution', 'QR'), hasout=True),
             op(operators=['linalg_eigh'], input=("L",), names=('eigenvalues', 'eigenvectors'), hasout=True),
+            op(operators=['unpack_dual'], input=(a, 0), names=('primal', 'tangent'), hasout=False),
         ]
 
         for op in operators:

@@ -75,7 +76,9 @@ def test_namedtuple_return(self):
                 for i, name in enumerate(op.names):
                     self.assertIs(getattr(ret, name), ret[i])
             else:
-                ret = getattr(a, f)(*op.input)
+                # Handle op that are not methods
+                func = getattr(a, f) if hasattr(a, f) else getattr(torch, f)
+                ret = func(*op.input)
                 for i, name in enumerate(op.names):
                     self.assertIs(getattr(ret, name), ret[i])
                 if op.hasout:
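The hunks above register `unpack_dual` as a namedtuple-returning operator and teach the test to look it up on `torch` when it is not a Tensor method. A minimal sketch of the access pattern being verified, assuming `torch.unpack_dual(tensor, level)` is reachable exactly as the test calls it:

```python
import torch

a = torch.randn(2, 2)
out = torch.unpack_dual(a, 0)
# The returned namedtuple exposes its fields both by name and by position.
assert out.primal is out[0]
assert out.tangent is out[1]
```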
