|
2 | 2 | import builtins |
3 | 3 | import importlib |
4 | 4 | import inspect |
| 5 | +import logging |
5 | 6 | import pickle |
6 | 7 | import types |
7 | 8 | from dataclasses import dataclass |
8 | 9 | from typing import Any, Callable, Optional |
9 | 10 |
|
10 | 11 | import torch |
11 | 12 | import torch.fx |
| 13 | +from torch._dynamo.precompile_context import PrecompileContext |
12 | 14 |
|
13 | 15 | from . import convert_frame |
14 | 16 | from .hooks import Hooks |
15 | 17 |
|
16 | 18 |
|
| 19 | +log = logging.getLogger(__name__) |
| 20 | + |
| 21 | + |
17 | 22 | class SerializableCallable(abc.ABC): |
18 | 23 | @classmethod |
19 | 24 | @abc.abstractmethod |
@@ -119,6 +124,65 @@ def deserialize(cls, data: bytes) -> "AOTCompiledFunction": |
119 | 124 | return cls(artifacts) |
120 | 125 |
|
121 | 126 |
|
class BundledAOTAutogradSerializableCallable(SerializableCallable):
    """
    Represents a serializable callable generated by compile_fx.
    This class wraps around the compiled function generated by AOTAutograd.

    TODO: Instead of using PrecompileContext to grab it from AOTAutograd,
    this object should be what's *returned* by aot_module_simplified.
    We'll do that refactor in a later PR.
    """

    def __init__(self, artifact: Any) -> None:
        """
        Takes in a BundledAOTAutogradCacheArtifact, which is the serialized form
        of a compiled function generated by AOTAutograd.
        """
        # The deserialized, directly-callable compiled function.
        self.compiled_fn = artifact.after_deserialization()
        # The raw serialized bytes, kept so serialize_compile_artifacts
        # can return them without re-serializing.
        self.data = artifact.content

    def __getattr__(self, attr: str) -> Any:
        # __getattr__ is only invoked when normal attribute lookup on this
        # wrapper fails, so delegate straight to the wrapped compiled function.
        #
        # NOTE(bugfix): the previous implementation called hasattr(self, attr)
        # here, which re-enters __getattr__ and recurses until RecursionError.
        # Guard "compiled_fn" itself so lookups that happen before __init__
        # has run (e.g. during unpickling) raise AttributeError instead of
        # recursing.
        if attr == "compiled_fn":
            raise AttributeError(attr)
        return getattr(self.compiled_fn, attr)

    @classmethod
    def from_backend_id(
        cls, backend_id: str
    ) -> "BundledAOTAutogradSerializableCallable":
        """
        Takes in a backend_id, and returns a BundledAOTAutogradSerializableCallable
        that wraps around the compiled function generated by AOTAutograd.

        Raises:
            RuntimeError: if PrecompileContext has no artifact recorded
                under backend_id.
        """
        artifact = PrecompileContext.serialize_artifact_by_key(backend_id)
        if artifact is None:
            raise RuntimeError("No artifact found for backend_id: " + backend_id)
        return cls(artifact)

    @classmethod
    def serialize_compile_artifacts(
        cls, fn: "BundledAOTAutogradSerializableCallable"
    ) -> bytes:
        # The serialized bytes were already captured from the artifact in
        # __init__, so this is a simple field read.
        return fn.data

    @classmethod
    def deserialize_compile_artifacts(cls, data: bytes) -> Any:
        from torch._functorch._aot_autograd.autograd_cache import (
            BundledAOTAutogradCacheArtifact,
        )

        # The key in the artifact is not important here since we're not populating a cache,
        # we just want to grab the callable back out of the serialized entry
        artifact = BundledAOTAutogradCacheArtifact("", data)
        return cls(artifact)

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        # Forward calls directly to the wrapped compiled function.
        return self.compiled_fn(*args, **kwargs)
| 185 | + |
122 | 186 | def aot_compile_fullgraph( |
123 | 187 | model: Any, |
124 | 188 | example_inputs: tuple[tuple[Any, ...], dict[str, Any]], |
@@ -191,12 +255,23 @@ def new_guard_filter_fn( |
191 | 255 | assert check_fn.guards_state is not None |
192 | 256 |
|
193 | 257 | backend_input = capture_output.backend_input |
| 258 | + backend_input.graph_module._backend_id = backend_input.backend_id # type: ignore[assignment] |
194 | 259 | output_graph = dynamo_output.tracer_output.output_graph |
195 | 260 | assert output_graph is not None |
196 | 261 | import_sources = output_graph.import_sources |
197 | | - with torch._guards.tracing(TracingContext(backend_input.fake_mode)): |
| 262 | + with ( |
| 263 | + torch._guards.tracing(TracingContext(backend_input.fake_mode)), |
| 264 | + torch._functorch.config.patch("bundled_autograd_cache", True), |
| 265 | + ): |
198 | 266 | compiled_fn = backend(backend_input.graph_module, backend_input.example_inputs) |
199 | 267 |
|
| 268 | + # If Inductor backend is used, grab the compiled_fn from PrecompileContext |
| 269 | + # TODO: this should be replaced once we make the backend return the SerializableCallable directly. |
| 270 | + if isinstance(backend, torch._TorchCompileInductorWrapper): |
| 271 | + compiled_fn = BundledAOTAutogradSerializableCallable.from_backend_id( |
| 272 | + backend_input.backend_id |
| 273 | + ) |
| 274 | + |
200 | 275 | if not isinstance(compiled_fn, SerializableCallable): |
201 | 276 | if hasattr(backend, "compiler_fn"): |
202 | 277 | compiler_fn = backend.compiler_fn |
|
0 commit comments