
Commit a81edf9

desertfire authored and pytorchmergebot committed
[inductor] Fix cpp_wrapper codegen for ir.ComplexView (#116481)
Pull Request resolved: #116481
Approved by: https://github.com/htyu
1 parent b0629cd commit a81edf9

4 files changed: 29 additions & 15 deletions

test/inductor/test_cpu_cpp_wrapper.py

Lines changed: 1 addition & 0 deletions
@@ -133,6 +133,7 @@ class BaseTest(NamedTuple):
         code_string_count: dict = {}

     for item in [
+        BaseTest("test_add_complex2"),
         BaseTest("test_as_strided"),  # buffer reuse
         BaseTest("test_bernoulli1"),
         BaseTest("test_bitwise"),  # int32

test/inductor/test_cuda_cpp_wrapper.py

Lines changed: 1 addition & 0 deletions
@@ -151,6 +151,7 @@ class BaseTest(NamedTuple):

     # Maintain two separate test lists for cuda and cpp for now
     for item in [
+        BaseTest("test_add_complex2"),
         BaseTest("test_as_strided"),  # buffer reuse
         BaseTest("test_batch_norm_2d_2"),
         BaseTest("test_bernoulli1"),

test/inductor/test_torchinductor.py

Lines changed: 3 additions & 1 deletion
@@ -670,7 +670,9 @@ def fn(a, b):
         y = torch.tensor([1 + 1j, -1 + 1j, -2 + 2j, 3 - 3j, 0, 1j, 1, -1])

         _, code = run_and_get_code(fn, x, y)
-        self.assertEqual(code[0].count("aten.view"), 3)
+        self.assertEqual(
+            code[0].count("::view_dtype" if config.cpp_wrapper else "aten.view"), 3
+        )

     def test_add_complex3(self):
         # fix https://github.com/pytorch/pytorch/issues/115071
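
Note: a minimal standalone version of the pattern this test exercises might look like the sketch below; the exact decorator and the surrounding test harness are assumptions, and run_and_get_code is taken from torch._inductor.utils as in the test file.

import torch
from torch._inductor.utils import run_and_get_code

# Hypothetical standalone repro of the test_add_complex2 pattern: complex
# tensors are lowered through dtype views, and the generated wrapper code is
# inspected for the spelling of those views.
@torch.compile
def fn(a, b):
    return a + b

x = torch.tensor([1 + 1j, -1 + 1j, -2 + 2j, 3 - 3j, 0, 1j, 1, -1])
y = torch.tensor([1 + 1j, -1 + 1j, -2 + 2j, 3 - 3j, 0, 1j, 1, -1])

_, code = run_and_get_code(fn, x, y)
# The default Python wrapper spells the views as "aten.view...", while the
# C++ wrapper emits "...::view_dtype" calls, which is what the updated
# assertion accounts for.
print(code[0].count("aten.view"), code[0].count("::view_dtype"))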

torch/_inductor/ir.py

Lines changed: 24 additions & 14 deletions
@@ -4399,6 +4399,21 @@ class ExternKernelNode:
 }


+def get_aten_cpp_kernel_name(kernel):
+    # Calling with the default kernel name can lead to ambiguous behavior like the following example.
+    # repeat_interleave(const at::Tensor & repeats, c10::optional<int64_t> output_size=c10::nullopt)
+    # repeat_interleave(const at::Tensor & self, int64_t repeats,
+    #     c10::optional<int64_t> dim=c10::nullopt, c10::optional<int64_t> output_size=c10::nullopt)
+    assert (
+        isinstance(kernel, torch._ops.OpOverload) and kernel.namespace == "aten"
+    ), "Invalid aten kernel"
+    return (
+        f"at::{kernel.__name__.split('.')[0]}"
+        if kernel._overloadname == "default"
+        else f"at::_ops::{kernel.__name__.replace('.', '_')}::call"
+    )
+
+
 class FallbackKernel(ExternKernelAlloc):
     args_default_value: List[Dict[str, Any]]

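Note: as a quick illustration of what the new helper produces, the sketch below replicates its mapping outside of inductor; the two example overloads are assumed to exist in the installed PyTorch build.

import torch

def get_aten_cpp_kernel_name(kernel):
    # Same mapping as the helper added above: default overloads use the plain
    # at:: name, other overloads use the fully qualified at::_ops::...::call
    # spelling to avoid ambiguous C++ overload resolution.
    assert (
        isinstance(kernel, torch._ops.OpOverload) and kernel.namespace == "aten"
    ), "Invalid aten kernel"
    return (
        f"at::{kernel.__name__.split('.')[0]}"
        if kernel._overloadname == "default"
        else f"at::_ops::{kernel.__name__.replace('.', '_')}::call"
    )

print(get_aten_cpp_kernel_name(torch.ops.aten.view_as_real.default))
# at::view_as_real
print(get_aten_cpp_kernel_name(torch.ops.aten.repeat_interleave.self_int))
# at::_ops::repeat_interleave_self_int::call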
@@ -4655,8 +4670,6 @@ def codegen(self, wrapper):
         if kernel.namespace == "aten":
             # Aten Fallback Ops
             assert isinstance(kernel, torch._ops.OpOverload)
-            op_base_name = kernel.__name__.split(".")[0]
-
             if V.graph.cpp_wrapper:
                 if config.is_fbcode() and kernel not in has_c_shim:
                     log.warning(
@@ -4666,17 +4679,8 @@ def codegen(self, wrapper):
                     self.use_runtime_dispatch = True
                     self.set_cpp_kernel(kernel)
                 else:
-                    # Calling with the default kernel name can lead to ambiguous behavior like the following example.
-                    # repeat_interleave(const at::Tensor & repeats, c10::optional<int64_t> output_size=c10::nullopt)
-                    # repeat_interleave(const at::Tensor & self, int64_t repeats,
-                    #     c10::optional<int64_t> dim=c10::nullopt, c10::optional<int64_t> output_size=c10::nullopt)
-                    self.cpp_kernel_name = (
-                        f"at::{op_base_name}"
-                        if kernel._overloadname == "default"
-                        else f"at::_ops::{kernel.__name__.replace('.', '_')}::call"
-                    )
+                    self.cpp_kernel_name = get_aten_cpp_kernel_name(kernel)
                     schema = kernel._schema
-
                     self.args_default_value = [
                         {"type": x.real_type, "value": x.default_value}
                         for x in schema.arguments
46914695
if x.kwarg_only
46924696
}
46934697
else:
4694-
self.python_kernel_name = f"aten.{op_base_name}"
4698+
self.python_kernel_name = str(kernel)
46954699

46964700
elif isinstance(kernel, torch._ops.HigherOrderOperator):
46974701
if getattr(torch._prims.rng_prims, kernel.__name__, None) is kernel:
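
Note: the effect of switching to str(kernel) can be seen directly in a small sketch; the printed strings are what OpOverload stringification is expected to yield, not verified against every build.

import torch

kernel = torch.ops.aten.view.dtype
# Old spelling: only the base op name, dropping the overload suffix.
print(f"aten.{kernel.__name__.split('.')[0]}")  # aten.view
# New spelling: str() keeps the overload suffix.
print(str(kernel))                              # aten.view.dtype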
@@ -4825,6 +4829,7 @@ def __init__(
         self,
         layout,
         python_kernel_name,
+        cpp_kernel_name,
         tensor_args,
         nontensor_args,
     ):
@@ -4838,6 +4843,7 @@ def __init__(
         # output through the abi-compatible interface.
         self.outputs: Sequence[Any] = []
         self.python_kernel_name = python_kernel_name
+        self.cpp_kernel_name = cpp_kernel_name

     @classmethod
     def create(cls, kernel, *args, **kwargs):
@@ -4854,7 +4860,11 @@ def create(cls, kernel, *args, **kwargs):
         assert device, "Not sure where to find device info"

         packed = ComplexView(
-            MultiOutputLayout(device), str(kernel), tensor_args, non_tensor_args
+            MultiOutputLayout(device),
+            str(kernel),
+            get_aten_cpp_kernel_name(kernel),
+            tensor_args,
+            non_tensor_args,
         )

         layout = FixedLayout(
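
Note: putting the pieces together, an end-to-end check of the fix could look like the sketch below; it assumes torch._inductor.config.cpp_wrapper is the flag that enables the C++ wrapper codegen path exercised by this commit.

import torch
import torch._inductor.config as inductor_config

# With cpp_wrapper enabled, the ComplexView fallback now carries a C++ kernel
# name, so compiling an op over complex tensors should codegen cleanly.
inductor_config.cpp_wrapper = True  # assumption: enables the C++ wrapper path

@torch.compile
def fn(a, b):
    return a + b

x = torch.tensor([1 + 1j, -1 + 1j, -2 + 2j, 3 - 3j])
y = torch.tensor([1 + 1j, -1 + 1j, -2 + 2j, 3 - 3j])
print(fn(x, y))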
