Commit b7abe8e

BoyuanFeng authored and pytorchmergebot committed
[BugFix] fix conditions to apply tma (#174480)
Summary: This diff fixes a wrong condition introduced in D92015529 for applying TMA. This should fix S618103.

Test Plan: Test in mast job.

Differential Revision: D92533457

Pull Request resolved: #174480

Approved by: https://github.com/Microve
1 parent ef9b83b commit b7abe8e

1 file changed: torch/_inductor/codegen/triton.py (6 additions, 4 deletions)
@@ -2233,11 +2233,13 @@ def can_use_tma(
         return True
     if not (
         (
-            V.graph.get_current_device_or_throw().type == "cuda"
-            and torch.cuda.get_device_capability()[0] >= 9
-            and config.assume_aligned_inputs
+            (
+                V.graph.get_current_device_or_throw().type == "cuda"
+                and torch.cuda.get_device_capability()[0] >= 9
+                and config.assume_aligned_inputs
+            )
+            or V.graph.get_current_device_or_throw().type == "xpu"
         )
-        or V.graph.get_current_device_or_throw().type == "xpu"
         and config.triton.use_tensor_descriptor
         and has_triton_stable_tma_api()
         # For CUDA The base ptr needs to be aligned
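The bug fixed here is an operator-precedence mistake: in Python, `and` binds tighter than `or`, so without the extra parentheses the `use_tensor_descriptor` and stable-TMA checks were attached only to the `xpu` branch, and the CUDA branch could enable TMA on its own. A minimal sketch of the before/after grouping (function and parameter names are illustrative, not the real Inductor variables):

```python
# Pre-fix grouping: (cuda and cap_ok and aligned) or (xpu and desc and stable).
# On CUDA, the tensor-descriptor and stable-API checks were silently skipped.
def can_use_tma_before(is_cuda, cap_ok, aligned, is_xpu, use_desc, stable_api):
    return (is_cuda and cap_ok and aligned) or is_xpu and use_desc and stable_api


# Post-fix grouping: ((cuda and cap_ok and aligned) or xpu) and desc and stable.
# The device check is isolated; the remaining checks apply to both devices.
def can_use_tma_after(is_cuda, cap_ok, aligned, is_xpu, use_desc, stable_api):
    return ((is_cuda and cap_ok and aligned) or is_xpu) and use_desc and stable_api


# Example: a capability>=9 CUDA device with aligned inputs but the tensor
# descriptor disabled. The old condition wrongly allowed TMA; the new one
# respects the config flag.
args = dict(is_cuda=True, cap_ok=True, aligned=True,
            is_xpu=False, use_desc=False, stable_api=True)
print(can_use_tma_before(**args))  # True  (bug: TMA applied anyway)
print(can_use_tma_after(**args))   # False (fix: descriptor flag respected)
```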
