
Commit 4926192

mergennachin authored and pytorchmergebot committed
[Inductor][MPS] Fix half-precision type mismatches in Metal shader codegen (#176436)
Metal Shading Language rejects implicit float-to-bfloat conversions, so bare float literals like `0.0` in generated shaders cause compilation failures when the target variable is `bfloat` (or `half`). Three codegen methods were affected:

- `constant()` ignored its `dtype` parameter and returned raw literals.
- `masked()` assigned a bare literal in the else-branch (`} else tmp = 0.0;`).
- `where()` passed a bare literal through the ternary without casting.

All three now emit `static_cast<bfloat>(...)` / `static_cast<half>(...)` where needed. Tests added for half-precision constants, reductions, and conditionals.

Pull Request resolved: #176436
Approved by: https://github.com/malfet
1 parent ca5f3b9 commit 4926192
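
As a reading aid, here is a minimal sketch of the cast-wrapping pattern the commit message describes for `constant()`. This is not the actual Inductor source: `DTYPE_TO_METAL` and this standalone `constant` helper are illustrative names invented for the example, not PyTorch APIs.

```python
import torch

# Hypothetical mapping for illustration; the real codegen keeps its own
# dtype-to-Metal-type table.
DTYPE_TO_METAL = {torch.bfloat16: "bfloat", torch.float16: "half"}

def constant(value: float, dtype: torch.dtype) -> str:
    """Emit a Metal literal, casting when the target type is low-precision.

    Before the fix, the dtype argument was ignored and a bare literal came
    back, which MSL rejects when assigned to a bfloat/half variable.
    """
    metal_type = DTYPE_TO_METAL.get(dtype)
    if metal_type is None:
        return repr(value)  # float32 etc.: a bare literal compiles fine
    return f"static_cast<{metal_type}>({value!r})"

print(constant(0.0, torch.bfloat16))  # static_cast<bfloat>(0.0)
print(constant(0.0, torch.float32))   # 0.0
```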

2 files changed: 35 additions & 3 deletions


test/inductor/test_torchinductor.py

Lines changed: 28 additions & 0 deletions
@@ -15914,6 +15914,34 @@ def fn(x, index, source):
         result = torch.compile(fn)(x_base.clone()[:, 2:, :], index, source)
         self.assertEqual(result, expected)
 
+    def test_bfloat_constant(self):
+        if not self.is_dtype_supported(torch.bfloat16):
+            raise unittest.SkipTest("bfloat16 not supported")
+        self.common(
+            lambda x: x + 1.0,
+            (make_tensor(1024, dtype=torch.bfloat16, device=self.device),),
+        )
+
+    @parametrize("dtype", [torch.float16, torch.bfloat16])
+    def test_lowp_reduction(self, dtype):
+        if not self.is_dtype_supported(dtype):
+            raise unittest.SkipTest(f"{dtype} not supported")
+        self.common(
+            lambda x: x.sum(),
+            (make_tensor(1024, dtype=dtype, device=self.device),),
+            check_lowp=False,
+        )
+
+    @parametrize("dtype", [torch.float16, torch.bfloat16])
+    def test_lowp_where(self, dtype):
+        if not self.is_dtype_supported(dtype):
+            raise unittest.SkipTest(f"{dtype} not supported")
+        self.common(
+            lambda x: torch.where(x > 0.5, x, x.new_zeros(())),
+            (make_tensor(1024, dtype=dtype, device=self.device),),
+            check_lowp=False,
+        )
+
     # end of class CommonTemplate - add new tests here
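A standalone repro sketch for the `where` path the last test covers, assuming an MPS-capable machine and this fix applied (`torch.rand` stands in for the suite's `make_tensor` helper). Before the fix, compiling this emitted a Metal ternary with a bare float literal and shader compilation failed for half/bfloat inputs:

```python
import torch

def fn(x):
    return torch.where(x > 0.5, x, x.new_zeros(()))

x = torch.rand(1024, dtype=torch.float16, device="mps")
compiled = torch.compile(fn)
# With the fix, the generated ternary casts the zero branch to the
# tensor's Metal type, so this compiles and matches eager mode.
torch.testing.assert_close(compiled(x), fn(x))
```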

torch/_inductor/codegen/mps.py

Lines changed: 7 additions & 3 deletions
@@ -243,13 +243,17 @@ def masked(mask: CSEVariable, body: sympy.Expr, other: CSEVariable) -> str:
         )
         with V.kernel.compute.indent():
             V.kernel.compute.splice(scoped_body)
-            V.kernel.compute.writeline(f"{var} = {rc};")
-        V.kernel.compute.writeline(f"}} else {var} = {other_str};")
+            V.kernel.compute.writeline(
+                f"{var} = static_cast<decltype({var})>({rc});"
+            )
+        V.kernel.compute.writeline(
+            f"}} else {var} = static_cast<decltype({var})>({other_str});"
+        )
         return var
 
     @staticmethod
     def where(a: OpVarT, b: OpVarT, c: OpVarT) -> str:
-        return f"{a} ? {b} : {value_to_metal(c)}"
+        return f"{a} ? {b} : static_cast<decltype({b})>({value_to_metal(c)})"
 
     @staticmethod
     def remainder(a: OpVarT, b: OpVarT) -> str:
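
To make the `masked` change concrete, here is an illustrative sketch (variable names invented, not the Inductor source) of the Metal text the two rewritten `writeline` calls produce. Note the design choice: `decltype(var)` defers the cast's target type to the Metal compiler, so the Python codegen never needs to know whether the destination is `float`, `half`, or `bfloat` at these call sites.

```python
def close_masked_block(var: str, rc: str, other: str) -> str:
    """Mimic the generated if/else tail of a masked load after the fix."""
    return (
        f"    {var} = static_cast<decltype({var})>({rc});\n"
        f"}} else {var} = static_cast<decltype({var})>({other});"
    )

print(close_masked_block("tmp2", "tmp1", "0.0"))
#     tmp2 = static_cast<decltype(tmp2)>(tmp1);
# } else tmp2 = static_cast<decltype(tmp2)>(0.0);
```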
