JIT kernel: support per-tensor FP8 quantization

BBuf · BBuf · commit 4b1b88d7409e · 2025-12-24T00:10:52.000+08:00
diff --git a/python/sglang/jit_kernel/csrc/gemm/per_tensor_quant_fp8.cuh b/python/sglang/jit_kernel/csrc/gemm/per_tensor_quant_fp8.cuh
@@ -133,8 +133,6 @@ void per_tensor_quant_fp8(tvm::ffi::TensorView input,
   const size_t num_blocks = std::min((total_elements + kBlockSize - 1) / kBlockSize, size_t(1024));
   const DLDevice device = device_.unwrap();
 
-  RuntimeCheck(total_elements > 0, "Input tensor must be non-empty");
-
   auto launch_kernels = [&]<typename T>() {
     if constexpr (!kIsStatic) {
       LaunchKernel(num_blocks, kBlockSize, device)(
@@ -159,8 +157,6 @@ void per_tensor_quant_fp8(tvm::ffi::TensorView input,
     launch_kernels.template operator()<c10::BFloat16>();
   } else if (dtype.code == kDLFloat && dtype.bits == 16) {
     launch_kernels.template operator()<c10::Half>();
-  } else {
-    RuntimeCheck(false, "Unsupported input dtype");
   }
 }