Fix CUDA kernel index data type in vllm/csrc/quantization/fused_kernels/layernorm_utils.cuh +10 (#15159)

Signed-off-by: Lu Fang <lufang@fb.com> Co-authored-by: Richard Barnes <rbarnes@meta.com>
2025-03-20 19:01:11 -07:00
parent 0cfe7d386d
commit d3ccbd6350
10 changed files with 124 additions and 124 deletions
--- a/csrc/quantization/gguf/gguf_kernel.cu
+++ b/csrc/quantization/gguf/gguf_kernel.cu
@@ -19,11 +19,11 @@ template <typename scalar_t>
 static __global__ void quantize_q8_1(const scalar_t* __restrict__ x,
                                     void* __restrict__ vy, const int kx,
                                     const int kx_padded) {
-  const int ix = blockDim.x * blockIdx.x + threadIdx.x;
+  const auto ix = blockDim.x * blockIdx.x + threadIdx.x;
  if (ix >= kx_padded) {
    return;
  }
-  const int iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const auto iy = blockDim.y * blockIdx.y + threadIdx.y;
  const int i_padded = iy * kx_padded + ix;

  block_q8_1* y = (block_q8_1*)vy;