Fix CUDA kernel index data type in vllm/csrc/quantization/fused_kernels/layernorm_utils.cuh +10 (#15159)

Signed-off-by: Lu Fang <lufang@fb.com>
Co-authored-by: Richard Barnes <rbarnes@meta.com>
This commit is contained in:
Lu Fang
2025-03-20 19:01:11 -07:00
committed by GitHub
parent 0cfe7d386d
commit d3ccbd6350
10 changed files with 124 additions and 124 deletions

View File

@@ -19,11 +19,11 @@ template <typename scalar_t>
static __global__ void quantize_q8_1(const scalar_t* __restrict__ x,
void* __restrict__ vy, const int kx,
const int kx_padded) {
const int ix = blockDim.x * blockIdx.x + threadIdx.x;
const auto ix = blockDim.x * blockIdx.x + threadIdx.x;
if (ix >= kx_padded) {
return;
}
const int iy = blockDim.y * blockIdx.y + threadIdx.y;
const auto iy = blockDim.y * blockIdx.y + threadIdx.y;
const int i_padded = iy * kx_padded + ix;
block_q8_1* y = (block_q8_1*)vy;