Fix CUDA kernel index data type in vllm/csrc/quantization/fused_kernels/layernorm_utils.cuh +10 (#15159)

Signed-off-by: Lu Fang <lufang@fb.com>
Co-authored-by: Richard Barnes <rbarnes@meta.com>
This commit is contained in:
Lu Fang
2025-03-20 19:01:11 -07:00
committed by GitHub
parent 0cfe7d386d
commit d3ccbd6350
10 changed files with 124 additions and 124 deletions

View File

@@ -62,7 +62,7 @@ template <typename FType, int BLOCK, int N_MATRIX>
__global__ void f16_gemm_splitk_reduce_kernel(const FType* C_split, FType* C,
uint32_t n, uint32_t n_matrix,
uint32_t matrix_size) {
- int idx = blockIdx.x * BLOCK + threadIdx.x;
+ auto idx = blockIdx.x * BLOCK + threadIdx.x;
if (idx >= matrix_size) {
return;
@@ -407,4 +407,4 @@ static __device__ half2 inline num2num2(const half x) {
return __half2half2(x);
}
-} // namespace allspark
+} // namespace allspark