Apply fixes for CUDA 13 (#24599)
Signed-off-by: Aidyn-A <aidyn.b.aitzhan@gmail.com>
This commit is contained in:
@@ -1,15 +1,10 @@
|
||||
#include "common.cuh"
|
||||
#include "dispatch_utils.h"
|
||||
#include "../../cub_helpers.h"
|
||||
#include "../vectorization_utils.cuh"
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#include <ATen/cuda/Exceptions.h>
|
||||
|
||||
#ifndef USE_ROCM
|
||||
#include <cub/cub.cuh>
|
||||
#else
|
||||
#include <hipcub/hipcub.hpp>
|
||||
#endif
|
||||
|
||||
namespace vllm {
|
||||
|
||||
template <typename scalar_t, typename fp8_type>
|
||||
@@ -116,7 +111,7 @@ __global__ void dynamic_per_token_scaled_fp8_quant_kernel_strided(
|
||||
using BlockReduce = cub::BlockReduce<float, 256>;
|
||||
__shared__ typename BlockReduce::TempStorage tmp;
|
||||
const float block_max =
|
||||
BlockReduce(tmp).Reduce(absmax_val, cub::Max{}, blockDim.x);
|
||||
BlockReduce(tmp).Reduce(absmax_val, CubMaxOp{}, blockDim.x);
|
||||
|
||||
__shared__ float token_scale;
|
||||
if (tid == 0) {
|
||||
|
||||
Reference in New Issue
Block a user