[Kernel] Replaced blockReduce[...] functions with cub::BlockReduce (#7233)

Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
Luka Govedič
2024-08-21 20:18:00 -04:00
committed by GitHub
parent 9984605412
commit 7937009a7e
8 changed files with 237 additions and 116 deletions

View File

@@ -7,7 +7,13 @@
#include "cuda_compat.h"
#include "dispatch_utils.h"
#include "../../reduction_utils.cuh"
#ifndef USE_ROCM
#include <cub/util_type.cuh>
#include <cub/cub.cuh>
#else
#include <hipcub/util_type.hpp>
#include <hipcub/hipcub.hpp>
#endif
#ifndef USE_ROCM
using FP8_TYPE = c10::Float8_e4m3fn;
@@ -215,7 +221,10 @@ __global__ void dynamic_per_token_scaled_fp8_quant_kernel(
}
}
float const block_absmax_val_maybe = blockReduceMax(absmax_val);
using BlockReduce = cub::BlockReduce<float, 1024>;
__shared__ typename BlockReduce::TempStorage reduceStorage;
float const block_absmax_val_maybe =
BlockReduce(reduceStorage).Reduce(absmax_val, cub::Max{}, blockDim.x);
__shared__ float token_scale;
if (tid == 0) {
if (scale_ub) {