[ROCm] Add AMD GPU support on Deepseek v3.2 and SparseMLA (#26670)
Signed-off-by: ganyi <ygan@amd.com>
This commit is contained in:
@@ -552,7 +552,11 @@ __global__ void indexer_k_quant_and_cache_kernel(
|
||||
#ifndef USE_ROCM
|
||||
__syncwarp();
|
||||
#endif
|
||||
#if defined(__gfx942__)
|
||||
float scale = fmaxf(amax, 1e-4) / 224.0f;
|
||||
#else
|
||||
float scale = fmaxf(amax, 1e-4) / 448.0f;
|
||||
#endif
|
||||
if (use_ue8m0) {
|
||||
scale = exp2f(ceilf(log2f(scale)));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user