[ROCm] Add AMD GPU support on Deepseek v3.2 and SparseMLA (#26670)

Signed-off-by: ganyi <ygan@amd.com>
This commit is contained in:
Pleaplusone
2025-11-20 18:54:01 +08:00
committed by GitHub
parent 6eb745d9bd
commit 06c20c9904
9 changed files with 583 additions and 15 deletions

View File

@@ -552,7 +552,11 @@ __global__ void indexer_k_quant_and_cache_kernel(
#ifndef USE_ROCM
__syncwarp();
#endif
#if defined(__gfx942__)
float scale = fmaxf(amax, 1e-4) / 224.0f;
#else
float scale = fmaxf(amax, 1e-4) / 448.0f;
#endif
if (use_ue8m0) {
scale = exp2f(ceilf(log2f(scale)));
}