[Kernels] Add activation chunking logic to FusedMoEModularKernel (#19168)

Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
bnellnm
2025-06-11 12:53:10 -04:00
committed by GitHub
parent b2d9be6f7d
commit 29fa5cac1c
15 changed files with 458 additions and 396 deletions

View File

@@ -29,6 +29,7 @@ MNK_FACTORS = [
(224, 1024, 1536),
(224, 3072, 1024),
(224, 3072, 1536),
(1024 * 128, 1024, 1024),
]
vllm_config = VllmConfig(parallel_config=ParallelConfig(