fix(mxfp4): return is_monolithic=False when LoRA is enabled for Triton backend (#35382)

Signed-off-by: Seungho Yoon <yoonsnowdev@gmail.com>
This commit is contained in:
Seungho Yoon
2026-03-01 23:59:30 +09:00
committed by GitHub
parent 59d7af9c6c
commit 5a435507d8

View File

@@ -1001,6 +1001,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
@property
def is_monolithic(self) -> bool:
if self.moe.is_lora_enabled:
return False
return (
self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM
or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16