fix(mxfp4): return is_monolithic=False when LoRA is enabled for Triton backend (#35382)
Signed-off-by: Seungho Yoon <yoonsnowdev@gmail.com>
This commit is contained in:
@@ -1001,6 +1001,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
 
     @property
     def is_monolithic(self) -> bool:
+        if self.moe.is_lora_enabled:
+            return False
         return (
             self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM
             or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16
Reference in New Issue
Block a user