fix(mxfp4): return is_monolithic=False when LoRA is enabled for Triton backend (#35382)
Signed-off-by: Seungho Yoon <yoonsnowdev@gmail.com>
This commit is contained in:
@@ -1001,6 +1001,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
 
     @property
     def is_monolithic(self) -> bool:
+        if self.moe.is_lora_enabled:
+            return False
         return (
             self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM
             or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16
Reference in New Issue
Block a user