From 5a435507d877f4eb16802095037d5c56e767c589 Mon Sep 17 00:00:00 2001
From: Seungho Yoon
Date: Sun, 1 Mar 2026 23:59:30 +0900
Subject: [PATCH] fix(mxfp4): return is_monolithic=False when LoRA is enabled for Triton backend (#35382)

Signed-off-by: Seungho Yoon
---
 vllm/model_executor/layers/quantization/mxfp4.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 29dd03596..0ad1b8931 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -1001,6 +1001,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):

     @property
     def is_monolithic(self) -> bool:
+        if self.moe.is_lora_enabled:
+            return False
         return (
             self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM
             or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16