[Bugfix] Disable shared expert overlap if Marlin MoE is used (#28410)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in: (branch/tag list omitted in this extract)
@@ -216,6 +216,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
def __init__(self, moe: FusedMoEConfig):
|
||||
super().__init__(moe)
|
||||
self.mxfp4_backend = get_mxfp4_backend(moe.is_lora_enabled)
|
||||
self.use_marlin = self.mxfp4_backend == Mxfp4Backend.MARLIN
|
||||
self.max_capture_size = (
|
||||
get_current_vllm_config().compilation_config.max_cudagraph_capture_size
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user