[Misc] Fix "Current vLLM config is not set." warnings; assert to avoid issues in the future (#31747)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
@@ -13,10 +13,28 @@ from vllm.model_executor.layers.quantization.utils.layer_utils import replace_pa
 from vllm.utils.torch_utils import direct_register_custom_op
 
 _CPU_MOE_LAYER_CACHE = {}
-_CPU_MOE_ACT = {
-    "silu": SiluAndMul(),
-    "swigluoai": SwigluOAIAndMul(),
-}
+
+
+class _LazyActivationDict(dict):
+    """Lazily instantiate activation functions on first access.
+
+    Avoids triggering CustomOp.__init__() at module import time,
+    which would call get_current_vllm_config() before config is set.
+    """
+
+    _factories: dict[str, type[SiluAndMul] | type[SwigluOAIAndMul]] = {
+        "silu": SiluAndMul,
+        "swigluoai": SwigluOAIAndMul,
+    }
+
+    def __missing__(self, key: str) -> SiluAndMul | SwigluOAIAndMul:
+        if key not in self._factories:
+            raise KeyError(f"{key} is not a supported activation")
+        self[key] = self._factories[key]()
+        return self[key]
+
+
+_CPU_MOE_ACT = _LazyActivationDict()
 
 
 def grouped_topk(
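For readers unfamiliar with the `__missing__` hook this hunk relies on: a plain `dict` subclass can defer construction of its values until the first `__getitem__` for a key, which is exactly how the patch keeps `CustomOp.__init__()` from running at import time. A minimal standalone sketch (the `NoisyActivation` class and its print side effect are illustrative stand-ins, not part of vLLM):

```python
class NoisyActivation:
    """Illustrative stand-in for a CustomOp whose __init__ has side effects
    (in vLLM's case, reading the current config)."""

    def __init__(self, name: str) -> None:
        print(f"constructing {name}")  # the work we want to defer past import
        self.name = name


class LazyActivationDict(dict):
    """dict subclass: __missing__ runs on the first __getitem__ for a key,
    builds the value from a factory, and memoizes it in the dict itself."""

    _factories = {"silu": NoisyActivation, "gelu": NoisyActivation}

    def __missing__(self, key: str) -> NoisyActivation:
        if key not in self._factories:
            raise KeyError(f"{key} is not a supported activation")
        self[key] = self._factories[key](key)  # construct once, cache forever
        return self[key]


ACTS = LazyActivationDict()  # defining the table constructs nothing
a = ACTS["silu"]             # prints "constructing silu" exactly once
assert a is ACTS["silu"]     # the second lookup is a plain dict hit
```

Note that `dict.__getitem__` only falls back to `__missing__` when the key is absent, so after the first access every lookup is an ordinary dictionary hit.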
@@ -212,7 +230,7 @@ class CPUFusedMOE:
         apply_router_weight_on_input: bool = False,
         activation: str = "silu",
     ) -> torch.Tensor:
-        assert activation in _CPU_MOE_ACT, f"{activation} is not supported."
+        assert activation in _CPU_MOE_ACT._factories, f"{activation} is not supported."
         assert not apply_router_weight_on_input
 
         topk_weights, topk_ids = select_experts(
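The assert has to check `._factories` rather than the lazy dict itself because membership tests go through `__contains__`, which `__missing__` does not hook: `activation in _CPU_MOE_ACT` would be False for any supported activation that has not been built yet. Reusing the hypothetical `LazyActivationDict` from the sketch above:

```python
d = LazyActivationDict()
print("gelu" in d)             # False: `in` never triggers __missing__
print("gelu" in d._factories)  # True: this is what the updated assert checks
d["gelu"]                      # __getitem__ builds and memoizes the value
print("gelu" in d)             # True from now on
```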
@@ -540,6 +540,20 @@ class FusedMoE(CustomOp):
         self.apply_router_weight_on_input = apply_router_weight_on_input
         self.activation = activation
 
+        self._grouped_topk_impl: GroupedTopk | None = None
+        if self.use_grouped_topk:
+            assert self.num_expert_group is not None
+            assert self.topk_group is not None
+            self._grouped_topk_impl = GroupedTopk(
+                topk=self.top_k,
+                renormalize=self.renormalize,
+                num_expert_group=self.num_expert_group,
+                topk_group=self.topk_group,
+                scoring_func=self.scoring_func,
+                routed_scaling_factor=self.routed_scaling_factor,
+                num_fused_shared_experts=self.num_fused_shared_experts,
+            )
+
         if self.scoring_func != "softmax" and not self.use_grouped_topk:
             raise ValueError(
                 "Only softmax scoring function is supported for non-grouped topk."
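This hunk hoists a helper whose configuration never changes out of the per-call path and into `__init__`. A minimal sketch of the same pattern under simplifying assumptions (`TopKSelector` and `MoELayer` are hypothetical names; plain softmax top-k stands in for the grouped routing algorithm):

```python
import torch
from torch import nn


class TopKSelector:
    """Hypothetical stand-in for GroupedTopk: routing configuration is
    captured once at construction, not re-read on every call."""

    def __init__(self, top_k: int, renormalize: bool) -> None:
        self.top_k = top_k
        self.renormalize = renormalize

    def __call__(self, gating_output: torch.Tensor):
        probs = torch.softmax(gating_output, dim=-1)
        weights, ids = torch.topk(probs, self.top_k, dim=-1)
        if self.renormalize:
            weights = weights / weights.sum(dim=-1, keepdim=True)
        return weights, ids


class MoELayer(nn.Module):
    """Hypothetical layer mirroring the refactor: the selector is built once
    in __init__, so the forward path only asserts it exists and calls it."""

    def __init__(self, hidden: int, n_experts: int, top_k: int) -> None:
        super().__init__()
        self.gate = nn.Linear(hidden, n_experts, bias=False)
        self._selector: TopKSelector | None = TopKSelector(top_k, renormalize=True)

    def forward(self, x: torch.Tensor):
        assert self._selector is not None  # as in the @@ -1588 hunk below
        return self._selector(self.gate(x))
```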
@@ -1588,19 +1602,8 @@ class FusedMoE(CustomOp):
 
             # DeepSeekv2 uses grouped_top_k
             elif self.use_grouped_topk and valid_grouping():
-                assert self.topk_group is not None
-                assert self.num_expert_group is not None
-                grouped_topk_impl = GroupedTopk(
-                    topk=self.top_k,
-                    renormalize=self.renormalize,
-                    num_expert_group=self.num_expert_group,
-                    topk_group=self.topk_group,
-                    scoring_func=self.scoring_func,
-                    routed_scaling_factor=self.routed_scaling_factor,
-                    num_fused_shared_experts=self.num_fused_shared_experts,
-                )
-
-                topk_weights, topk_ids = grouped_topk_impl(
+                assert self._grouped_topk_impl is not None
+                topk_weights, topk_ids = self._grouped_topk_impl(
                     hidden_states=hidden_states,
                     gating_output=router_logits,
                     e_score_correction_bias=self.e_score_correction_bias,
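Taken together, the two hunks replace "construct `GroupedTopk` on every forward call" with "construct once at init, assert and reuse thereafter". With the hypothetical sketch above, usage would look like:

```python
layer = MoELayer(hidden=16, n_experts=8, top_k=2)
x = torch.randn(4, 16)
weights, ids = layer(x)          # no helper object is built on the hot path
print(weights.shape, ids.shape)  # torch.Size([4, 2]) torch.Size([4, 2])
```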