[Misc] Fix "Current vLLM config is not set." warnings; assert to avoid issues in the future (#31747)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
@@ -13,10 +13,28 @@ from vllm.model_executor.layers.quantization.utils.layer_utils import replace_pa
 from vllm.utils.torch_utils import direct_register_custom_op
 
 _CPU_MOE_LAYER_CACHE = {}
-_CPU_MOE_ACT = {
-    "silu": SiluAndMul(),
-    "swigluoai": SwigluOAIAndMul(),
-}
+
+
+class _LazyActivationDict(dict):
+    """Lazily instantiate activation functions on first access.
+
+    Avoids triggering CustomOp.__init__() at module import time,
+    which would call get_current_vllm_config() before config is set.
+    """
+
+    _factories: dict[str, type[SiluAndMul] | type[SwigluOAIAndMul]] = {
+        "silu": SiluAndMul,
+        "swigluoai": SwigluOAIAndMul,
+    }
+
+    def __missing__(self, key: str) -> SiluAndMul | SwigluOAIAndMul:
+        if key not in self._factories:
+            raise KeyError(f"{key} is not a supported activation")
+        self[key] = self._factories[key]()
+        return self[key]
+
+
+_CPU_MOE_ACT = _LazyActivationDict()
 
 
 def grouped_topk(
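For readers unfamiliar with the `__missing__` hook this hunk relies on: a plain `dict` subclass can defer construction of its values until the first `__getitem__` for a key, which is exactly how the patch keeps `CustomOp.__init__()` from running at import time. A minimal standalone sketch (the `NoisyActivation` class and its print side effect are illustrative stand-ins, not part of vLLM):

```python
class NoisyActivation:
    """Illustrative stand-in for a CustomOp whose __init__ has side effects
    (in vLLM's case, reading the current config)."""

    def __init__(self, name: str) -> None:
        print(f"constructing {name}")  # the work we want to defer past import
        self.name = name


class LazyActivationDict(dict):
    """dict subclass: __missing__ runs on the first __getitem__ for a key,
    builds the value from a factory, and memoizes it in the dict itself."""

    _factories = {"silu": NoisyActivation, "gelu": NoisyActivation}

    def __missing__(self, key: str) -> NoisyActivation:
        if key not in self._factories:
            raise KeyError(f"{key} is not a supported activation")
        self[key] = self._factories[key](key)  # construct once, cache forever
        return self[key]


ACTS = LazyActivationDict()  # defining the table constructs nothing
a = ACTS["silu"]             # prints "constructing silu" exactly once
assert a is ACTS["silu"]     # the second lookup is a plain dict hit
```

Note that `dict.__getitem__` only falls back to `__missing__` when the key is absent, so after the first access every lookup is an ordinary dictionary hit.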
@@ -212,7 +230,7 @@ class CPUFusedMOE:
         apply_router_weight_on_input: bool = False,
         activation: str = "silu",
     ) -> torch.Tensor:
-        assert activation in _CPU_MOE_ACT, f"{activation} is not supported."
+        assert activation in _CPU_MOE_ACT._factories, f"{activation} is not supported."
         assert not apply_router_weight_on_input
 
         topk_weights, topk_ids = select_experts(
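The assert has to check `._factories` rather than the lazy dict itself because membership tests go through `__contains__`, which `__missing__` does not hook: `activation in _CPU_MOE_ACT` would be False for any supported activation that has not been built yet. Reusing the hypothetical `LazyActivationDict` from the sketch above:

```python
d = LazyActivationDict()
print("gelu" in d)             # False: `in` never triggers __missing__
print("gelu" in d._factories)  # True: this is what the updated assert checks
d["gelu"]                      # __getitem__ builds and memoizes the value
print("gelu" in d)             # True from now on
```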
@@ -540,6 +540,20 @@ class FusedMoE(CustomOp):
         self.apply_router_weight_on_input = apply_router_weight_on_input
         self.activation = activation
 
+        self._grouped_topk_impl: GroupedTopk | None = None
+        if self.use_grouped_topk:
+            assert self.num_expert_group is not None
+            assert self.topk_group is not None
+            self._grouped_topk_impl = GroupedTopk(
+                topk=self.top_k,
+                renormalize=self.renormalize,
+                num_expert_group=self.num_expert_group,
+                topk_group=self.topk_group,
+                scoring_func=self.scoring_func,
+                routed_scaling_factor=self.routed_scaling_factor,
+                num_fused_shared_experts=self.num_fused_shared_experts,
+            )
+
         if self.scoring_func != "softmax" and not self.use_grouped_topk:
             raise ValueError(
                 "Only softmax scoring function is supported for non-grouped topk."
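This hunk hoists a helper whose configuration never changes out of the per-call path and into `__init__`. A minimal sketch of the same pattern under simplifying assumptions (`TopKSelector` and `MoELayer` are hypothetical names; plain softmax top-k stands in for the grouped routing algorithm):

```python
import torch
from torch import nn


class TopKSelector:
    """Hypothetical stand-in for GroupedTopk: routing configuration is
    captured once at construction, not re-read on every call."""

    def __init__(self, top_k: int, renormalize: bool) -> None:
        self.top_k = top_k
        self.renormalize = renormalize

    def __call__(self, gating_output: torch.Tensor):
        probs = torch.softmax(gating_output, dim=-1)
        weights, ids = torch.topk(probs, self.top_k, dim=-1)
        if self.renormalize:
            weights = weights / weights.sum(dim=-1, keepdim=True)
        return weights, ids


class MoELayer(nn.Module):
    """Hypothetical layer mirroring the refactor: the selector is built once
    in __init__, so the forward path only asserts it exists and calls it."""

    def __init__(self, hidden: int, n_experts: int, top_k: int) -> None:
        super().__init__()
        self.gate = nn.Linear(hidden, n_experts, bias=False)
        self._selector: TopKSelector | None = TopKSelector(top_k, renormalize=True)

    def forward(self, x: torch.Tensor):
        assert self._selector is not None  # as in the @@ -1588 hunk below
        return self._selector(self.gate(x))
```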
@@ -1588,19 +1602,8 @@ class FusedMoE(CustomOp):
 
             # DeepSeekv2 uses grouped_top_k
             elif self.use_grouped_topk and valid_grouping():
-                assert self.topk_group is not None
-                assert self.num_expert_group is not None
-                grouped_topk_impl = GroupedTopk(
-                    topk=self.top_k,
-                    renormalize=self.renormalize,
-                    num_expert_group=self.num_expert_group,
-                    topk_group=self.topk_group,
-                    scoring_func=self.scoring_func,
-                    routed_scaling_factor=self.routed_scaling_factor,
-                    num_fused_shared_experts=self.num_fused_shared_experts,
-                )
-
-                topk_weights, topk_ids = grouped_topk_impl(
+                assert self._grouped_topk_impl is not None
+                topk_weights, topk_ids = self._grouped_topk_impl(
                     hidden_states=hidden_states,
                     gating_output=router_logits,
                     e_score_correction_bias=self.e_score_correction_bias,
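Taken together, the two hunks replace "construct `GroupedTopk` on every forward call" with "construct once at init, assert and reuse thereafter". With the hypothetical sketch above, usage would look like:

```python
layer = MoELayer(hidden=16, n_experts=8, top_k=2)
x = torch.randn(4, 16)
weights, ids = layer(x)          # no helper object is built on the hot path
print(weights.shape, ids.shape)  # torch.Size([4, 2]) torch.Size([4, 2])
```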