Deepseek v3 (#11502)
Some checks failed
Create Release / Create Release (push) Has been cancelled

Signed-off-by: mgoin <michael@neuralmagic.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
Co-authored-by: robertgshaw2-neuralmagic <rshaw@neuralmagic.com>
This commit is contained in:
Simon Mo
2024-12-26 16:09:44 -08:00
committed by GitHub
parent 55fb97f7bd
commit f49777ba62
7 changed files with 886 additions and 60 deletions

View File

@@ -605,6 +605,8 @@ class Fp8MoEMethod(FusedMoEMethodBase):
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
+scoring_func: str = "softmax",
+e_score_correction_bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
from vllm.model_executor.layers.fused_moe import fused_experts
@@ -617,7 +619,10 @@ class Fp8MoEMethod(FusedMoEMethodBase):
renormalize=renormalize,
topk_group=topk_group,
num_expert_group=num_expert_group,
-custom_routing_function=custom_routing_function)
+custom_routing_function=custom_routing_function,
+scoring_func=scoring_func,
+e_score_correction_bias=e_score_correction_bias,
+)
return fused_experts(
x,