Expert Parallelism (EP) Support for DeepSeek V2 (#12583)

Authored by Jongseok Park on 2025-02-24 07:33:20 -08:00; committed by GitHub
parent 7940d8a6a7
commit 781096e385
19 changed files with 527 additions and 59 deletions

View File

@@ -585,6 +585,8 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
use_grouped_topk: bool = False,
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
global_num_experts: int = -1,
expert_map: Optional[torch.Tensor] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[torch.Tensor] = None,