[BugFix] Fix quantization for all other methods (#11547)
Some checks failed
Create Release / Create Release (push) Has been cancelled

This commit is contained in:
Robert Shaw
2024-12-27 01:23:29 -05:00
committed by GitHub
parent 1b875a0ef3
commit 2339d59f92
6 changed files with 52 additions and 22 deletions

View File

@@ -601,14 +601,13 @@ class Fp8MoEMethod(FusedMoEMethodBase):
router_logits: torch.Tensor,
top_k: int,
renormalize: bool,
-use_grouped_topk: bool,
+use_grouped_topk: bool = False,
topk_group: Optional[int] = None,
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
scoring_func: str = "softmax",
e_score_correction_bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
from vllm.model_executor.layers.fused_moe import fused_experts
topk_weights, topk_ids = FusedMoE.select_experts(