[CPU] Update custom ops for the CPU backend (#20255)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
2025-07-01 15:25:03 +08:00
parent 9909726d2a
commit 6cc1e7d96d
23 changed files with 5357 additions and 101 deletions
--- a/vllm/model_executor/layers/vocab_parallel_embedding.py
+++ b/vllm/model_executor/layers/vocab_parallel_embedding.py
@@ -43,7 +43,7 @@ class UnquantizedEmbeddingMethod(QuantizeMethodBase):
              layer: torch.nn.Module,
              x: torch.Tensor,
              bias: Optional[torch.Tensor] = None) -> torch.Tensor:
-        return dispatch_unquantized_gemm()(x, layer.weight, bias)
+        return dispatch_unquantized_gemm()(layer, x, layer.weight, bias)

    def embedding(self, layer: torch.nn.Module,
                  input_: torch.Tensor) -> torch.Tensor: