[Model] Ignore rotary embed load for Cohere model (#17319)

2025-04-29 03:30:40 -04:00
parent 4464109219
commit 97cc8729f0
1 changed files with 4 additions and 0 deletions
--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
        loaded_params: Set[str] = set()
        for name, loaded_weight in weights:

+            # Skip rotary_emb.inv_freq; vLLM has its own rotary embedding
+            if "rotary_emb.inv_freq" in name:
+                continue
+
            if (self.quant_config is not None and
                (scale_name := self.quant_config.get_cache_scale(name))):
                # Loading kv cache quantization scales