[Model] Ignore rotary embed load for Cohere model (#17319)
This commit is contained in:
@@ -418,6 +418,10 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
|
||||
loaded_params: Set[str] = set()
|
||||
for name, loaded_weight in weights:
|
||||
|
||||
# Skip loading rotary embeddings since vLLM has its own
|
||||
if "rotary_emb.inv_freq" in name:
|
||||
continue
|
||||
|
||||
if (self.quant_config is not None and
|
||||
(scale_name := self.quant_config.get_cache_scale(name))):
|
||||
# Loading kv cache quantization scales
|
||||
|
||||
Reference in New Issue
Block a user