Add support for Mistral Large 3 EAGLE with dense layers (#36163)
Signed-off-by: juliendenize <julien.denize@mistral.ai>
Signed-off-by: Julien Denize <40604584+juliendenize@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import copy
 from collections.abc import Iterable
 from functools import partial
 
@@ -33,7 +34,9 @@ class EagleMistralLarge3Model(DeepseekV2Model):
     ):
         nn.Module.__init__(self)
 
-        config = vllm_config.model_config.hf_config
+        config = copy.deepcopy(vllm_config.model_config.hf_config)
+        config.first_k_dense_replace += start_layer_id
 
         quant_config = vllm_config.quant_config
         self.config = config
         self.vllm_config = vllm_config
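Why the deepcopy matters: the draft (EAGLE) layers are appended after the target model's layers at start_layer_id, so first_k_dense_replace has to be shifted by the same offset. Doing that on the shared hf_config in place would corrupt the target model's view of the config, hence the copy. A minimal sketch of the aliasing hazard, using a toy config object (not vLLM's real hf_config; the offset 61 is made up):

import copy

class ToyConfig:
    def __init__(self):
        # layers with index < first_k_dense_replace use a plain dense MLP
        self.first_k_dense_replace = 1

shared = ToyConfig()

# Without the copy, the draft model's offset leaks into the target model:
alias = shared
alias.first_k_dense_replace += 61
assert shared.first_k_dense_replace == 62  # target config silently mutated

# With deepcopy, the target model's config stays intact:
shared = ToyConfig()
draft_cfg = copy.deepcopy(shared)
draft_cfg.first_k_dense_replace += 61
assert shared.first_k_dense_replace == 1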
@@ -53,6 +56,7 @@ class EagleMistralLarge3Model(DeepseekV2Model):
                 DeepseekV2DecoderLayer(
                     vllm_config=vllm_config,
                     prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"),
+                    config=config,
                 )
                 for i in range(self.config.num_hidden_layers)
             ]
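The new config=config argument makes each DeepseekV2DecoderLayer read the deep-copied, shifted config rather than rediscovering the target's hf_config through vllm_config; the layer's dense-vs-MoE choice depends on it. A hedged sketch of that choice, with a hypothetical helper (not vLLM's actual code; indices made up):

from types import SimpleNamespace

def uses_dense_mlp(layer_idx: int, config) -> bool:
    # layers below first_k_dense_replace keep a plain MLP, the rest use MoE
    return layer_idx < config.first_k_dense_replace

# Draft layer appended at start_layer_id=61, seeing the shifted config:
cfg = SimpleNamespace(first_k_dense_replace=1 + 61)
assert uses_dense_mlp(61, cfg)      # the draft layer stays dense
assert not uses_dense_mlp(62, cfg)  # later indices would take the MoE path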
@@ -19,6 +19,10 @@ def adapt_config_dict(
     if bool(config_dict.get("quantization")):
         config_dict = _remap_mistral_quantization_args(config_dict)
 
+    is_mla = bool(config_dict.get("qk_nope_head_dim"))
+    if is_mla:
+        config_dict = _remap_mistral_mla_args(config_dict)
+
     is_moe = bool(config_dict.get("moe"))
     is_mistral_large_3 = (
         is_moe and (config_dict["moe"].get("num_shared_experts") or 0) > 0
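For context, qk_nope_head_dim only appears in MLA-style (DeepSeek-like) attention configs, so its mere presence doubles as the MLA marker here. A minimal illustration with made-up field values:

config_dict = {
    "qk_nope_head_dim": 128,   # present only for MLA attention
    "num_hidden_layers": 40,
    "intermediate_size": 14336,
}

is_mla = bool(config_dict.get("qk_nope_head_dim"))
assert is_mla
# With no "moe" section present, _remap_mistral_mla_args (added below)
# will synthesize a degenerate single-expert MoE block for this config.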
@@ -291,3 +295,22 @@ def _remap_moe_args(config: dict) -> dict:
     config["scoring_func"] = "softmax"
 
     return config
+
+
+def _remap_mistral_mla_args(config: dict) -> dict:
+    if not config.get("moe"):
+        moe = {
+            "num_experts": 1,
+            "first_k_dense_replace": config.get("num_hidden_layers"),
+            "route_every_n": 1,
+            "num_shared_experts": 1,
+            "expert_hidden_dim": config.get("intermediate_size"),
+            "num_experts_per_tok": 1,
+            "routed_scale": 1.0,
+            "renorm_strategy": "WEIGHTS",
+            "use_load_balancing_bias": False,
+            "num_expert_groups": 1,
+            "num_expert_groups_per_tok": 1,
+        }
+        config["moe"] = moe
+    return config
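The net effect: a dense MLA checkpoint is dressed up as a degenerate MoE model (one routed expert, one shared expert, first_k_dense_replace equal to num_hidden_layers), so every layer takes the dense path while reusing the DeepseekV2 MoE plumbing unchanged. An illustrative round-trip, assuming the helper above is in scope (field values made up, not from a real checkpoint):

dense = {
    "qk_nope_head_dim": 128,
    "num_hidden_layers": 40,
    "intermediate_size": 14336,
}
remapped = _remap_mistral_mla_args(dense)
assert remapped["moe"]["num_experts"] == 1
assert remapped["moe"]["first_k_dense_replace"] == 40  # == num_hidden_layers

# Configs that already carry an "moe" section pass through untouched:
moe_cfg = {"qk_nope_head_dim": 128, "moe": {"num_experts": 8}}
assert _remap_mistral_mla_args(moe_cfg)["moe"]["num_experts"] == 8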