[Core] Parse vLLM engine required fields from hf_config to model_arch_config (#28454)

Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
Signed-off-by: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
This commit is contained in:
Xingyu Liu
2026-01-02 16:13:15 -07:00
committed by GitHub
parent a0e9ee83c7
commit 0eee877f67
11 changed files with 1121 additions and 287 deletions

View File

@@ -16,6 +16,10 @@ from transformers.models.qwen3.configuration_qwen3 import Qwen3Config
from transformers.models.qwen3_moe.configuration_qwen3_moe import Qwen3MoeConfig
from vllm.config.model import ModelConfig, get_hf_text_config
from vllm.transformers_utils.model_arch_config_convertor import (
MODEL_ARCH_CONFIG_CONVERTORS,
ModelArchConfigConvertorBase,
)
from vllm.v1.metrics.perf import (
AttentionMetrics,
BaseConfigParser,
@@ -33,6 +37,12 @@ class MockModelConfig:
def __init__(self, hf_config, dtype):
self.hf_config = hf_config
self.hf_text_config = get_hf_text_config(hf_config)
convertor_cls = MODEL_ARCH_CONFIG_CONVERTORS.get(
self.hf_config.model_type, ModelArchConfigConvertorBase
)
self.model_arch_config = convertor_cls(
self.hf_config, self.hf_text_config
).convert()
self.dtype = dtype
self.is_attention_free = False