[New Model] DeepSeek-V3.2 (Rebased to Main) (#25896)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
Signed-off-by: youkaichao <youkaichao@gmail.com>
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Signed-off-by: NickLucche <nlucches@redhat.com>
Signed-off-by: Yongye Zhu <zyy1102000@gmail.com>
Signed-off-by: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Signed-off-by: Lucia Fang <fanglu@meta.com>
Co-authored-by: Chen Zhang <zhangch99@outlook.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Lucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: yewentao256 <zhyanwentao@126.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Lucia Fang <116399278+luccafong@users.noreply.github.com>
Co-authored-by: Lucia Fang <fanglu@meta.com>
Co-authored-by: NickLucche <nlucches@redhat.com>
Co-authored-by: Siyuan Fu <siyuanf@nvidia.com>
Co-authored-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Xiaozhu Meng <mxz297@gmail.com>
Co-authored-by: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Signed-off-by: simon-mo <simon.mo@hey.com>
2025-09-30 05:14:41 -04:00
parent 03df0fb5d2
commit b3230e1ac0
71 changed files with 3915 additions and 218 deletions
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -8,7 +8,8 @@ import pytest

 from vllm import LLM
 from vllm.utils import GiB_bytes
-from vllm.v1.core.kv_cache_utils import get_kv_cache_configs
+from vllm.v1.core.kv_cache_utils import (generate_scheduler_kv_cache_config,
+                                         get_kv_cache_configs)
 from vllm.v1.engine.core import EngineCore as V1EngineCore

 from ..utils import create_new_process_for_each_test
@@ -62,11 +63,13 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
    # Avoid calling model.forward()
    def _initialize_kv_caches_v1(self, vllm_config):
        kv_cache_specs = self.model_executor.get_kv_cache_specs()
-        scheduler_kv_cache_config = get_kv_cache_configs(
+        kv_cache_configs = get_kv_cache_configs(
            vllm_config,
            kv_cache_specs,
            [10 * GiB_bytes],
-        )[0]
+        )
+        scheduler_kv_cache_config = generate_scheduler_kv_cache_config(
+            kv_cache_configs)

        # gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config
        return 1, 0, scheduler_kv_cache_config