[Models] Intern-S1-Pro (#33636)

Signed-off-by: zxy <zhou0493@e.ntu.edu.sg> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
2026-02-03 21:49:45 +08:00
parent be8168ff88
commit a3acfa1071
11 changed files with 942 additions and 11 deletions
--- a/vllm/model_executor/models/qwen3_moe.py
+++ b/vllm/model_executor/models/qwen3_moe.py
@@ -428,7 +428,13 @@ class Qwen3MoeDecoderLayer(nn.Module):

@support_torch_compile
 class Qwen3MoeModel(nn.Module):
-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+    def __init__(
+        self,
+        *,
+        vllm_config: VllmConfig,
+        prefix: str = "",
+        decoder_layer_type: type[torch.nn.Module] = Qwen3MoeDecoderLayer,
+    ):
        super().__init__()

        config = vllm_config.model_config.hf_text_config
@@ -449,7 +455,7 @@ class Qwen3MoeModel(nn.Module):
        )
        self.start_layer, self.end_layer, self.layers = make_layers(
            config.num_hidden_layers,
-            lambda prefix: Qwen3MoeDecoderLayer(vllm_config=vllm_config, prefix=prefix),
+            lambda prefix: decoder_layer_type(vllm_config=vllm_config, prefix=prefix),
            prefix=f"{prefix}.layers",
        )
        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)