[Model] Add Qwen3 and Qwen3MoE (#15289)
Signed-off-by: YamPengLi <yampayne.lyp@alibaba-inc.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
@@ -263,7 +263,11 @@ class Qwen2DecoderLayer(nn.Module):
     })
 class Qwen2Model(nn.Module):

-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+    def __init__(self,
+                 *,
+                 vllm_config: VllmConfig,
+                 prefix: str = "",
+                 decoder_layer_type: type[nn.Module] = Qwen2DecoderLayer):
         super().__init__()

         config = vllm_config.model_config.hf_config
@@ -297,12 +301,14 @@ class Qwen2Model(nn.Module):
         else:
             self.embed_tokens = PPMissingLayer()

+        # Use the provided decoder layer type or default to Qwen2DecoderLayer
+        decoder_layer_type = decoder_layer_type or Qwen2DecoderLayer
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
-            lambda prefix: Qwen2DecoderLayer(config=config,
-                                             cache_config=cache_config,
-                                             quant_config=quant_config,
-                                             prefix=prefix),
+            lambda prefix: decoder_layer_type(config=config,
+                                              cache_config=cache_config,
+                                              quant_config=quant_config,
+                                              prefix=prefix),
             prefix=f"{prefix}.layers",
         )
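For context, a minimal sketch of how the new decoder_layer_type hook could be consumed by a Qwen3-style model. The names MyQwen3DecoderLayer and MyQwen3Model are illustrative assumptions, not the qwen3.py actually added in this PR; only Qwen2Model, Qwen2DecoderLayer, and VllmConfig come from the diff above.

# Illustrative sketch (assumed class names), reusing Qwen2Model by
# injecting a custom decoder layer class via decoder_layer_type.
from vllm.config import VllmConfig
from vllm.model_executor.models.qwen2 import Qwen2DecoderLayer, Qwen2Model


class MyQwen3DecoderLayer(Qwen2DecoderLayer):
    # Assumption: model-specific tweaks would live here. The constructor
    # keeps the Qwen2DecoderLayer signature (config=..., cache_config=...,
    # quant_config=..., prefix=...), which is what the lambda passed to
    # make_layers in Qwen2Model.__init__ expects.
    pass


class MyQwen3Model(Qwen2Model):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        # Qwen2Model now builds its layer stack from whatever class is
        # passed as decoder_layer_type instead of hard-coding
        # Qwen2DecoderLayer.
        super().__init__(vllm_config=vllm_config,
                         prefix=prefix,
                         decoder_layer_type=MyQwen3DecoderLayer)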