[Model] Add Qwen3 and Qwen3MoE (#15289)
Signed-off-by: YamPengLi <yampayne.lyp@alibaba-inc.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
@@ -263,7 +263,11 @@ class Qwen2DecoderLayer(nn.Module):
     })
 class Qwen2Model(nn.Module):

-    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+    def __init__(self,
+                 *,
+                 vllm_config: VllmConfig,
+                 prefix: str = "",
+                 decoder_layer_type: type[nn.Module] = Qwen2DecoderLayer):
         super().__init__()

         config = vllm_config.model_config.hf_config
@@ -297,12 +301,14 @@ class Qwen2Model(nn.Module):
         else:
             self.embed_tokens = PPMissingLayer()

+        # Use the provided decoder layer type or default to Qwen2DecoderLayer
+        decoder_layer_type = decoder_layer_type or Qwen2DecoderLayer
         self.start_layer, self.end_layer, self.layers = make_layers(
             config.num_hidden_layers,
-            lambda prefix: Qwen2DecoderLayer(config=config,
-                                             cache_config=cache_config,
-                                             quant_config=quant_config,
-                                             prefix=prefix),
+            lambda prefix: decoder_layer_type(config=config,
+                                              cache_config=cache_config,
+                                              quant_config=quant_config,
+                                              prefix=prefix),
             prefix=f"{prefix}.layers",
         )
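For context, a minimal sketch of how the new decoder_layer_type hook could be consumed by a Qwen3-style model. The names MyQwen3DecoderLayer and MyQwen3Model are illustrative assumptions, not the qwen3.py actually added in this PR; only Qwen2Model, Qwen2DecoderLayer, and VllmConfig come from the diff above.

# Illustrative sketch (assumed class names), reusing Qwen2Model by
# injecting a custom decoder layer class via decoder_layer_type.
from vllm.config import VllmConfig
from vllm.model_executor.models.qwen2 import Qwen2DecoderLayer, Qwen2Model


class MyQwen3DecoderLayer(Qwen2DecoderLayer):
    # Assumption: model-specific tweaks would live here. The constructor
    # keeps the Qwen2DecoderLayer signature (config=..., cache_config=...,
    # quant_config=..., prefix=...), which is what the lambda passed to
    # make_layers in Qwen2Model.__init__ expects.
    pass


class MyQwen3Model(Qwen2Model):

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        # Qwen2Model now builds its layer stack from whatever class is
        # passed as decoder_layer_type instead of hard-coding
        # Qwen2DecoderLayer.
        super().__init__(vllm_config=vllm_config,
                         prefix=prefix,
                         decoder_layer_type=MyQwen3DecoderLayer)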