From afd089f231d714e7fd06b51e3bc7df7fe004c7f9 Mon Sep 17 00:00:00 2001 From: lailoo Date: Sun, 1 Mar 2026 11:27:37 +0800 Subject: [PATCH] [Bugfix][Model] Fix Qwen3.5/Qwen3Next ignoring --dtype flag on older GPUs (#35617) --- vllm/model_executor/models/qwen3_5.py | 2 -- vllm/model_executor/models/qwen3_next.py | 3 --- 2 files changed, 5 deletions(-) diff --git a/vllm/model_executor/models/qwen3_5.py b/vllm/model_executor/models/qwen3_5.py index 731bf3947..66d8ff8e1 100644 --- a/vllm/model_executor/models/qwen3_5.py +++ b/vllm/model_executor/models/qwen3_5.py @@ -274,7 +274,6 @@ class Qwen3_5DecoderLayer(Qwen3NextDecoderLayer): 1, 1, config.hidden_size, - dtype=config.dtype, ), ) self.ffn_layer_scale = torch.nn.Parameter( @@ -282,7 +281,6 @@ class Qwen3_5DecoderLayer(Qwen3NextDecoderLayer): 1, 1, config.hidden_size, - dtype=config.dtype, ), ) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index c57265cc7..7f1386d7b 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -463,7 +463,6 @@ class Qwen3NextGatedDeltaNet(nn.Module, MambaBase): group_size=None, norm_before_gate=True, device=current_platform.current_device(), - dtype=config.dtype, ) self.out_proj = RowParallelLinear( @@ -1018,7 +1017,6 @@ class Qwen3NextDecoderLayer(nn.Module): 1, 1, config.hidden_size, - dtype=config.dtype, ), ) self.ffn_layer_scale = torch.nn.Parameter( @@ -1026,7 +1024,6 @@ class Qwen3NextDecoderLayer(nn.Module): 1, 1, config.hidden_size, - dtype=config.dtype, ), )