diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index b1f37eba4..efa6c1cfe 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -478,20 +478,6 @@ class Glm4MoeModel(nn.Module): hidden_states, _ = self.norm(hidden_states, residual) return hidden_states - def make_empty_intermediate_tensors( - self, batch_size: int, dtype: torch.dtype, device: torch.device - ) -> IntermediateTensors: - return IntermediateTensors( - { - "hidden_states": torch.zeros( - (batch_size, self.config.hidden_size), dtype=dtype, device=device - ), - "residual": torch.zeros( - (batch_size, self.config.hidden_size), dtype=dtype, device=device - ), - } - ) - def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: # Params for weights, fp8 weight scales, fp8 activation scales # (param_name, weight_name, expert_id, shard_id)