Update deprecated type hinting in models (#18132)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-05-15 06:06:50 +01:00
parent 83f74c698f
commit 26d0419309
130 changed files with 971 additions and 901 deletions
--- a/vllm/model_executor/models/olmo2.py
+++ b/vllm/model_executor/models/olmo2.py
@@ -23,8 +23,9 @@
 # limitations under the License.
 """Inference-only OLMo2 model compatible with HuggingFace weights."""

+from collections.abc import Iterable
 from functools import partial
-from typing import Iterable, Optional, Tuple, Union
+from typing import Optional, Union

 import torch
 from torch import nn
@@ -135,7 +136,7 @@ class Olmo2Attention(nn.Module):
        )

    def _apply_qk_norm(self, q: torch.Tensor,
-                       k: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+                       k: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        if self.tp_size > 1:
            q = tensor_model_parallel_all_gather(q.contiguous())
            k = tensor_model_parallel_all_gather(k.contiguous())
@@ -365,7 +366,7 @@ class Olmo2ForCausalLM(nn.Module, SupportsPP):
                                       sampling_metadata)
        return logits

-    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
        stacked_params_mapping = [
            # (param_name, shard_name, shard_id)
            ("qkv_proj", "q_proj", "q"),