Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
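Note: most of this diff is mechanical. yapf aligns continuation lines under the opening parenthesis, while ruff's black-style formatter uses a four-space hanging indent and honors the "magic trailing comma", exploding a bracketed list to one element per line whenever a trailing comma is present. A minimal sketch of the two styles, with hypothetical names:

    # yapf: continuations aligned under the open paren
    result = some_function(first_argument,
                           second_argument,
                           third_argument)

    # ruff format: hanging indent; the trailing comma keeps one element per line
    result = some_function(
        first_argument,
        second_argument,
        third_argument,
    )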
@@ -14,30 +14,40 @@ from vllm.config import CacheConfig, ModelConfig, VllmConfig
 from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm.model_executor.layers.linear import (MergedColumnParallelLinear,
-                                               QKVParallelLinear,
-                                               RowParallelLinear)
+from vllm.model_executor.layers.linear import (
+    MergedColumnParallelLinear,
+    QKVParallelLinear,
+    RowParallelLinear,
+)
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.mamba.mamba_utils import (
-    MambaStateDtypeCalculator, MambaStateShapeCalculator)
+    MambaStateDtypeCalculator,
+    MambaStateShapeCalculator,
+)
 from vllm.model_executor.layers.mamba.short_conv import ShortConv
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
-    DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding)
+    DEFAULT_VOCAB_PADDING_SIZE,
+    ParallelLMHead,
+    VocabParallelEmbedding,
+)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.sequence import IntermediateTensors
 
-from .interfaces import (HasInnerState, IsHybrid, SupportsLoRA, SupportsPP,
-                         SupportsQuant)
-from .utils import (AutoWeightsLoader, PPMissingLayer, extract_layer_index,
-                    is_pp_missing_parameter,
-                    make_empty_intermediate_tensors_factory, make_layers,
-                    maybe_prefix)
+from .interfaces import HasInnerState, IsHybrid, SupportsLoRA, SupportsPP, SupportsQuant
+from .utils import (
+    AutoWeightsLoader,
+    PPMissingLayer,
+    extract_layer_index,
+    is_pp_missing_parameter,
+    make_empty_intermediate_tensors_factory,
+    make_layers,
+    maybe_prefix,
+)
 
 
 class Lfm2MLP(nn.Module):
-
     def __init__(
         self,
         dim: int,
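The import hunk above also reflects the switch from isort to ruff's isort-compatible import sorting: a parenthesized import with a trailing comma stays one name per line, while an import without one may be collapsed onto a single line when it fits (the reformatted `from .interfaces import ...` line is exactly 88 columns, ruff format's default line length). A sketch with hypothetical module names:

    # no trailing comma and fits the limit: ruff joins it onto one line
    from mypkg.layers import LinearA, LinearB

    # trailing comma after the last name: ruff keeps the exploded form
    from mypkg.layers import (
        LinearA,
        LinearB,
        LinearC,
    )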
@@ -80,7 +90,6 @@ class Lfm2MLP(nn.Module):
 
 
 class Lfm2Attention(nn.Module):
-
     def __init__(
         self,
         config: Lfm2Config,
@@ -177,7 +186,6 @@ class Lfm2Attention(nn.Module):
 
 
 class Lfm2AttentionDecoderLayer(nn.Module):
-
     def __init__(
         self,
         config: Lfm2Config,
@@ -195,11 +203,12 @@ class Lfm2AttentionDecoderLayer(nn.Module):
         rope_theta = getattr(config, "rope_theta", 10000)
         rope_scaling = getattr(config, "rope_scaling", None)
         if rope_scaling is not None and getattr(
-                config, "original_max_position_embeddings", None):
+            config, "original_max_position_embeddings", None
+        ):
             rope_scaling["original_max_position_embeddings"] = (
-                config.original_max_position_embeddings)
-        max_position_embeddings = getattr(config, "max_position_embeddings",
-                                          8192)
+                config.original_max_position_embeddings
+            )
+        max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
 
         self.self_attn = Lfm2Attention(
             config=config,
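The rope_scaling hunk shows two more black-style conventions: a multi-line condition gets its closing `):` on its own line at the statement's indent, and a call like `getattr(...)` that fits within the limit is joined onto a single line. A generic sketch, hypothetical names:

    # before (yapf): continuation indented past the body, `):` fused to the last argument
    if value is not None and getattr(
            config, "some_attribute", None):
        do_something()

    # after (ruff format): arguments at one indent level, dedented `):`
    if value is not None and getattr(
        config, "some_attribute", None
    ):
        do_something()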
@@ -238,16 +247,13 @@ class Lfm2AttentionDecoderLayer(nn.Module):
             residual = hidden_states
             hidden_states = self.operator_norm(hidden_states)
         else:
-            hidden_states, residual = self.operator_norm(
-                hidden_states, residual)
-        hidden_states = self.self_attn(positions=positions,
-                                       hidden_states=hidden_states)
+            hidden_states, residual = self.operator_norm(hidden_states, residual)
+        hidden_states = self.self_attn(positions=positions, hidden_states=hidden_states)
         hidden_states, residual = self.ffn_norm(hidden_states, residual)
         return self.feed_forward(hidden_states), residual
 
 
 class Lfm2ShortConvDecoderLayer(nn.Module):
-
     def __init__(
         self,
         config: Lfm2Config,
@@ -290,8 +296,7 @@ class Lfm2ShortConvDecoderLayer(nn.Module):
             residual = hidden_states
             hidden_states = self.operator_norm(hidden_states)
         else:
-            hidden_states, residual = self.operator_norm(
-                hidden_states, residual)
+            hidden_states, residual = self.operator_norm(hidden_states, residual)
         output = torch.empty_like(hidden_states)
         self.conv(
             hidden_states,
@@ -304,7 +309,6 @@ class Lfm2ShortConvDecoderLayer(nn.Module):
 
 @support_torch_compile
 class Lfm2Model(nn.Module):
-
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
 
@@ -315,21 +319,24 @@ class Lfm2Model(nn.Module):
         lora_config = vllm_config.lora_config
 
         self.config = config
-        lora_vocab = ((lora_config.lora_extra_vocab_size *
-                       (lora_config.max_loras or 1)) if lora_config else 0)
+        lora_vocab = (
+            (lora_config.lora_extra_vocab_size * (lora_config.max_loras or 1))
+            if lora_config
+            else 0
+        )
         self.vocab_size = config.vocab_size + lora_vocab
         self.org_vocab_size = config.vocab_size
 
         self.embed_tokens = VocabParallelEmbedding(
-            self.vocab_size,
-            config.hidden_size,
-            org_num_embeddings=config.vocab_size)
+            self.vocab_size, config.hidden_size, org_num_embeddings=config.vocab_size
+        )
 
         def get_layer(prefix: str):
             layer_idx = extract_layer_index(prefix)
             is_attn = self.config.layer_types[layer_idx] == "full_attention"
-            layer_class = (Lfm2AttentionDecoderLayer
-                           if is_attn else Lfm2ShortConvDecoderLayer)
+            layer_class = (
+                Lfm2AttentionDecoderLayer if is_attn else Lfm2ShortConvDecoderLayer
+            )
             return layer_class(
                 config,
                 layer_idx,
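The inverse transformation also appears above: when a call carries no magic trailing comma and its arguments fit within the limit, ruff format packs them onto as few lines as possible, as with the `VocabParallelEmbedding(...)` call collapsing from three argument lines to one. For example, with hypothetical names:

    # before (yapf): one argument per line even though they would fit together
    embed = Embedding(
        vocab_size,
        hidden_size,
        org_num_embeddings=base_vocab_size)

    # after (ruff format): arguments joined, closing paren on its own line
    embed = Embedding(
        vocab_size, hidden_size, org_num_embeddings=base_vocab_size
    )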
@@ -340,14 +347,14 @@ class Lfm2Model(nn.Module):
             )
 
         self.start_layer, self.end_layer, self.layers = make_layers(
-            config.num_hidden_layers, get_layer, prefix=f"{prefix}.layers")
-        self.make_empty_intermediate_tensors = (
-            make_empty_intermediate_tensors_factory(
-                ["hidden_states", "residual"], config.hidden_size))
+            config.num_hidden_layers, get_layer, prefix=f"{prefix}.layers"
+        )
+        self.make_empty_intermediate_tensors = make_empty_intermediate_tensors_factory(
+            ["hidden_states", "residual"], config.hidden_size
+        )
 
         if get_pp_group().is_last_rank:
-            self.embedding_norm = RMSNorm(config.hidden_size,
-                                          eps=config.norm_eps)
+            self.embedding_norm = RMSNorm(config.hidden_size, eps=config.norm_eps)
         else:
             self.embedding_norm = PPMissingLayer()
 
@@ -379,15 +386,13 @@ class Lfm2Model(nn.Module):
                 residual=residual,
             )
         if not get_pp_group().is_last_rank:
-            return IntermediateTensors({
-                "hidden_states": hidden_states,
-                "residual": residual
-            })
+            return IntermediateTensors(
+                {"hidden_states": hidden_states, "residual": residual}
+            )
         hidden_states, _ = self.embedding_norm(hidden_states, residual)
         return hidden_states
 
-    def load_weights(self, weights: Iterable[tuple[str,
-                                                   torch.Tensor]]) -> set[str]:
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             (".qkv_proj", ".q_proj", "q"),
             (".qkv_proj", ".k_proj", "k"),
@@ -398,7 +403,6 @@ class Lfm2Model(nn.Module):
         params_dict = dict(self.named_parameters())
         loaded_params: set[str] = set()
         for name, loaded_weight in weights:
-
             for param_name, weight_name, shard_id in stacked_params_mapping:
                 if weight_name not in name:
                     continue
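The only change in this hunk is the deleted empty line at the top of the loop body; black-style formatting, which ruff format follows, removes blank lines at the start of a block, the same rule that drops the blank line after each `class ...:` statement throughout this file. For instance:

    # before: blank line opens the block
    for name, value in items:

        process(name, value)

    # after: the blank line is removed
    for name, value in items:
        process(name, value)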
@@ -414,15 +418,15 @@ class Lfm2Model(nn.Module):
             if is_pp_missing_parameter(name, self):
                 continue
             param = params_dict[name]
-            weight_loader = getattr(param, "weight_loader",
-                                    default_weight_loader)
+            weight_loader = getattr(param, "weight_loader", default_weight_loader)
             weight_loader(param, loaded_weight)
             loaded_params.add(name)
         return loaded_params
 
 
-class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
-                      IsHybrid, SupportsQuant):
+class Lfm2ForCausalLM(
+    nn.Module, HasInnerState, SupportsLoRA, SupportsPP, IsHybrid, SupportsQuant
+):
     packed_modules_mapping = {
         "qkv_proj": [
             "q_proj",
@@ -447,7 +451,6 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         cls,
         vllm_config: "VllmConfig",
     ) -> tuple[torch.dtype, ...]:
-
         return MambaStateDtypeCalculator.short_conv_state_dtype(
             vllm_config.model_config.dtype,
             vllm_config.cache_config.mamba_cache_dtype,
@@ -458,7 +461,7 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         cls,
         vllm_config: "VllmConfig",
     ) -> tuple[tuple[int, int]]:
-        """ Calculate shapes for LFM2's convolutional cache.
+        """Calculate shapes for LFM2's convolutional cache.
 
         Args:
             vllm_config: vLLM config
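The one-character docstring change above is docstring normalization: the space between the opening quotes and the first word is dropped. This likely comes from pydocstyle's D210 (surrounding whitespace), which ruff implements with an autofix, though it could also be ruff format's docstring handling. Sketch:

    # before
    def state_shape():
        """ Calculate shapes for the cache. """

    # after
    def state_shape():
        """Calculate shapes for the cache."""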
@@ -482,8 +485,9 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         cache_config = vllm_config.cache_config
         lora_config = vllm_config.lora_config
         scheduler_config = vllm_config.scheduler_config
-        assert (not cache_config.enable_prefix_caching
-                ), "Lfm2 currently does not support prefix caching"
+        assert not cache_config.enable_prefix_caching, (
+            "Lfm2 currently does not support prefix caching"
+        )
 
         super().__init__()
         self.config = config
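The assert hunk shows how the two formatters wrap a long assert-with-message: yapf parenthesized the condition and let the closing paren carry the comma, while black-style formatting keeps the condition bare and parenthesizes the message instead. A generic sketch, hypothetical names:

    # before (yapf): parenthesized condition
    assert (not feature_enabled
            ), "this feature is not supported"

    # after (ruff format): bare condition, parenthesized message
    assert not feature_enabled, (
        "this feature is not supported"
    )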
@@ -491,8 +495,9 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         self.scheduler_config = scheduler_config
         self.model_config = vllm_config.model_config
 
-        self.model = Lfm2Model(vllm_config=vllm_config,
-                               prefix=maybe_prefix(prefix, "model"))
+        self.model = Lfm2Model(
+            vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")
+        )
 
         if get_pp_group().is_last_rank:
             self.unpadded_vocab_size = self.config.vocab_size
@@ -507,8 +512,9 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
                     DEFAULT_VOCAB_PADDING_SIZE
                     # We need bigger padding if using lora for kernel
                     # compatibility
-                    if not lora_config else
-                    lora_config.lora_vocab_padding_size),
+                    if not lora_config
+                    else lora_config.lora_vocab_padding_size
+                ),
                 quant_config=quant_config,
                 prefix=maybe_prefix(prefix, "lm_head"),
             )
@@ -516,11 +522,13 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         else:
             self.lm_head = PPMissingLayer()
 
-        self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
-                                                config.vocab_size)
+        self.logits_processor = LogitsProcessor(
+            self.unpadded_vocab_size, config.vocab_size
+        )
 
         self.make_empty_intermediate_tensors = (
-            self.model.make_empty_intermediate_tensors)
+            self.model.make_empty_intermediate_tensors
+        )
 
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
         return self.model.get_input_embeddings(input_ids)
@@ -533,19 +541,18 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
         inputs_embeds: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> torch.Tensor:
-        hidden_states = self.model(input_ids, positions, intermediate_tensors,
-                                   inputs_embeds)
+        hidden_states = self.model(
+            input_ids, positions, intermediate_tensors, inputs_embeds
+        )
         return hidden_states
 
     def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
         logits = self.logits_processor(self.lm_head, hidden_states)
         return logits
 
-    def load_weights(self, weights: Iterable[tuple[str,
-                                                   torch.Tensor]]) -> set[str]:
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(
             self,
-            skip_prefixes=(["lm_head."]
-                           if self.config.tie_word_embeddings else None),
+            skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None),
         )
         return loader.load_weights(weights)